LCOV - differential code coverage report
Current view: top level - pageserver/src/tenant - metadata.rs (source / functions) Coverage Total Hit UBC CBC
Current: f6946e90941b557c917ac98cd5a7e9506d180f3e.info Lines: 97.9 % 234 229 5 229
Current Date: 2023-10-19 02:04:12 Functions: 52.3 % 88 46 42 46
Baseline: c8637f37369098875162f194f92736355783b050.info
Baseline Date: 2023-10-18 20:25:20

           TLA  Line data    Source code
       1                 : //! Every image of a certain timeline from [`crate::tenant::Tenant`]
       2                 : //! has metadata that needs to be stored persistently.
       3                 : //!
       4                 : //! Later, the file gets used in [`remote_timeline_client`] as a part of
       5                 : //! external storage import and export operations.
       6                 : //!
       7                 : //! The module contains all structs and related helper methods related to timeline metadata.
       8                 : //!
       9                 : //! [`remote_timeline_client`]: super::remote_timeline_client
      10                 : 
      11                 : use std::io::{self};
      12                 : 
      13                 : use anyhow::{ensure, Context};
      14                 : use serde::{de::Error, Deserialize, Serialize, Serializer};
      15                 : use thiserror::Error;
      16                 : use utils::bin_ser::SerializeError;
      17                 : use utils::crashsafe::path_with_suffix_extension;
      18                 : use utils::{
      19                 :     bin_ser::BeSer,
      20                 :     id::{TenantId, TimelineId},
      21                 :     lsn::Lsn,
      22                 : };
      23                 : 
      24                 : use crate::config::PageServerConf;
      25                 : use crate::virtual_file::VirtualFile;
      26                 : use crate::TEMP_FILE_SUFFIX;
      27                 : 
/// Format version stamped into the header of every newly serialized metadata
/// file. When decoding, any other version is routed through the upgrade path.
const METADATA_FORMAT_VERSION: u16 = 4;

/// The only older format version the upgrade path accepts.
const METADATA_OLD_FORMAT_VERSION: u16 = 3;

/// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic.
///
/// This is the same assumption that PostgreSQL makes with the control file,
/// see PG_CONTROL_MAX_SAFE_SIZE
const METADATA_MAX_SIZE: usize = 512;
      39                 : 
/// Metadata stored on disk for each timeline
///
/// The fields correspond to the values we hold in memory, in Timeline.
///
/// Serialized form is a fixed-size, zero-padded buffer of `METADATA_MAX_SIZE`
/// bytes: the header first, immediately followed by the body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimelineMetadata {
    // Checksum / size / format version describing the serialized body.
    hdr: TimelineMetadataHeader,
    // Payload, always held in memory in the current (V2) layout.
    body: TimelineMetadataBodyV2,
}
      48                 : 
/// Fixed-size prefix of the serialized metadata.
///
/// Field order and widths are part of the on-disk format; do not reorder.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataHeader {
    checksum: u32,       // CRC of serialized metadata body
    size: u16,           // size of serialized metadata
    format_version: u16, // metadata format version (used for compatibility checks)
}
// In-memory size of the header (u32 + u16 + u16). The (de)serialization code
// uses this as the length of the *serialized* header as well, so the two are
// expected to be equal.
const METADATA_HDR_SIZE: usize = std::mem::size_of::<TimelineMetadataHeader>();
      56                 : 
/// Current (format version `METADATA_FORMAT_VERSION`) metadata payload.
///
/// Same fields as [`TimelineMetadataBodyV1`] plus a trailing `pg_version`.
/// Field order is part of the serialized format; do not reorder.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV2 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'.  That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
    // Not present in the V1 body; filled in with 14 when old metadata is upgraded.
    pg_version: u32,
}
      77                 : 
/// Metadata payload of the previous format (`METADATA_OLD_FORMAT_VERSION`).
///
/// Kept only so that old files can be decoded and converted to
/// [`TimelineMetadataBodyV2`]. Field order is part of the serialized format.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV1 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'.  That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
}
      97                 : 
      98                 : impl TimelineMetadata {
      99            6344 :     pub fn new(
     100            6344 :         disk_consistent_lsn: Lsn,
     101            6344 :         prev_record_lsn: Option<Lsn>,
     102            6344 :         ancestor_timeline: Option<TimelineId>,
     103            6344 :         ancestor_lsn: Lsn,
     104            6344 :         latest_gc_cutoff_lsn: Lsn,
     105            6344 :         initdb_lsn: Lsn,
     106            6344 :         pg_version: u32,
     107            6344 :     ) -> Self {
     108            6344 :         Self {
     109            6344 :             hdr: TimelineMetadataHeader {
     110            6344 :                 checksum: 0,
     111            6344 :                 size: 0,
     112            6344 :                 format_version: METADATA_FORMAT_VERSION,
     113            6344 :             },
     114            6344 :             body: TimelineMetadataBodyV2 {
     115            6344 :                 disk_consistent_lsn,
     116            6344 :                 prev_record_lsn,
     117            6344 :                 ancestor_timeline,
     118            6344 :                 ancestor_lsn,
     119            6344 :                 latest_gc_cutoff_lsn,
     120            6344 :                 initdb_lsn,
     121            6344 :                 pg_version,
     122            6344 :             },
     123            6344 :         }
     124            6344 :     }
     125                 : 
     126               1 :     fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
     127               1 :         let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
     128                 : 
     129                 :         // backward compatible only up to this version
     130               1 :         ensure!(
     131               1 :             hdr.format_version == METADATA_OLD_FORMAT_VERSION,
     132 UBC           0 :             "unsupported metadata format version {}",
     133                 :             hdr.format_version
     134                 :         );
     135                 : 
     136 CBC           1 :         let metadata_size = hdr.size as usize;
     137                 : 
     138               1 :         let body: TimelineMetadataBodyV1 =
     139               1 :             TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
     140                 : 
     141               1 :         let body = TimelineMetadataBodyV2 {
     142               1 :             disk_consistent_lsn: body.disk_consistent_lsn,
     143               1 :             prev_record_lsn: body.prev_record_lsn,
     144               1 :             ancestor_timeline: body.ancestor_timeline,
     145               1 :             ancestor_lsn: body.ancestor_lsn,
     146               1 :             latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn,
     147               1 :             initdb_lsn: body.initdb_lsn,
     148               1 :             pg_version: 14, // All timelines created before this version had pg_version 14
     149               1 :         };
     150               1 : 
     151               1 :         hdr.format_version = METADATA_FORMAT_VERSION;
     152               1 : 
     153               1 :         Ok(Self { hdr, body })
     154               1 :     }
     155                 : 
     156             645 :     pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
     157             645 :         ensure!(
     158             645 :             metadata_bytes.len() == METADATA_MAX_SIZE,
     159               1 :             "metadata bytes size is wrong"
     160                 :         );
     161             644 :         let hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
     162                 : 
     163             644 :         let metadata_size = hdr.size as usize;
     164             644 :         ensure!(
     165             644 :             metadata_size <= METADATA_MAX_SIZE,
     166 UBC           0 :             "corrupted metadata file"
     167                 :         );
     168 CBC         644 :         let calculated_checksum = crc32c::crc32c(&metadata_bytes[METADATA_HDR_SIZE..metadata_size]);
     169             644 :         ensure!(
     170             644 :             hdr.checksum == calculated_checksum,
     171               1 :             "metadata checksum mismatch"
     172                 :         );
     173                 : 
     174             643 :         if hdr.format_version != METADATA_FORMAT_VERSION {
     175                 :             // If metadata has the old format,
     176                 :             // upgrade it and return the result
     177               1 :             TimelineMetadata::upgrade_timeline_metadata(metadata_bytes)
     178                 :         } else {
     179             642 :             let body =
     180             642 :                 TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
     181             642 :             ensure!(
     182             642 :                 body.disk_consistent_lsn.is_aligned(),
     183 UBC           0 :                 "disk_consistent_lsn is not aligned"
     184                 :             );
     185 CBC         642 :             Ok(TimelineMetadata { hdr, body })
     186                 :         }
     187             645 :     }
     188                 : 
     189           13628 :     pub fn to_bytes(&self) -> Result<Vec<u8>, SerializeError> {
     190           13628 :         let body_bytes = self.body.ser()?;
     191           13628 :         let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
     192           13628 :         let hdr = TimelineMetadataHeader {
     193           13628 :             size: metadata_size as u16,
     194           13628 :             format_version: METADATA_FORMAT_VERSION,
     195           13628 :             checksum: crc32c::crc32c(&body_bytes),
     196           13628 :         };
     197           13628 :         let hdr_bytes = hdr.ser()?;
     198           13628 :         let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];
     199           13628 :         metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);
     200           13628 :         metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);
     201           13628 :         Ok(metadata_bytes)
     202           13628 :     }
     203                 : 
     204                 :     /// [`Lsn`] that corresponds to the corresponding timeline directory
     205                 :     /// contents, stored locally in the pageserver workdir.
     206            8881 :     pub fn disk_consistent_lsn(&self) -> Lsn {
     207            8881 :         self.body.disk_consistent_lsn
     208            8881 :     }
     209                 : 
     210            1302 :     pub fn prev_record_lsn(&self) -> Option<Lsn> {
     211            1302 :         self.body.prev_record_lsn
     212            1302 :     }
     213                 : 
     214            1912 :     pub fn ancestor_timeline(&self) -> Option<TimelineId> {
     215            1912 :         self.body.ancestor_timeline
     216            1912 :     }
     217                 : 
     218            1302 :     pub fn ancestor_lsn(&self) -> Lsn {
     219            1302 :         self.body.ancestor_lsn
     220            1302 :     }
     221                 : 
     222            1302 :     pub fn latest_gc_cutoff_lsn(&self) -> Lsn {
     223            1302 :         self.body.latest_gc_cutoff_lsn
     224            1302 :     }
     225                 : 
     226            1302 :     pub fn initdb_lsn(&self) -> Lsn {
     227            1302 :         self.body.initdb_lsn
     228            1302 :     }
     229                 : 
     230            1302 :     pub fn pg_version(&self) -> u32 {
     231            1302 :         self.body.pg_version
     232            1302 :     }
     233                 : 
     234                 :     // Checksums make it awkward to build a valid instance by hand.  This helper
     235                 :     // provides a TimelineMetadata with a valid checksum in its header.
     236                 :     #[cfg(test)]
     237               6 :     pub fn example() -> Self {
     238               6 :         let instance = Self::new(
     239               6 :             "0/16960E8".parse::<Lsn>().unwrap(),
     240               6 :             None,
     241               6 :             None,
     242               6 :             Lsn::from_hex("00000000").unwrap(),
     243               6 :             Lsn::from_hex("00000000").unwrap(),
     244               6 :             Lsn::from_hex("00000000").unwrap(),
     245               6 :             0,
     246               6 :         );
     247               6 :         let bytes = instance.to_bytes().unwrap();
     248               6 :         Self::from_bytes(&bytes).unwrap()
     249               6 :     }
     250                 : }
     251                 : 
     252                 : impl<'de> Deserialize<'de> for TimelineMetadata {
     253             347 :     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     254             347 :     where
     255             347 :         D: serde::Deserializer<'de>,
     256             347 :     {
     257             347 :         let bytes = Vec::<u8>::deserialize(deserializer)?;
     258             347 :         Self::from_bytes(bytes.as_slice()).map_err(|e| D::Error::custom(format!("{e}")))
     259             347 :     }
     260                 : }
     261                 : 
     262                 : impl Serialize for TimelineMetadata {
     263            6972 :     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     264            6972 :     where
     265            6972 :         S: Serializer,
     266            6972 :     {
     267            6972 :         let bytes = self
     268            6972 :             .to_bytes()
     269            6972 :             .map_err(|e| serde::ser::Error::custom(format!("{e}")))?;
     270            6972 :         bytes.serialize(serializer)
     271            6972 :     }
     272                 : }
     273                 : 
     274                 : /// Save timeline metadata to file
     275           26592 : #[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
     276                 : pub async fn save_metadata(
     277                 :     conf: &'static PageServerConf,
     278                 :     tenant_id: &TenantId,
     279                 :     timeline_id: &TimelineId,
     280                 :     data: &TimelineMetadata,
     281                 : ) -> anyhow::Result<()> {
     282                 :     let path = conf.metadata_path(tenant_id, timeline_id);
     283                 :     let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
     284                 :     let metadata_bytes = data.to_bytes().context("serialize metadata")?;
     285                 :     VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
     286                 :         .await
     287                 :         .context("write metadata")?;
     288                 :     Ok(())
     289                 : }
     290                 : 
/// Failure modes of [`load_metadata`].
///
/// Both variants are transparent: their `Display` output and error source are
/// those of the wrapped error.
#[derive(Error, Debug)]
pub enum LoadMetadataError {
    /// The metadata file could not be read.
    #[error(transparent)]
    Read(#[from] io::Error),

    /// The file was read but its contents could not be decoded.
    #[error(transparent)]
    Decode(#[from] anyhow::Error),
}
     299                 : 
     300             291 : pub fn load_metadata(
     301             291 :     conf: &'static PageServerConf,
     302             291 :     tenant_id: &TenantId,
     303             291 :     timeline_id: &TimelineId,
     304             291 : ) -> Result<TimelineMetadata, LoadMetadataError> {
     305             291 :     let metadata_path = conf.metadata_path(tenant_id, timeline_id);
     306             291 :     let metadata_bytes = std::fs::read(metadata_path)?;
     307                 : 
     308             284 :     Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
     309             291 : }
     310                 : 
     311                 : #[cfg(test)]
     312                 : mod tests {
     313                 :     use super::*;
     314                 :     use crate::tenant::harness::TIMELINE_ID;
     315                 : 
     316               1 :     #[test]
     317               1 :     fn metadata_serializes_correctly() {
     318               1 :         let original_metadata = TimelineMetadata::new(
     319               1 :             Lsn(0x200),
     320               1 :             Some(Lsn(0x100)),
     321               1 :             Some(TIMELINE_ID),
     322               1 :             Lsn(0),
     323               1 :             Lsn(0),
     324               1 :             Lsn(0),
     325               1 :             // Any version will do here, so use the default
     326               1 :             crate::DEFAULT_PG_VERSION,
     327               1 :         );
     328               1 : 
     329               1 :         let metadata_bytes = original_metadata
     330               1 :             .to_bytes()
     331               1 :             .expect("Should serialize correct metadata to bytes");
     332               1 : 
     333               1 :         let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes)
     334               1 :             .expect("Should deserialize its own bytes");
     335               1 : 
     336               1 :         assert_eq!(
     337                 :             deserialized_metadata.body, original_metadata.body,
     338 UBC           0 :             "Metadata that was serialized to bytes and deserialized back should not change"
     339                 :         );
     340 CBC           1 :     }
     341                 : 
     342                 :     // Generate old version metadata and read it with current code.
     343                 :     // Ensure that it is upgraded correctly
     344               1 :     #[test]
     345               1 :     fn test_metadata_upgrade() {
     346               1 :         #[derive(Debug, Clone, PartialEq, Eq)]
     347               1 :         struct TimelineMetadataV1 {
     348               1 :             hdr: TimelineMetadataHeader,
     349               1 :             body: TimelineMetadataBodyV1,
     350               1 :         }
     351               1 : 
     352               1 :         let metadata_v1 = TimelineMetadataV1 {
     353               1 :             hdr: TimelineMetadataHeader {
     354               1 :                 checksum: 0,
     355               1 :                 size: 0,
     356               1 :                 format_version: METADATA_OLD_FORMAT_VERSION,
     357               1 :             },
     358               1 :             body: TimelineMetadataBodyV1 {
     359               1 :                 disk_consistent_lsn: Lsn(0x200),
     360               1 :                 prev_record_lsn: Some(Lsn(0x100)),
     361               1 :                 ancestor_timeline: Some(TIMELINE_ID),
     362               1 :                 ancestor_lsn: Lsn(0),
     363               1 :                 latest_gc_cutoff_lsn: Lsn(0),
     364               1 :                 initdb_lsn: Lsn(0),
     365               1 :             },
     366               1 :         };
     367               1 : 
     368               1 :         impl TimelineMetadataV1 {
     369               1 :             pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
     370               1 :                 let body_bytes = self.body.ser()?;
     371               1 :                 let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
     372               1 :                 let hdr = TimelineMetadataHeader {
     373               1 :                     size: metadata_size as u16,
     374               1 :                     format_version: METADATA_OLD_FORMAT_VERSION,
     375               1 :                     checksum: crc32c::crc32c(&body_bytes),
     376               1 :                 };
     377               1 :                 let hdr_bytes = hdr.ser()?;
     378               1 :                 let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];
     379               1 :                 metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);
     380               1 :                 metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);
     381               1 :                 Ok(metadata_bytes)
     382               1 :             }
     383               1 :         }
     384               1 : 
     385               1 :         let metadata_bytes = metadata_v1
     386               1 :             .to_bytes()
     387               1 :             .expect("Should serialize correct metadata to bytes");
     388               1 : 
     389               1 :         // This should deserialize to the latest version format
     390               1 :         let deserialized_metadata = TimelineMetadata::from_bytes(&metadata_bytes)
     391               1 :             .expect("Should deserialize its own bytes");
     392               1 : 
     393               1 :         let expected_metadata = TimelineMetadata::new(
     394               1 :             Lsn(0x200),
     395               1 :             Some(Lsn(0x100)),
     396               1 :             Some(TIMELINE_ID),
     397               1 :             Lsn(0),
     398               1 :             Lsn(0),
     399               1 :             Lsn(0),
     400               1 :             14, // All timelines created before this version had pg_version 14
     401               1 :         );
     402               1 : 
     403               1 :         assert_eq!(
     404                 :             deserialized_metadata.body, expected_metadata.body,
     405 UBC           0 :             "Metadata of the old version {} should be upgraded to the latest version {}",
     406                 :             METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION
     407                 :         );
     408 CBC           1 :     }
     409                 : }
        

Generated by: LCOV version 2.1-beta