LCOV - code coverage report
Current view: top level - pageserver/src - aux_file.rs (source / functions) Coverage Total Hit
Test: fabb29a6339542ee130cd1d32b534fafdc0be240.info Lines: 91.9 % 197 181
Test Date: 2024-06-25 13:20:00 Functions: 93.3 % 15 14

            Line data    Source code
       1              : use std::sync::Arc;
       2              : 
       3              : use ::metrics::IntGauge;
       4              : use bytes::{Buf, BufMut, Bytes};
       5              : use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
       6              : use tracing::warn;
       7              : 
       8              : // BEGIN Copyright (c) 2017 Servo Contributors
       9              : 
      10              : /// Const version of FNV hash.
      11              : #[inline]
      12              : #[must_use]
      13           40 : pub const fn fnv_hash(bytes: &[u8]) -> u128 {
      14           40 :     const INITIAL_STATE: u128 = 0x6c62272e07bb014262b821756295c58d;
      15           40 :     const PRIME: u128 = 0x0000000001000000000000000000013B;
      16           40 : 
      17           40 :     let mut hash = INITIAL_STATE;
      18           40 :     let mut i = 0;
      19          280 :     while i < bytes.len() {
      20          240 :         hash ^= bytes[i] as u128;
      21          240 :         hash = hash.wrapping_mul(PRIME);
      22          240 :         i += 1;
      23          240 :     }
      24           40 :     hash
      25           40 : }
      26              : 
      27              : // END Copyright (c) 2017 Servo Contributors
      28              : 
      29              : /// Create a metadata key from a hash, encoded as [AUX_KEY_PREFIX, 2B directory prefix, least significant 13B of FNV hash].
      30           34 : fn aux_hash_to_metadata_key(dir_level1: u8, dir_level2: u8, data: &[u8]) -> Key {
      31           34 :     let mut key: [u8; 16] = [0; METADATA_KEY_SIZE];
      32           34 :     let hash = fnv_hash(data).to_be_bytes();
      33           34 :     key[0] = AUX_KEY_PREFIX;
      34           34 :     key[1] = dir_level1;
      35           34 :     key[2] = dir_level2;
      36           34 :     key[3..16].copy_from_slice(&hash[3..16]);
      37           34 :     Key::from_metadata_key_fixed_size(&key)
      38           34 : }
      39              : 
      40              : const AUX_DIR_PG_LOGICAL: u8 = 0x01;
      41              : const AUX_DIR_PG_REPLSLOT: u8 = 0x02;
      42              : const AUX_DIR_PG_UNKNOWN: u8 = 0xFF;
      43              : 
      44              : /// Encode the aux file into a fixed-size key.
      45              : ///
      46              : /// The first byte is the AUX key prefix. We use the next 2 bytes of the key for the directory / aux file type.
      47              : /// We have one-to-one mapping for each of the aux file that we support. We hash the remaining part of the path
      48              : /// (usually a single file name, or several components) into 13-byte hash. The way we determine the 2-byte prefix
      49              : /// is roughly based on the first two components of the path, one unique number for one component.
      50              : ///
      51              : /// * pg_logical/mappings -> 0x0101
      52              : /// * pg_logical/snapshots -> 0x0102
      53              : /// * pg_logical/replorigin_checkpoint -> 0x0103
      54              : /// * pg_logical/others -> 0x01FF
      55              : /// * pg_replslot/ -> 0x0201
      56              : /// * others -> 0xFFFF
      57              : ///
      58              : /// If you add new AUX files to this function, please also add a test case to `test_encoding_portable`.
      59              : /// The new file type must have never been written to the storage before. Otherwise, there could be data
      60              : /// corruptions as the new file belongs to a new prefix but it might have been stored under the `others` prefix.
      61           34 : pub fn encode_aux_file_key(path: &str) -> Key {
      62           34 :     if let Some(fname) = path.strip_prefix("pg_logical/mappings/") {
      63           14 :         aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x01, fname.as_bytes())
      64           20 :     } else if let Some(fname) = path.strip_prefix("pg_logical/snapshots/") {
      65            4 :         aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x02, fname.as_bytes())
      66           16 :     } else if path == "pg_logical/replorigin_checkpoint" {
      67            4 :         aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0x03, b"")
      68           12 :     } else if let Some(fname) = path.strip_prefix("pg_logical/") {
      69            4 :         if cfg!(debug_assertions) {
      70            4 :             warn!(
      71            0 :                 "unsupported pg_logical aux file type: {}, putting to 0x01FF, would affect path scanning",
      72              :                 path
      73              :             );
      74            0 :         }
      75            4 :         aux_hash_to_metadata_key(AUX_DIR_PG_LOGICAL, 0xFF, fname.as_bytes())
      76            8 :     } else if let Some(fname) = path.strip_prefix("pg_replslot/") {
      77            4 :         aux_hash_to_metadata_key(AUX_DIR_PG_REPLSLOT, 0x01, fname.as_bytes())
      78              :     } else {
      79            4 :         if cfg!(debug_assertions) {
      80            4 :             warn!(
      81            0 :                 "unsupported aux file type: {}, putting to 0xFFFF, would affect path scanning",
      82              :                 path
      83              :             );
      84            0 :         }
      85            4 :         aux_hash_to_metadata_key(AUX_DIR_PG_UNKNOWN, 0xFF, path.as_bytes())
      86              :     }
      87           34 : }
      88              : 
      89              : const AUX_FILE_ENCODING_VERSION: u8 = 0x01;
      90              : 
      91            6 : pub fn decode_file_value(val: &[u8]) -> anyhow::Result<Vec<(&str, &[u8])>> {
      92            6 :     let mut ptr = val;
      93            6 :     if ptr.is_empty() {
      94              :         // empty value = no files
      95            2 :         return Ok(Vec::new());
      96            4 :     }
      97            4 :     assert_eq!(
      98            4 :         ptr.get_u8(),
      99              :         AUX_FILE_ENCODING_VERSION,
     100            0 :         "unsupported aux file value"
     101              :     );
     102            4 :     let mut files = vec![];
     103           10 :     while ptr.has_remaining() {
     104            6 :         let key_len = ptr.get_u32() as usize;
     105            6 :         let key = &ptr[..key_len];
     106            6 :         ptr.advance(key_len);
     107            6 :         let val_len = ptr.get_u32() as usize;
     108            6 :         let content = &ptr[..val_len];
     109            6 :         ptr.advance(val_len);
     110              : 
     111            6 :         let path = std::str::from_utf8(key)?;
     112            6 :         files.push((path, content));
     113              :     }
     114            4 :     Ok(files)
     115            6 : }
     116              : 
     117              : /// Decode an aux file key-value pair into a list of files. The returned `Bytes` contains reference
     118              : /// to the original value slice. Be cautious about memory consumption.
     119           18 : pub fn decode_file_value_bytes(val: &Bytes) -> anyhow::Result<Vec<(String, Bytes)>> {
     120           18 :     let mut ptr = val.clone();
     121           18 :     if ptr.is_empty() {
     122              :         // empty value = no files
     123            0 :         return Ok(Vec::new());
     124           18 :     }
     125           18 :     assert_eq!(
     126           18 :         ptr.get_u8(),
     127              :         AUX_FILE_ENCODING_VERSION,
     128            0 :         "unsupported aux file value"
     129              :     );
     130           18 :     let mut files = vec![];
     131           36 :     while ptr.has_remaining() {
     132           18 :         let key_len = ptr.get_u32() as usize;
     133           18 :         let key = ptr.slice(..key_len);
     134           18 :         ptr.advance(key_len);
     135           18 :         let val_len = ptr.get_u32() as usize;
     136           18 :         let content = ptr.slice(..val_len);
     137           18 :         ptr.advance(val_len);
     138              : 
     139           18 :         let path = std::str::from_utf8(&key)?.to_string();
     140           18 :         files.push((path, content));
     141              :     }
     142           18 :     Ok(files)
     143           18 : }
     144              : 
     145           14 : pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {
     146           14 :     if files.is_empty() {
     147              :         // no files = empty value
     148            2 :         return Ok(Vec::new());
     149           12 :     }
     150           12 :     let mut encoded = vec![];
     151           12 :     encoded.put_u8(AUX_FILE_ENCODING_VERSION);
     152           26 :     for (path, content) in files {
     153           14 :         if path.len() > u32::MAX as usize {
     154            0 :             anyhow::bail!("{} exceeds path size limit", path);
     155           14 :         }
     156           14 :         encoded.put_u32(path.len() as u32);
     157           14 :         encoded.put_slice(path.as_bytes());
     158           14 :         if content.len() > u32::MAX as usize {
     159            0 :             anyhow::bail!("{} exceeds content size limit", path);
     160           14 :         }
     161           14 :         encoded.put_u32(content.len() as u32);
     162           14 :         encoded.put_slice(content);
     163              :     }
     164           12 :     Ok(encoded)
     165           14 : }
     166              : 
     167              : /// An estimation of the size of aux files.
     168              : pub struct AuxFileSizeEstimator {
     169              :     aux_file_size_gauge: IntGauge,
     170              :     size: Arc<std::sync::Mutex<Option<isize>>>,
     171              : }
     172              : 
     173              : impl AuxFileSizeEstimator {
     174          381 :     pub fn new(aux_file_size_gauge: IntGauge) -> Self {
     175          381 :         Self {
     176          381 :             aux_file_size_gauge,
     177          381 :             size: Arc::new(std::sync::Mutex::new(None)),
     178          381 :         }
     179          381 :     }
     180              : 
     181              :     /// When generating base backup or doing initial logical size calculation
     182           12 :     pub fn on_initial(&self, new_size: usize) {
     183           12 :         let mut guard = self.size.lock().unwrap();
     184           12 :         *guard = Some(new_size as isize);
     185           12 :         self.report(new_size as isize);
     186           12 :     }
     187              : 
     188            8 :     pub fn on_add(&self, file_size: usize) {
     189            8 :         let mut guard = self.size.lock().unwrap();
     190            8 :         if let Some(size) = &mut *guard {
     191            6 :             *size += file_size as isize;
     192            6 :             self.report(*size);
     193            6 :         }
     194            8 :     }
     195              : 
     196            0 :     pub fn on_remove(&self, file_size: usize) {
     197            0 :         let mut guard = self.size.lock().unwrap();
     198            0 :         if let Some(size) = &mut *guard {
     199            0 :             *size -= file_size as isize;
     200            0 :             self.report(*size);
     201            0 :         }
     202            0 :     }
     203              : 
     204            2 :     pub fn on_update(&self, old_size: usize, new_size: usize) {
     205            2 :         let mut guard = self.size.lock().unwrap();
     206            2 :         if let Some(size) = &mut *guard {
     207            2 :             *size += new_size as isize - old_size as isize;
     208            2 :             self.report(*size);
     209            2 :         }
     210            2 :     }
     211              : 
     212           20 :     pub fn report(&self, size: isize) {
     213           20 :         self.aux_file_size_gauge.set(size as i64);
     214           20 :     }
     215              : }
     216              : 
     217              : #[cfg(test)]
     218              : mod tests {
     219              :     use super::*;
     220              : 
     221              :     #[test]
     222            2 :     fn test_hash_portable() {
     223            2 :         // AUX file encoding requires the hash to be portable across all platforms. This test case checks
     224            2 :         // if the algorithm produces the same hash across different environments.
     225            2 : 
     226            2 :         assert_eq!(
     227            2 :             265160408618497461376862998434862070044,
     228            2 :             super::fnv_hash("test1".as_bytes())
     229            2 :         );
     230            2 :         assert_eq!(
     231            2 :             295486155126299629456360817749600553988,
     232            2 :             super::fnv_hash("test/test2".as_bytes())
     233            2 :         );
     234            2 :         assert_eq!(
     235            2 :             144066263297769815596495629667062367629,
     236            2 :             super::fnv_hash("".as_bytes())
     237            2 :         );
     238            2 :     }
     239              : 
     240              :     #[test]
     241            2 :     fn test_encoding_portable() {
     242            2 :         // To correct retrieve AUX files, the generated keys for the same file must be the same for all versions
     243            2 :         // of the page server.
     244            2 :         assert_eq!(
     245            2 :             "62000001017F8B83D94F7081693471ABF91C",
     246            2 :             encode_aux_file_key("pg_logical/mappings/test1").to_string(),
     247            2 :         );
     248            2 :         assert_eq!(
     249            2 :             "62000001027F8E83D94F7081693471ABFCCD",
     250            2 :             encode_aux_file_key("pg_logical/snapshots/test2").to_string(),
     251            2 :         );
     252            2 :         assert_eq!(
     253            2 :             "62000001032E07BB014262B821756295C58D",
     254            2 :             encode_aux_file_key("pg_logical/replorigin_checkpoint").to_string(),
     255            2 :         );
     256            2 :         assert_eq!(
     257            2 :             "62000001FF4F38E1C74754E7D03C1A660178",
     258            2 :             encode_aux_file_key("pg_logical/unsupported").to_string(),
     259            2 :         );
     260            2 :         assert_eq!(
     261            2 :             "62000002017F8D83D94F7081693471ABFB92",
     262            2 :             encode_aux_file_key("pg_replslot/test3").to_string()
     263            2 :         );
     264            2 :         assert_eq!(
     265            2 :             "620000FFFF2B6ECC8AEF93F643DC44F15E03",
     266            2 :             encode_aux_file_key("other_file_not_supported").to_string(),
     267            2 :         );
     268            2 :     }
     269              : 
     270              :     #[test]
     271            2 :     fn test_value_encoding() {
     272            2 :         let files = vec![
     273            2 :             ("pg_logical/1.file", "1111".as_bytes()),
     274            2 :             ("pg_logical/2.file", "2222".as_bytes()),
     275            2 :         ];
     276            2 :         assert_eq!(
     277            2 :             files,
     278            2 :             decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
     279            2 :         );
     280            2 :         let files = vec![];
     281            2 :         assert_eq!(
     282            2 :             files,
     283            2 :             decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
     284            2 :         );
     285            2 :     }
     286              : }
        

Generated by: LCOV version 2.1-beta