LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - key.rs (source / functions) Coverage Total Hit
Test: 32f4a56327bc9da697706839ed4836b2a00a408f.info Lines: 88.6 % 308 273
Test Date: 2024-02-07 07:37:29 Functions: 70.6 % 51 36

            Line data    Source code
       1              : use anyhow::{bail, Result};
       2              : use byteorder::{ByteOrder, BE};
       3              : use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
       4              : use postgres_ffi::{Oid, TransactionId};
       5              : use serde::{Deserialize, Serialize};
       6              : use std::{fmt, ops::Range};
       7              : 
       8              : use crate::reltag::{BlockNumber, RelTag, SlruKind};
       9              : 
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
/// for what we actually store in these fields.
///
/// The six fields add up to 18 bytes (1 + 4 + 4 + 4 + 1 + 4). The derived ordering
/// compares the fields in declaration order, `field1` first.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Key {
    /// First byte of the key. The constructors in this file use it to pick the keyspace
    /// section: 0x00 relations, 0x01 SLRUs, 0x02 pg_twophase, 0x03 misc.
    pub field1: u8,
    /// Tablespace OID for relation keys; SLRU kind number for SLRU keys.
    pub field2: u32,
    /// Database OID for relation keys; 1 for SLRU segment keys.
    pub field3: u32,
    /// Relfilenode for relation keys; segment number for SLRU segment keys.
    pub field4: u32,
    /// Fork number for relation keys.
    pub field5: u8,
    /// Block number, `0xffffffff` for size entries, or the XID for two-phase files.
    pub field6: u32,
}
      23              : 
      24              : pub const KEY_SIZE: usize = 18;
      25              : 
      26              : impl Key {
      27              :     /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
      28              :     /// As long as Neon does not support tablespace (because of lack of access to local file system),
      29              :     /// we can assume that only some predefined namespace OIDs are used which can fit in u16
      30     97976722 :     pub fn to_i128(&self) -> i128 {
      31     97976722 :         assert!(self.field2 < 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
      32     97976722 :         (((self.field1 & 0xf) as i128) << 120)
      33     97976722 :             | (((self.field2 & 0xFFFF) as i128) << 104)
      34     97976722 :             | ((self.field3 as i128) << 72)
      35     97976722 :             | ((self.field4 as i128) << 40)
      36     97976722 :             | ((self.field5 as i128) << 32)
      37     97976722 :             | self.field6 as i128
      38     97976722 :     }
      39              : 
      40     13674925 :     pub const fn from_i128(x: i128) -> Self {
      41     13674925 :         Key {
      42     13674925 :             field1: ((x >> 120) & 0xf) as u8,
      43     13674925 :             field2: ((x >> 104) & 0xFFFF) as u32,
      44     13674925 :             field3: (x >> 72) as u32,
      45     13674925 :             field4: (x >> 40) as u32,
      46     13674925 :             field5: (x >> 32) as u8,
      47     13674925 :             field6: x as u32,
      48     13674925 :         }
      49     13674925 :     }
      50              : 
      51     20319480 :     pub fn next(&self) -> Key {
      52     20319480 :         self.add(1)
      53     20319480 :     }
      54              : 
      55     20326387 :     pub fn add(&self, x: u32) -> Key {
      56     20326387 :         let mut key = *self;
      57     20326387 : 
      58     20326387 :         let r = key.field6.overflowing_add(x);
      59     20326387 :         key.field6 = r.0;
      60     20326387 :         if r.1 {
      61      1619094 :             let r = key.field5.overflowing_add(1);
      62      1619094 :             key.field5 = r.0;
      63      1619094 :             if r.1 {
      64            0 :                 let r = key.field4.overflowing_add(1);
      65            0 :                 key.field4 = r.0;
      66            0 :                 if r.1 {
      67            0 :                     let r = key.field3.overflowing_add(1);
      68            0 :                     key.field3 = r.0;
      69            0 :                     if r.1 {
      70            0 :                         let r = key.field2.overflowing_add(1);
      71            0 :                         key.field2 = r.0;
      72            0 :                         if r.1 {
      73            0 :                             let r = key.field1.overflowing_add(1);
      74            0 :                             key.field1 = r.0;
      75            0 :                             assert!(!r.1);
      76            0 :                         }
      77            0 :                     }
      78            0 :                 }
      79      1619094 :             }
      80     18707293 :         }
      81     20326387 :         key
      82     20326387 :     }
      83              : 
      84     17514736 :     pub fn from_slice(b: &[u8]) -> Self {
      85     17514736 :         Key {
      86     17514736 :             field1: b[0],
      87     17514736 :             field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
      88     17514736 :             field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
      89     17514736 :             field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
      90     17514736 :             field5: b[13],
      91     17514736 :             field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
      92     17514736 :         }
      93     17514736 :     }
      94              : 
      95     76703151 :     pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
      96     76703151 :         buf[0] = self.field1;
      97     76703151 :         BE::write_u32(&mut buf[1..5], self.field2);
      98     76703151 :         BE::write_u32(&mut buf[5..9], self.field3);
      99     76703151 :         BE::write_u32(&mut buf[9..13], self.field4);
     100     76703151 :         buf[13] = self.field5;
     101     76703151 :         BE::write_u32(&mut buf[14..18], self.field6);
     102     76703151 :     }
     103              : }
     104              : 
     105              : impl fmt::Display for Key {
     106      3220934 :     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
     107      3220934 :         write!(
     108      3220934 :             f,
     109      3220934 :             "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
     110      3220934 :             self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
     111      3220934 :         )
     112      3220934 :     }
     113              : }
     114              : 
     115              : impl Key {
     116              :     pub const MIN: Key = Key {
     117              :         field1: u8::MIN,
     118              :         field2: u32::MIN,
     119              :         field3: u32::MIN,
     120              :         field4: u32::MIN,
     121              :         field5: u8::MIN,
     122              :         field6: u32::MIN,
     123              :     };
     124              :     pub const MAX: Key = Key {
     125              :         field1: u8::MAX,
     126              :         field2: u32::MAX,
     127              :         field3: u32::MAX,
     128              :         field4: u32::MAX,
     129              :         field5: u8::MAX,
     130              :         field6: u32::MAX,
     131              :     };
     132              : 
     133       202480 :     pub fn from_hex(s: &str) -> Result<Self> {
     134       202480 :         if s.len() != 36 {
     135            0 :             bail!("parse error");
     136       202480 :         }
     137       202480 :         Ok(Key {
     138       202480 :             field1: u8::from_str_radix(&s[0..2], 16)?,
     139       202480 :             field2: u32::from_str_radix(&s[2..10], 16)?,
     140       202480 :             field3: u32::from_str_radix(&s[10..18], 16)?,
     141       202480 :             field4: u32::from_str_radix(&s[18..26], 16)?,
     142       202480 :             field5: u8::from_str_radix(&s[26..28], 16)?,
     143       202480 :             field6: u32::from_str_radix(&s[28..36], 16)?,
     144              :         })
     145       202480 :     }
     146              : }
     147              : 
     148              : // Layout of the Key address space
     149              : //
     150              : // The Key struct, used to address the underlying key-value store, consists of
     151              : // 18 bytes, split into six fields. See the 'Key' struct above. We need to map
     152              : // all the data and metadata keys into those 18 bytes.
     153              : //
     154              : // Principles for the mapping:
     155              : //
     156              : // - Things that are often accessed or modified together, should be close to
     157              : //   each other in the key space. For example, if a relation is extended by one
     158              : //   block, we create a new key-value pair for the block data, and update the
     159              : //   relation size entry. Because of that, the RelSize key comes after all the
     160              : //   RelBlocks of a relation: the RelSize and the last RelBlock are always next
     161              : //   to each other.
     162              : //
     163              : // The key space is divided into four major sections, identified by the first
     164              : // byte, and they form a hierarchy:
     165              : //
     166              : // 00 Relation data and metadata
     167              : //
     168              : //   DbDir    () -> (dbnode, spcnode)
     169              : //   Filenodemap
     170              : //   RelDir   -> relnode forknum
     171              : //       RelBlocks
     172              : //       RelSize
     173              : //
     174              : // 01 SLRUs
     175              : //
     176              : //   SlruDir  kind
     177              : //   SlruSegBlocks segno
     178              : //   SlruSegSize
     179              : //
     180              : // 02 pg_twophase
     181              : //
     182              : // 03 misc
     183              : //    Controlfile
     184              : //    checkpoint
     185              : //    pg_version
     186              : //
     187              : // 03 aux files (AUX_FILES_KEY uses first byte 03, like the misc keys)
     188              : //
     189              : // Below is a full list of the keyspace allocation:
     190              : //
     191              : // DbDir:
     192              : // 00 00000000 00000000 00000000 00   00000000
     193              : //
     194              : // Filenodemap:
     195              : // 00 SPCNODE  DBNODE   00000000 00   00000000
     196              : //
     197              : // RelDir:
     198              : // 00 SPCNODE  DBNODE   00000000 00   00000001 (Postgres never uses relfilenode 0)
     199              : //
     200              : // RelBlock:
     201              : // 00 SPCNODE  DBNODE   RELNODE  FORK BLKNUM
     202              : //
     203              : // RelSize:
     204              : // 00 SPCNODE  DBNODE   RELNODE  FORK FFFFFFFF
     205              : //
     206              : // SlruDir:
     207              : // 01 kind     00000000 00000000 00   00000000
     208              : //
     209              : // SlruSegBlock:
     210              : // 01 kind     00000001 SEGNO    00   BLKNUM
     211              : //
     212              : // SlruSegSize:
     213              : // 01 kind     00000001 SEGNO    00   FFFFFFFF
     214              : //
     215              : // TwoPhaseDir:
     216              : // 02 00000000 00000000 00000000 00   00000000
     217              : //
     218              : // TwoPhaseFile:
     219              : // 02 00000000 00000000 00000000 00   XID
     220              : //
     221              : // ControlFile:
     222              : // 03 00000000 00000000 00000000 00   00000000
     223              : //
     224              : // Checkpoint:
     225              : // 03 00000000 00000000 00000000 00   00000001
     226              : //
     227              : // AuxFiles:
     228              : // 03 00000000 00000000 00000000 00   00000002
     229              : //
     230              : 
     231              : //-- Section 01: relation data and metadata
     232              : 
     233              : pub const DBDIR_KEY: Key = Key {
     234              :     field1: 0x00,
     235              :     field2: 0,
     236              :     field3: 0,
     237              :     field4: 0,
     238              :     field5: 0,
     239              :     field6: 0,
     240              : };
     241              : 
     242              : #[inline(always)]
     243            3 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
     244            3 :     Key {
     245            3 :         field1: 0x00,
     246            3 :         field2: spcnode,
     247            3 :         field3: dbnode,
     248            3 :         field4: 0,
     249            3 :         field5: 0,
     250            3 :         field6: 0,
     251            3 :     }..Key {
     252            3 :         field1: 0x00,
     253            3 :         field2: spcnode,
     254            3 :         field3: dbnode,
     255            3 :         field4: 0xffffffff,
     256            3 :         field5: 0xff,
     257            3 :         field6: 0xffffffff,
     258            3 :     }
     259            3 : }
     260              : 
     261              : #[inline(always)]
     262         7919 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
     263         7919 :     Key {
     264         7919 :         field1: 0x00,
     265         7919 :         field2: spcnode,
     266         7919 :         field3: dbnode,
     267         7919 :         field4: 0,
     268         7919 :         field5: 0,
     269         7919 :         field6: 0,
     270         7919 :     }
     271         7919 : }
     272              : 
     273              : #[inline(always)]
     274       988533 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
     275       988533 :     Key {
     276       988533 :         field1: 0x00,
     277       988533 :         field2: spcnode,
     278       988533 :         field3: dbnode,
     279       988533 :         field4: 0,
     280       988533 :         field5: 0,
     281       988533 :         field6: 1,
     282       988533 :     }
     283       988533 : }
     284              : 
     285              : #[inline(always)]
     286    131857985 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
     287    131857985 :     Key {
     288    131857985 :         field1: 0x00,
     289    131857985 :         field2: rel.spcnode,
     290    131857985 :         field3: rel.dbnode,
     291    131857985 :         field4: rel.relnode,
     292    131857985 :         field5: rel.forknum,
     293    131857985 :         field6: blknum,
     294    131857985 :     }
     295    131857985 : }
     296              : 
     297              : #[inline(always)]
     298      4324136 : pub fn rel_size_to_key(rel: RelTag) -> Key {
     299      4324136 :     Key {
     300      4324136 :         field1: 0x00,
     301      4324136 :         field2: rel.spcnode,
     302      4324136 :         field3: rel.dbnode,
     303      4324136 :         field4: rel.relnode,
     304      4324136 :         field5: rel.forknum,
     305      4324136 :         field6: 0xffffffff,
     306      4324136 :     }
     307      4324136 : }
     308              : 
     309              : #[inline(always)]
     310        67233 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
     311        67233 :     Key {
     312        67233 :         field1: 0x00,
     313        67233 :         field2: rel.spcnode,
     314        67233 :         field3: rel.dbnode,
     315        67233 :         field4: rel.relnode,
     316        67233 :         field5: rel.forknum,
     317        67233 :         field6: 0,
     318        67233 :     }..Key {
     319        67233 :         field1: 0x00,
     320        67233 :         field2: rel.spcnode,
     321        67233 :         field3: rel.dbnode,
     322        67233 :         field4: rel.relnode,
     323        67233 :         field5: rel.forknum + 1,
     324        67233 :         field6: 0,
     325        67233 :     }
     326        67233 : }
     327              : 
     328              : //-- Section 02: SLRUs
     329              : 
     330              : #[inline(always)]
     331        12882 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
     332        12882 :     Key {
     333        12882 :         field1: 0x01,
     334        12882 :         field2: match kind {
     335         7008 :             SlruKind::Clog => 0x00,
     336         3080 :             SlruKind::MultiXactMembers => 0x01,
     337         2794 :             SlruKind::MultiXactOffsets => 0x02,
     338              :         },
     339              :         field3: 0,
     340              :         field4: 0,
     341              :         field5: 0,
     342              :         field6: 0,
     343              :     }
     344        12882 : }
     345              : 
     346              : #[inline(always)]
     347      6178166 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
     348      6178166 :     Key {
     349      6178166 :         field1: 0x01,
     350      6178166 :         field2: match kind {
     351      6121361 :             SlruKind::Clog => 0x00,
     352        28718 :             SlruKind::MultiXactMembers => 0x01,
     353        28087 :             SlruKind::MultiXactOffsets => 0x02,
     354              :         },
     355              :         field3: 1,
     356      6178166 :         field4: segno,
     357      6178166 :         field5: 0,
     358      6178166 :         field6: blknum,
     359      6178166 :     }
     360      6178166 : }
     361              : 
     362              : #[inline(always)]
     363        11666 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
     364        11666 :     Key {
     365        11666 :         field1: 0x01,
     366        11666 :         field2: match kind {
     367         7152 :             SlruKind::Clog => 0x00,
     368         2554 :             SlruKind::MultiXactMembers => 0x01,
     369         1960 :             SlruKind::MultiXactOffsets => 0x02,
     370              :         },
     371              :         field3: 1,
     372        11666 :         field4: segno,
     373        11666 :         field5: 0,
     374        11666 :         field6: 0xffffffff,
     375        11666 :     }
     376        11666 : }
     377              : 
     378              : #[inline(always)]
     379           10 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
     380           10 :     let field2 = match kind {
     381           10 :         SlruKind::Clog => 0x00,
     382            0 :         SlruKind::MultiXactMembers => 0x01,
     383            0 :         SlruKind::MultiXactOffsets => 0x02,
     384              :     };
     385              : 
     386           10 :     Key {
     387           10 :         field1: 0x01,
     388           10 :         field2,
     389           10 :         field3: 1,
     390           10 :         field4: segno,
     391           10 :         field5: 0,
     392           10 :         field6: 0,
     393           10 :     }..Key {
     394           10 :         field1: 0x01,
     395           10 :         field2,
     396           10 :         field3: 1,
     397           10 :         field4: segno,
     398           10 :         field5: 1,
     399           10 :         field6: 0,
     400           10 :     }
     401           10 : }
     402              : 
     403              : //-- Section 03: pg_twophase
     404              : 
     405              : pub const TWOPHASEDIR_KEY: Key = Key {
     406              :     field1: 0x02,
     407              :     field2: 0,
     408              :     field3: 0,
     409              :     field4: 0,
     410              :     field5: 0,
     411              :     field6: 0,
     412              : };
     413              : 
     414              : #[inline(always)]
     415            6 : pub fn twophase_file_key(xid: TransactionId) -> Key {
     416            6 :     Key {
     417            6 :         field1: 0x02,
     418            6 :         field2: 0,
     419            6 :         field3: 0,
     420            6 :         field4: 0,
     421            6 :         field5: 0,
     422            6 :         field6: xid,
     423            6 :     }
     424            6 : }
     425              : 
     426              : #[inline(always)]
     427            2 : pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
     428            2 :     let (next_xid, overflowed) = xid.overflowing_add(1);
     429            2 : 
     430            2 :     Key {
     431            2 :         field1: 0x02,
     432            2 :         field2: 0,
     433            2 :         field3: 0,
     434            2 :         field4: 0,
     435            2 :         field5: 0,
     436            2 :         field6: xid,
     437            2 :     }..Key {
     438            2 :         field1: 0x02,
     439            2 :         field2: 0,
     440            2 :         field3: 0,
     441            2 :         field4: 0,
     442            2 :         field5: u8::from(overflowed),
     443            2 :         field6: next_xid,
     444            2 :     }
     445            2 : }
     446              : 
     447              : //-- Section 04: misc keys (first byte 0x03): control file, checkpoint, aux files
     448              : pub const CONTROLFILE_KEY: Key = Key {
     449              :     field1: 0x03,
     450              :     field2: 0,
     451              :     field3: 0,
     452              :     field4: 0,
     453              :     field5: 0,
     454              :     field6: 0,
     455              : };
     456              : 
     457              : pub const CHECKPOINT_KEY: Key = Key {
     458              :     field1: 0x03,
     459              :     field2: 0,
     460              :     field3: 0,
     461              :     field4: 0,
     462              :     field5: 0,
     463              :     field6: 1,
     464              : };
     465              : 
     466              : pub const AUX_FILES_KEY: Key = Key {
     467              :     field1: 0x03,
     468              :     field2: 0,
     469              :     field3: 0,
     470              :     field4: 0,
     471              :     field5: 0,
     472              :     field6: 2,
     473              : };
     474              : 
     475              : // Reverse mappings for a few Keys.
     476              : // These are needed by WAL redo manager.
     477              : 
     478              : // AUX_FILES currently stores only data for logical replication (slots etc), and
     479              : // we don't preserve these on a branch because safekeepers can't follow timeline
     480              : // switch (and generally it likely should be optional), so ignore these.
     481              : #[inline(always)]
     482     32334675 : pub fn is_inherited_key(key: Key) -> bool {
     483     32334675 :     key != AUX_FILES_KEY
     484     32334675 : }
     485              : 
     486              : #[inline(always)]
     487            0 : pub fn is_rel_fsm_block_key(key: Key) -> bool {
     488            0 :     key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
     489            0 : }
     490              : 
     491              : #[inline(always)]
     492            0 : pub fn is_rel_vm_block_key(key: Key) -> bool {
     493            0 :     key.field1 == 0x00
     494            0 :         && key.field4 != 0
     495            0 :         && key.field5 == VISIBILITYMAP_FORKNUM
     496            0 :         && key.field6 != 0xffffffff
     497            0 : }
     498              : 
     499              : #[inline(always)]
     500     18923991 : pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
     501     18923991 :     Ok(match key.field1 {
     502              :         0x01 => {
     503     18923991 :             let kind = match key.field2 {
     504     18826309 :                 0x00 => SlruKind::Clog,
     505        49392 :                 0x01 => SlruKind::MultiXactMembers,
     506        48290 :                 0x02 => SlruKind::MultiXactOffsets,
     507            0 :                 _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
     508              :             };
     509     18923991 :             let segno = key.field4;
     510     18923991 :             let blknum = key.field6;
     511     18923991 : 
     512     18923991 :             (kind, segno, blknum)
     513              :         }
     514            0 :         _ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
     515              :     })
     516     18923991 : }
     517              : 
     518              : #[inline(always)]
     519            0 : pub fn is_slru_block_key(key: Key) -> bool {
     520            0 :     key.field1 == 0x01                // SLRU-related
     521            0 :         && key.field3 == 0x00000001   // but not SlruDir
     522            0 :         && key.field6 != 0xffffffff // and not SlruSegSize
     523            0 : }
     524              : 
     525              : #[inline(always)]
     526     65237179 : pub fn is_rel_block_key(key: &Key) -> bool {
     527     65237179 :     key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
     528     65237179 : }
     529              : 
     530              : /// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
     531              : #[inline(always)]
     532      2861198 : pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
     533      2861198 :     Ok(match key.field1 {
     534      2861198 :         0x00 => (
     535      2861198 :             RelTag {
     536      2861198 :                 spcnode: key.field2,
     537      2861198 :                 dbnode: key.field3,
     538      2861198 :                 relnode: key.field4,
     539      2861198 :                 forknum: key.field5,
     540      2861198 :             },
     541      2861198 :             key.field6,
     542      2861198 :         ),
     543            0 :         _ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
     544              :     })
     545      2861198 : }
     546              : 
     547              : impl std::str::FromStr for Key {
     548              :     type Err = anyhow::Error;
     549              : 
     550           14 :     fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
     551           14 :         Self::from_hex(s)
     552           14 :     }
     553              : }
     554              : 
     555              : #[cfg(test)]
     556              : mod tests {
     557              :     use std::str::FromStr;
     558              : 
     559              :     use crate::key::Key;
     560              : 
     561              :     use rand::Rng;
     562              :     use rand::SeedableRng;
     563              : 
     564            2 :     #[test]
     565            2 :     fn display_fromstr_bijection() {
     566            2 :         let mut rng = rand::rngs::StdRng::seed_from_u64(42);
     567            2 : 
     568            2 :         let key = Key {
     569            2 :             field1: rng.gen(),
     570            2 :             field2: rng.gen(),
     571            2 :             field3: rng.gen(),
     572            2 :             field4: rng.gen(),
     573            2 :             field5: rng.gen(),
     574            2 :             field6: rng.gen(),
     575            2 :         };
     576            2 : 
     577            2 :         assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
     578            2 :     }
     579              : }
        

Generated by: LCOV version 2.1-beta