Line data Source code
1 : use std::fmt;
2 : use std::ops::Range;
3 :
4 : use anyhow::{Result, bail};
5 : use byteorder::{BE, ByteOrder};
6 : use bytes::Bytes;
7 : use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
8 : use postgres_ffi::{Oid, RepOriginId};
9 : use serde::{Deserialize, Serialize};
10 : use utils::const_assert;
11 :
12 : use crate::reltag::{BlockNumber, RelTag, SlruKind};
13 :
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
/// for what we actually store in these fields.
///
/// The derived `Ord`/`PartialOrd` compare the fields in declaration order, so `field1` is the
/// most significant component and `field6` the least significant one.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Key {
    /// First byte of the key. Selects the key-space section (e.g. `0x00` relations, `0x01` SLRUs)
    /// or, inside the metadata prefix range, the kind of metadata key.
    pub field1: u8,
    /// Tablespace OID for relation keys, SLRU kind for SLRU keys. Only a restricted set of values
    /// can be converted to the `i128` form, see [`Key::is_i128_representable`].
    pub field2: u32,
    /// Database OID for relation keys.
    pub field3: u32,
    /// Relation node for relation keys, segment number for SLRU keys.
    pub field4: u32,
    /// Fork number for relation keys.
    pub field5: u8,
    /// Block number for block keys; `0xFFFF_FFFF` marks the size entry of a relation or SLRU segment.
    pub field6: u32,
}
27 :
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
/// a struct of fields.
///
/// The wrapped value is the encoding produced by [`Key::to_i128`]; use [`Key::to_compact`] and
/// [`Key::from_compact`] to convert between the two representations.
#[derive(
    Clone, Copy, Default, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug,
)]
pub struct CompactKey(i128);
34 :
/// The storage key size, in bytes.
pub const KEY_SIZE: usize = 18;

/// The metadata key size, in bytes. 2B fewer than the storage key size because field2 is not fully utilized.
/// See [`Key::to_i128`] for more information on the encoding.
pub const METADATA_KEY_SIZE: usize = 16;

/// Start (inclusive) of the key prefix range for metadata keys. A key is a metadata key when its
/// first byte is >= 0x60 and < [`METADATA_KEY_END_PREFIX`].
pub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x60;
/// End (exclusive) of the key prefix range for metadata keys.
pub const METADATA_KEY_END_PREFIX: u8 = 0x7F;

/// The (reserved) key prefix of relation sizes.
pub const RELATION_SIZE_PREFIX: u8 = 0x61;

/// The key prefix of AUX file keys.
pub const AUX_KEY_PREFIX: u8 = 0x62;

/// The key prefix of ReplOrigin keys.
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;

/// The key prefix of db directory keys.
pub const DB_DIR_KEY_PREFIX: u8 = 0x64;

/// The key prefix of rel directory keys.
pub const REL_DIR_KEY_PREFIX: u8 = 0x65;
60 :
/// Decoded form of the value stored under a rel directory key.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RelDirExists {
    /// The relation exists; encoded as the `b"r"` marker.
    Exists,
    /// The relation was removed; encoded as the empty sparse tombstone.
    Removed,
}
66 :
/// Error returned when a rel directory value does not match any known marker.
#[derive(Debug)]
pub struct DecodeError;

impl std::error::Error for DecodeError {}

impl fmt::Display for DecodeError {
    /// Writes the fixed, user-facing description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid marker")
    }
}
77 :
78 : impl RelDirExists {
79 : /// The value of the rel directory keys that indicates the existence of a relation.
80 : const REL_EXISTS_MARKER: Bytes = Bytes::from_static(b"r");
81 :
82 0 : pub fn encode(&self) -> Bytes {
83 0 : match self {
84 0 : Self::Exists => Self::REL_EXISTS_MARKER.clone(),
85 0 : Self::Removed => SPARSE_TOMBSTONE_MARKER.clone(),
86 : }
87 0 : }
88 :
89 0 : pub fn decode_option(data: Option<impl AsRef<[u8]>>) -> Result<Self, DecodeError> {
90 0 : match data {
91 0 : Some(marker) if marker.as_ref() == Self::REL_EXISTS_MARKER => Ok(Self::Exists),
92 : // Any other marker is invalid
93 0 : Some(_) => Err(DecodeError),
94 0 : None => Ok(Self::Removed),
95 : }
96 0 : }
97 :
98 0 : pub fn decode(data: impl AsRef<[u8]>) -> Result<Self, DecodeError> {
99 0 : let data = data.as_ref();
100 0 : if data == Self::REL_EXISTS_MARKER {
101 0 : Ok(Self::Exists)
102 0 : } else if data == SPARSE_TOMBSTONE_MARKER {
103 0 : Ok(Self::Removed)
104 : } else {
105 0 : Err(DecodeError)
106 : }
107 0 : }
108 : }
109 :
/// A tombstone in the sparse keyspace, which is an empty buffer.
///
/// [`RelDirExists::encode`] emits this value for [`RelDirExists::Removed`].
pub const SPARSE_TOMBSTONE_MARKER: Bytes = Bytes::from_static(b"");
112 :
113 : /// Check if the key falls in the range of metadata keys.
114 64 : pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
115 64 : key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
116 64 : }
117 :
118 : impl Key {
119 : /// Check if the key falls in the range of metadata keys.
120 133 : pub const fn is_metadata_key(&self) -> bool {
121 133 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
122 133 : }
123 :
124 : /// Encode a metadata key to a storage key.
125 63 : pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
126 63 : assert!(is_metadata_key_slice(key), "key not in metadata key range");
127 : // Metadata key space ends at 0x7F so it's fine to directly convert it to i128.
128 63 : Self::from_i128(i128::from_be_bytes(*key))
129 63 : }
130 :
131 : /// Encode a metadata key to a storage key.
132 1 : pub fn from_metadata_key(key: &[u8]) -> Self {
133 1 : Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
134 1 : }
135 :
136 : /// Get the range of metadata keys.
137 2560 : pub const fn metadata_key_range() -> Range<Self> {
138 2560 : Key {
139 2560 : field1: METADATA_KEY_BEGIN_PREFIX,
140 2560 : field2: 0,
141 2560 : field3: 0,
142 2560 : field4: 0,
143 2560 : field5: 0,
144 2560 : field6: 0,
145 2560 : }..Key {
146 2560 : field1: METADATA_KEY_END_PREFIX,
147 2560 : field2: 0,
148 2560 : field3: 0,
149 2560 : field4: 0,
150 2560 : field5: 0,
151 2560 : field6: 0,
152 2560 : }
153 2560 : }
154 :
155 : /// Get the range of aux keys.
156 676 : pub fn metadata_aux_key_range() -> Range<Self> {
157 676 : Key {
158 676 : field1: AUX_KEY_PREFIX,
159 676 : field2: 0,
160 676 : field3: 0,
161 676 : field4: 0,
162 676 : field5: 0,
163 676 : field6: 0,
164 676 : }..Key {
165 676 : field1: AUX_KEY_PREFIX + 1,
166 676 : field2: 0,
167 676 : field3: 0,
168 676 : field4: 0,
169 676 : field5: 0,
170 676 : field6: 0,
171 676 : }
172 676 : }
173 :
174 652 : pub fn rel_dir_sparse_key_range() -> Range<Self> {
175 652 : Key {
176 652 : field1: REL_DIR_KEY_PREFIX,
177 652 : field2: 0,
178 652 : field3: 0,
179 652 : field4: 0,
180 652 : field5: 0,
181 652 : field6: 0,
182 652 : }..Key {
183 652 : field1: REL_DIR_KEY_PREFIX + 1,
184 652 : field2: 0,
185 652 : field3: 0,
186 652 : field4: 0,
187 652 : field5: 0,
188 652 : field6: 0,
189 652 : }
190 652 : }
191 :
192 : /// This function checks more extensively what keys we can take on the write path.
193 : /// If a key beginning with 00 does not have a global/default tablespace OID, it
194 : /// will be rejected on the write path.
195 : #[allow(dead_code)]
196 0 : pub fn is_valid_key_on_write_path_strong(&self) -> bool {
197 : use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
198 0 : if !self.is_i128_representable() {
199 0 : return false;
200 0 : }
201 0 : if self.field1 == 0
202 0 : && !(self.field2 == GLOBALTABLESPACE_OID
203 0 : || self.field2 == DEFAULTTABLESPACE_OID
204 0 : || self.field2 == 0)
205 : {
206 0 : return false; // User defined tablespaces are not supported
207 0 : }
208 0 : true
209 0 : }
210 :
211 : /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
212 : /// checks if the key is i128 representable. Note that some keys can be successfully
213 : /// ingested into the pageserver, but will cause errors on generating basebackup.
214 9633289 : pub fn is_valid_key_on_write_path(&self) -> bool {
215 9633289 : self.is_i128_representable()
216 9633289 : }
217 :
218 35244029 : pub fn is_i128_representable(&self) -> bool {
219 35244029 : self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
220 35244029 : }
221 :
222 : /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
223 : /// As long as Neon does not support tablespace (because of lack of access to local file system),
224 : /// we can assume that only some predefined namespace OIDs are used which can fit in u16
225 25610740 : pub fn to_i128(&self) -> i128 {
226 25610740 : assert!(self.is_i128_representable(), "invalid key: {self}");
227 25610740 : (((self.field1 & 0x7F) as i128) << 120)
228 25610740 : | (((self.field2 & 0xFFFF) as i128) << 104)
229 25610740 : | ((self.field3 as i128) << 72)
230 25610740 : | ((self.field4 as i128) << 40)
231 25610740 : | ((self.field5 as i128) << 32)
232 25610740 : | self.field6 as i128
233 25610740 : }
234 :
235 23273891 : pub const fn from_i128(x: i128) -> Self {
236 23273891 : Key {
237 23273891 : field1: ((x >> 120) & 0x7F) as u8,
238 23273891 : field2: ((x >> 104) & 0xFFFF) as u32,
239 23273891 : field3: (x >> 72) as u32,
240 23273891 : field4: (x >> 40) as u32,
241 23273891 : field5: (x >> 32) as u8,
242 23273891 : field6: x as u32,
243 23273891 : }
244 23273891 : }
245 :
246 13754922 : pub fn to_compact(&self) -> CompactKey {
247 13754922 : CompactKey(self.to_i128())
248 13754922 : }
249 :
250 20242745 : pub fn from_compact(k: CompactKey) -> Self {
251 20242745 : Self::from_i128(k.0)
252 20242745 : }
253 :
254 16628927 : pub const fn next(&self) -> Key {
255 16628927 : self.add(1)
256 16628927 : }
257 :
258 16641472 : pub const fn add(&self, x: u32) -> Key {
259 16641472 : let mut key = *self;
260 16641472 :
261 16641472 : let r = key.field6.overflowing_add(x);
262 16641472 : key.field6 = r.0;
263 16641472 : if r.1 {
264 1102809 : let r = key.field5.overflowing_add(1);
265 1102809 : key.field5 = r.0;
266 1102809 : if r.1 {
267 0 : let r = key.field4.overflowing_add(1);
268 0 : key.field4 = r.0;
269 0 : if r.1 {
270 0 : let r = key.field3.overflowing_add(1);
271 0 : key.field3 = r.0;
272 0 : if r.1 {
273 0 : let r = key.field2.overflowing_add(1);
274 0 : key.field2 = r.0;
275 0 : if r.1 {
276 0 : let r = key.field1.overflowing_add(1);
277 0 : key.field1 = r.0;
278 0 : assert!(!r.1);
279 0 : }
280 0 : }
281 0 : }
282 1102809 : }
283 15538663 : }
284 16641472 : key
285 16641472 : }
286 :
287 : /// Convert a 18B slice to a key. This function should not be used for 16B metadata keys because `field2` is handled differently.
288 : /// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys). There are some restrictions on `field2`,
289 : /// and therefore not all 18B slices are valid page server keys.
290 11888388 : pub fn from_slice(b: &[u8]) -> Self {
291 11888388 : Key {
292 11888388 : field1: b[0],
293 11888388 : field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
294 11888388 : field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
295 11888388 : field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
296 11888388 : field5: b[13],
297 11888388 : field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
298 11888388 : }
299 11888388 : }
300 :
301 : /// Convert a key to a 18B slice. This function should not be used for getting a 16B metadata key because `field2` is handled differently.
302 : /// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).
303 14516475 : pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
304 14516475 : buf[0] = self.field1;
305 14516475 : BE::write_u32(&mut buf[1..5], self.field2);
306 14516475 : BE::write_u32(&mut buf[5..9], self.field3);
307 14516475 : BE::write_u32(&mut buf[9..13], self.field4);
308 14516475 : buf[13] = self.field5;
309 14516475 : BE::write_u32(&mut buf[14..18], self.field6);
310 14516475 : }
311 : }
312 :
313 : impl CompactKey {
314 10 : pub fn raw(&self) -> i128 {
315 10 : self.0
316 10 : }
317 : }
318 :
319 : impl From<i128> for CompactKey {
320 5 : fn from(value: i128) -> Self {
321 5 : Self(value)
322 5 : }
323 : }
324 :
325 : impl fmt::Display for Key {
326 759263 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
327 759263 : write!(
328 759263 : f,
329 759263 : "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
330 759263 : self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
331 759263 : )
332 759263 : }
333 : }
334 :
335 : impl fmt::Display for CompactKey {
336 0 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
337 0 : let k = Key::from_compact(*self);
338 0 : k.fmt(f)
339 0 : }
340 : }
341 :
342 : impl Key {
343 : pub const MIN: Key = Key {
344 : field1: u8::MIN,
345 : field2: u32::MIN,
346 : field3: u32::MIN,
347 : field4: u32::MIN,
348 : field5: u8::MIN,
349 : field6: u32::MIN,
350 : };
351 : pub const MAX: Key = Key {
352 : field1: u8::MAX,
353 : field2: u32::MAX,
354 : field3: u32::MAX,
355 : field4: u32::MAX,
356 : field5: u8::MAX,
357 : field6: u32::MAX,
358 : };
359 :
360 80517 : pub fn from_hex(s: &str) -> Result<Self> {
361 80517 : if s.len() != 36 {
362 4 : bail!("parse error");
363 80513 : }
364 80513 : Ok(Key {
365 80513 : field1: u8::from_str_radix(&s[0..2], 16)?,
366 80513 : field2: u32::from_str_radix(&s[2..10], 16)?,
367 80513 : field3: u32::from_str_radix(&s[10..18], 16)?,
368 80513 : field4: u32::from_str_radix(&s[18..26], 16)?,
369 80513 : field5: u8::from_str_radix(&s[26..28], 16)?,
370 80513 : field6: u32::from_str_radix(&s[28..36], 16)?,
371 : })
372 80517 : }
373 : }
374 :
375 : // Layout of the Key address space
376 : //
377 : // The Key struct, used to address the underlying key-value store, consists of
378 : // 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
379 : // all the data and metadata keys into those 18 bytes.
380 : //
381 : // Principles for the mapping:
382 : //
383 : // - Things that are often accessed or modified together, should be close to
384 : // each other in the key space. For example, if a relation is extended by one
385 : // block, we create a new key-value pair for the block data, and update the
386 : // relation size entry. Because of that, the RelSize key comes after all the
387 : // RelBlocks of a relation: the RelSize and the last RelBlock are always next
388 : // to each other.
389 : //
390 : // The key space is divided into four major sections, identified by the first
391 : // byte, and they form a hierarchy:
392 : //
393 : // 00 Relation data and metadata
394 : //
395 : // DbDir () -> (dbnode, spcnode)
396 : // Filenodemap
397 : // RelDir -> relnode forknum
398 : // RelBlocks
399 : // RelSize
400 : //
401 : // 01 SLRUs
402 : //
403 : // SlruDir kind
404 : // SlruSegBlocks segno
405 : // SlruSegSize
406 : //
407 : // 02 pg_twophase
408 : //
409 : // 03 misc
410 : // Controlfile
411 : // checkpoint
412 : // pg_version
413 : //
414 : // 04 aux files
415 : //
416 : // Below is a full list of the keyspace allocation:
417 : //
418 : // DbDir:
419 : // 00 00000000 00000000 00000000 00 00000000
420 : //
421 : // Filenodemap:
422 : // 00 SPCNODE DBNODE 00000000 00 00000000
423 : //
424 : // RelDir:
425 : // 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
426 : //
427 : // RelBlock:
428 : // 00 SPCNODE DBNODE RELNODE FORK BLKNUM
429 : //
430 : // RelSize:
431 : // 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
432 : //
433 : // SlruDir:
434 : // 01 kind 00000000 00000000 00 00000000
435 : //
436 : // SlruSegBlock:
437 : // 01 kind 00000001 SEGNO 00 BLKNUM
438 : //
439 : // SlruSegSize:
440 : // 01 kind 00000001 SEGNO 00 FFFFFFFF
441 : //
442 : // TwoPhaseDir:
443 : // 02 00000000 00000000 00000000 00 00000000
444 : //
445 : // TwoPhaseFile:
446 : //
447 : // 02 00000000 00000000 00XXXXXX XX XXXXXXXX
448 : //
449 : // \______XID_________/
450 : //
451 : // The 64-bit XID is stored a little awkwardly in field6, field5 and
452 : // field4. PostgreSQL v16 and below only stored a 32-bit XID, which
453 : // fit completely in field6, but starting with PostgreSQL v17, a full
454 : // 64-bit XID is used. Most pageserver code that accesses
455 : // TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
456 : // are just unused.
457 : //
458 : // ControlFile:
459 : // 03 00000000 00000000 00000000 00 00000000
460 : //
461 : // Checkpoint:
462 : // 03 00000000 00000000 00000000 00 00000001
463 : //
464 : // AuxFiles:
465 : // 03 00000000 00000000 00000000 00 00000002
466 : //
467 :
468 : //-- Section 01: relation data and metadata
469 :
/// Key of the DbDir entry: every field is zero.
pub const DBDIR_KEY: Key = Key {
    field1: 0x00,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
478 :
479 : #[inline(always)]
480 0 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
481 0 : Key {
482 0 : field1: 0x00,
483 0 : field2: spcnode,
484 0 : field3: dbnode,
485 0 : field4: 0,
486 0 : field5: 0,
487 0 : field6: 0,
488 0 : }..Key {
489 0 : field1: 0x00,
490 0 : field2: spcnode,
491 0 : field3: dbnode,
492 0 : field4: 0xffffffff,
493 0 : field5: 0xff,
494 0 : field6: 0xffffffff,
495 0 : }
496 0 : }
497 :
498 : #[inline(always)]
499 32 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
500 32 : Key {
501 32 : field1: 0x00,
502 32 : field2: spcnode,
503 32 : field3: dbnode,
504 32 : field4: 0,
505 32 : field5: 0,
506 32 : field6: 0,
507 32 : }
508 32 : }
509 :
510 : #[inline(always)]
511 3896 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
512 3896 : Key {
513 3896 : field1: 0x00,
514 3896 : field2: spcnode,
515 3896 : field3: dbnode,
516 3896 : field4: 0,
517 3896 : field5: 0,
518 3896 : field6: 1,
519 3896 : }
520 3896 : }
521 :
522 : #[inline(always)]
523 0 : pub fn rel_tag_sparse_key(spcnode: Oid, dbnode: Oid, relnode: Oid, forknum: u8) -> Key {
524 0 : Key {
525 0 : field1: REL_DIR_KEY_PREFIX,
526 0 : field2: spcnode,
527 0 : field3: dbnode,
528 0 : field4: relnode,
529 0 : field5: forknum,
530 0 : field6: 1,
531 0 : }
532 0 : }
533 :
534 0 : pub fn rel_tag_sparse_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
535 0 : Key {
536 0 : field1: REL_DIR_KEY_PREFIX,
537 0 : field2: spcnode,
538 0 : field3: dbnode,
539 0 : field4: 0,
540 0 : field5: 0,
541 0 : field6: 0,
542 0 : }..Key {
543 0 : field1: REL_DIR_KEY_PREFIX,
544 0 : field2: spcnode,
545 0 : field3: dbnode,
546 0 : field4: u32::MAX,
547 0 : field5: u8::MAX,
548 0 : field6: u32::MAX,
549 0 : } // it's fine to exclude the last key b/c we only use field6 == 1
550 0 : }
551 :
552 : #[inline(always)]
553 2583814 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
554 2583814 : Key {
555 2583814 : field1: 0x00,
556 2583814 : field2: rel.spcnode,
557 2583814 : field3: rel.dbnode,
558 2583814 : field4: rel.relnode,
559 2583814 : field5: rel.forknum,
560 2583814 : field6: blknum,
561 2583814 : }
562 2583814 : }
563 :
564 : #[inline(always)]
565 579508 : pub fn rel_size_to_key(rel: RelTag) -> Key {
566 579508 : Key {
567 579508 : field1: 0x00,
568 579508 : field2: rel.spcnode,
569 579508 : field3: rel.dbnode,
570 579508 : field4: rel.relnode,
571 579508 : field5: rel.forknum,
572 579508 : field6: 0xffff_ffff,
573 579508 : }
574 579508 : }
575 :
576 : impl Key {
577 : #[inline(always)]
578 5 : pub fn is_rel_size_key(&self) -> bool {
579 5 : self.field1 == 0 && self.field6 == u32::MAX
580 5 : }
581 : }
582 :
583 : #[inline(always)]
584 4 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
585 4 : Key {
586 4 : field1: 0x00,
587 4 : field2: rel.spcnode,
588 4 : field3: rel.dbnode,
589 4 : field4: rel.relnode,
590 4 : field5: rel.forknum,
591 4 : field6: 0,
592 4 : }..Key {
593 4 : field1: 0x00,
594 4 : field2: rel.spcnode,
595 4 : field3: rel.dbnode,
596 4 : field4: rel.relnode,
597 4 : field5: rel.forknum + 1,
598 4 : field6: 0,
599 4 : }
600 4 : }
601 :
602 : //-- Section 02: SLRUs
603 :
604 : #[inline(always)]
605 3168 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
606 3168 : Key {
607 3168 : field1: 0x01,
608 3168 : field2: match kind {
609 1056 : SlruKind::Clog => 0x00,
610 1056 : SlruKind::MultiXactMembers => 0x01,
611 1056 : SlruKind::MultiXactOffsets => 0x02,
612 : },
613 : field3: 0,
614 : field4: 0,
615 : field5: 0,
616 : field6: 0,
617 : }
618 : }
619 :
620 : #[inline(always)]
621 2098633 : pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
622 2098633 : if key.field1 == 0x01
623 0 : && key.field3 == 0
624 0 : && key.field4 == 0
625 0 : && key.field5 == 0
626 0 : && key.field6 == 0
627 : {
628 0 : match key.field2 {
629 0 : 0 => Some(Ok(SlruKind::Clog)),
630 0 : 1 => Some(Ok(SlruKind::MultiXactMembers)),
631 0 : 2 => Some(Ok(SlruKind::MultiXactOffsets)),
632 0 : x => Some(Err(x)),
633 : }
634 : } else {
635 2098633 : None
636 : }
637 2098633 : }
638 :
639 : #[inline(always)]
640 28 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
641 28 : Key {
642 28 : field1: 0x01,
643 28 : field2: match kind {
644 20 : SlruKind::Clog => 0x00,
645 4 : SlruKind::MultiXactMembers => 0x01,
646 4 : SlruKind::MultiXactOffsets => 0x02,
647 : },
648 : field3: 1,
649 28 : field4: segno,
650 28 : field5: 0,
651 28 : field6: blknum,
652 28 : }
653 28 : }
654 :
655 : #[inline(always)]
656 12 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
657 12 : Key {
658 12 : field1: 0x01,
659 12 : field2: match kind {
660 4 : SlruKind::Clog => 0x00,
661 4 : SlruKind::MultiXactMembers => 0x01,
662 4 : SlruKind::MultiXactOffsets => 0x02,
663 : },
664 : field3: 1,
665 12 : field4: segno,
666 12 : field5: 0,
667 12 : field6: 0xffff_ffff,
668 12 : }
669 12 : }
670 :
671 : impl Key {
672 2098633 : pub fn is_slru_segment_size_key(&self) -> bool {
673 2098633 : self.field1 == 0x01
674 0 : && self.field2 < 0x03
675 0 : && self.field3 == 0x01
676 0 : && self.field5 == 0
677 0 : && self.field6 == u32::MAX
678 2098633 : }
679 :
680 2098633 : pub fn is_slru_dir_key(&self) -> bool {
681 2098633 : slru_dir_kind(self).is_some()
682 2098633 : }
683 : }
684 :
685 : #[inline(always)]
686 0 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
687 0 : let field2 = match kind {
688 0 : SlruKind::Clog => 0x00,
689 0 : SlruKind::MultiXactMembers => 0x01,
690 0 : SlruKind::MultiXactOffsets => 0x02,
691 : };
692 :
693 0 : Key {
694 0 : field1: 0x01,
695 0 : field2,
696 0 : field3: 1,
697 0 : field4: segno,
698 0 : field5: 0,
699 0 : field6: 0,
700 0 : }..Key {
701 0 : field1: 0x01,
702 0 : field2,
703 0 : field3: 1,
704 0 : field4: segno,
705 0 : field5: 1,
706 0 : field6: 0,
707 0 : }
708 0 : }
709 :
710 : //-- Section 03: pg_twophase
711 :
/// Key of the TwoPhaseDir entry: section `0x02` with every other field zero.
pub const TWOPHASEDIR_KEY: Key = Key {
    field1: 0x02,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
720 :
721 : #[inline(always)]
722 0 : pub fn twophase_file_key(xid: u64) -> Key {
723 0 : Key {
724 0 : field1: 0x02,
725 0 : field2: 0,
726 0 : field3: 0,
727 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
728 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
729 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
730 0 : }
731 0 : }
732 :
733 : #[inline(always)]
734 0 : pub fn twophase_key_range(xid: u64) -> Range<Key> {
735 0 : // 64-bit XIDs really should not overflow
736 0 : let (next_xid, overflowed) = xid.overflowing_add(1);
737 0 :
738 0 : Key {
739 0 : field1: 0x02,
740 0 : field2: 0,
741 0 : field3: 0,
742 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
743 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
744 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
745 0 : }..Key {
746 0 : field1: 0x02,
747 0 : field2: 0,
748 0 : field3: u32::from(overflowed),
749 0 : field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
750 0 : field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
751 0 : field6: (next_xid & 0x00000000FFFFFFFF) as u32,
752 0 : }
753 0 : }
754 :
755 : //-- Section 04: Control file
/// Key of the control file entry: section `0x03`, `field6 == 0`.
pub const CONTROLFILE_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};

/// Key of the checkpoint entry: section `0x03`, `field6 == 1`.
pub const CHECKPOINT_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 1,
};

/// Key of the aux files entry: section `0x03`, `field6 == 2`.
pub const AUX_FILES_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 2,
};
782 :
783 : #[inline(always)]
784 0 : pub fn repl_origin_key(origin_id: RepOriginId) -> Key {
785 0 : Key {
786 0 : field1: REPL_ORIGIN_KEY_PREFIX,
787 0 : field2: 0,
788 0 : field3: 0,
789 0 : field4: 0,
790 0 : field5: 0,
791 0 : field6: origin_id as u32,
792 0 : }
793 0 : }
794 :
795 : /// Get the range of replorigin keys.
796 652 : pub fn repl_origin_key_range() -> Range<Key> {
797 652 : Key {
798 652 : field1: REPL_ORIGIN_KEY_PREFIX,
799 652 : field2: 0,
800 652 : field3: 0,
801 652 : field4: 0,
802 652 : field5: 0,
803 652 : field6: 0,
804 652 : }..Key {
805 652 : field1: REPL_ORIGIN_KEY_PREFIX,
806 652 : field2: 0,
807 652 : field3: 0,
808 652 : field4: 0,
809 652 : field5: 0,
810 652 : field6: 0x10000,
811 652 : }
812 652 : }
813 :
814 : // Reverse mappings for a few Keys.
815 : // These are needed by WAL redo manager.
816 :
/// Non inherited range for vectored get. Contains exactly one key, [`AUX_FILES_KEY`].
pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
/// This is the whole metadata key range, see [`Key::metadata_key_range`].
pub const SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
821 :
822 : impl Key {
823 : // AUX_FILES currently stores only data for logical replication (slots etc), and
824 : // we don't preserve these on a branch because safekeepers can't follow timeline
825 : // switch (and generally it likely should be optional), so ignore these.
826 : #[inline(always)]
827 0 : pub fn is_inherited_key(self) -> bool {
828 0 : if self.is_sparse() {
829 0 : self.is_inherited_sparse_key()
830 : } else {
831 0 : !NON_INHERITED_RANGE.contains(&self)
832 : }
833 : }
834 :
835 : #[inline(always)]
836 1482369 : pub fn is_sparse(self) -> bool {
837 1482369 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
838 1482369 : }
839 :
840 : /// Check if the key belongs to the inherited keyspace.
841 0 : fn is_inherited_sparse_key(self) -> bool {
842 0 : debug_assert!(self.is_sparse());
843 0 : self.field1 == RELATION_SIZE_PREFIX
844 0 : }
845 :
846 1704675 : pub const fn sparse_non_inherited_keyspace() -> Range<Key> {
847 1704675 : // The two keys are adjacent; if we will have non-adjancent keys in the future, we should return a keyspace
848 1704675 : const_assert!(AUX_KEY_PREFIX + 1 == REPL_ORIGIN_KEY_PREFIX);
849 1704675 : Key {
850 1704675 : field1: AUX_KEY_PREFIX,
851 1704675 : field2: 0,
852 1704675 : field3: 0,
853 1704675 : field4: 0,
854 1704675 : field5: 0,
855 1704675 : field6: 0,
856 1704675 : }..Key {
857 1704675 : field1: REPL_ORIGIN_KEY_PREFIX + 1,
858 1704675 : field2: 0,
859 1704675 : field3: 0,
860 1704675 : field4: 0,
861 1704675 : field5: 0,
862 1704675 : field6: 0,
863 1704675 : }
864 1704675 : }
865 :
866 : #[inline(always)]
867 0 : pub fn is_rel_fsm_block_key(self) -> bool {
868 0 : self.field1 == 0x00
869 0 : && self.field4 != 0
870 0 : && self.field5 == FSM_FORKNUM
871 0 : && self.field6 != 0xffffffff
872 : }
873 :
874 : #[inline(always)]
875 0 : pub fn is_rel_vm_block_key(self) -> bool {
876 0 : self.field1 == 0x00
877 0 : && self.field4 != 0
878 0 : && self.field5 == VISIBILITYMAP_FORKNUM
879 0 : && self.field6 != 0xffffffff
880 : }
881 :
882 : #[inline(always)]
883 0 : pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
884 0 : Ok(match self.field1 {
885 : 0x01 => {
886 0 : let kind = match self.field2 {
887 0 : 0x00 => SlruKind::Clog,
888 0 : 0x01 => SlruKind::MultiXactMembers,
889 0 : 0x02 => SlruKind::MultiXactOffsets,
890 0 : _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", self.field2),
891 : };
892 0 : let segno = self.field4;
893 0 : let blknum = self.field6;
894 0 :
895 0 : (kind, segno, blknum)
896 : }
897 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
898 : })
899 : }
900 :
901 : #[inline(always)]
902 3244229 : pub fn is_slru_block_key(self) -> bool {
903 3244229 : self.field1 == 0x01 // SLRU-related
904 1300 : && self.field3 == 0x00000001 // but not SlruDir
905 40 : && self.field6 != 0xffffffff // and not SlruSegSize
906 3244229 : }
907 :
908 : #[inline(always)]
909 7997238 : pub fn is_rel_block_key(&self) -> bool {
910 7997238 : self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
911 7997238 : }
912 :
913 : #[inline(always)]
914 400 : pub fn is_rel_dir_key(&self) -> bool {
915 400 : self.field1 == 0x00
916 400 : && self.field2 != 0
917 0 : && self.field3 != 0
918 0 : && self.field4 == 0
919 0 : && self.field5 == 0
920 0 : && self.field6 == 1
921 : }
922 :
923 : #[inline(always)]
924 2099033 : pub fn is_aux_file_key(&self) -> bool {
925 2099033 : self.field1 == AUX_KEY_PREFIX
926 2099033 : }
927 :
928 : /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
929 : #[inline(always)]
930 291296 : pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
931 291296 : Ok(match self.field1 {
932 291296 : 0x00 => (
933 291296 : RelTag {
934 291296 : spcnode: self.field2,
935 291296 : dbnode: self.field3,
936 291296 : relnode: self.field4,
937 291296 : forknum: self.field5,
938 291296 : },
939 291296 : self.field6,
940 291296 : ),
941 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
942 : })
943 : }
944 : }
945 :
946 : impl std::str::FromStr for Key {
947 : type Err = anyhow::Error;
948 :
949 9 : fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
950 9 : Self::from_hex(s)
951 9 : }
952 : }
953 :
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use rand::{Rng, SeedableRng};

    use super::AUX_KEY_PREFIX;
    use crate::key::{Key, is_metadata_key_slice};

    /// Formatting a key and parsing the result must give back the same key.
    #[test]
    fn display_fromstr_bijection() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);

        let key = Key {
            field1: rng.r#gen(),
            field2: rng.r#gen(),
            field3: rng.r#gen(),
            field4: rng.r#gen(),
            field5: rng.r#gen(),
            field6: rng.r#gen(),
        };

        let rendered = key.to_string();
        let parsed = Key::from_str(&rendered).unwrap();
        assert_eq!(key, parsed);
    }

    /// A 16-byte metadata key must survive the round trip through `Key` and `to_i128`.
    #[test]
    fn test_metadata_keys() {
        let mut metadata_key = [0xFF_u8; 16];
        metadata_key[0] = AUX_KEY_PREFIX;

        let encoded_key = Key::from_metadata_key(&metadata_key);
        let round_tripped = encoded_key.to_i128().to_be_bytes();

        assert_eq!(metadata_key, round_tripped);
        assert!(encoded_key.is_metadata_key());
        assert!(is_metadata_key_slice(&metadata_key));
    }

    /// Decoding the largest positive `i128` must not panic.
    #[test]
    fn test_possible_largest_key() {
        let _ = Key::from_i128(i128::MAX);
        // TODO: put this key into the system and see if anything breaks.
    }
}
|