Line data Source code
1 : use std::fmt;
2 : use std::ops::Range;
3 :
4 : use anyhow::{Result, bail};
5 : use byteorder::{BE, ByteOrder};
6 : use bytes::Bytes;
7 : use postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
8 : use postgres_ffi_types::{Oid, RepOriginId};
9 : use serde::{Deserialize, Serialize};
10 : use utils::const_assert;
11 :
12 : use crate::reltag::{BlockNumber, RelTag, SlruKind};
13 :
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
/// for what we actually store in these fields.
///
/// The fields are serialized in declaration order, big-endian, into `KEY_SIZE` bytes
/// (see `Key::write_to_byte_slice`).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Key {
    /// First byte of the key; selects the key-space section (0x00 relations, 0x01 SLRUs,
    /// 0x02 pg_twophase, 0x03 misc, metadata prefixes from `METADATA_KEY_BEGIN_PREFIX`).
    pub field1: u8,
    /// Tablespace OID for relation keys, SLRU kind for SLRU keys.
    pub field2: u32,
    /// Database OID for relation keys.
    pub field3: u32,
    /// Relation node for relation keys, segment number for SLRU keys.
    pub field4: u32,
    /// Fork number for relation keys.
    pub field5: u8,
    /// Block number for block keys; `0xFFFF_FFFF` marks a size key.
    pub field6: u32,
}
27 :
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
/// a struct of fields.
///
/// [`Key::to_compact`] stores the [`Key::to_i128`] encoding here and [`Key::from_compact`]
/// decodes it again.
#[derive(
    Clone, Copy, Default, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug,
)]
pub struct CompactKey(i128);
34 :
/// The storage key size.
pub const KEY_SIZE: usize = 18;

/// The metadata key size. 2B fewer than the storage key size because field2 is not fully utilized.
/// See [`Key::to_i128`] for more information on the encoding.
pub const METADATA_KEY_SIZE: usize = 16;

/// The first key prefix of the metadata key range (inclusive). A key is a metadata key when its
/// first byte is `>= METADATA_KEY_BEGIN_PREFIX` and `< METADATA_KEY_END_PREFIX`.
pub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x60;
/// The end of the metadata key prefix range (exclusive).
pub const METADATA_KEY_END_PREFIX: u8 = 0x7F;

/// The (reserved) key prefix of relation sizes.
pub const RELATION_SIZE_PREFIX: u8 = 0x61;

/// The key prefix of AUX file keys.
pub const AUX_KEY_PREFIX: u8 = 0x62;

/// The key prefix of ReplOrigin keys.
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;

/// The key prefix of db directory keys.
pub const DB_DIR_KEY_PREFIX: u8 = 0x64;

/// The key prefix of rel directory keys.
pub const REL_DIR_KEY_PREFIX: u8 = 0x65;
60 :
/// Existence state of a relation, as stored in the value of a rel directory key.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RelDirExists {
    /// The relation exists; encoded as the `b"r"` marker.
    Exists,
    /// The relation was removed; encoded as the sparse tombstone (empty buffer).
    Removed,
}
66 :
/// Error produced when a rel directory marker value is not one of the known markers.
#[derive(Debug)]
pub struct DecodeError;

impl std::error::Error for DecodeError {}

impl fmt::Display for DecodeError {
    /// Writes the fixed message `invalid marker`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("invalid marker")
    }
}
77 :
78 : impl RelDirExists {
79 : /// The value of the rel directory keys that indicates the existence of a relation.
80 : const REL_EXISTS_MARKER: Bytes = Bytes::from_static(b"r");
81 :
82 0 : pub fn encode(&self) -> Bytes {
83 0 : match self {
84 0 : Self::Exists => Self::REL_EXISTS_MARKER.clone(),
85 0 : Self::Removed => SPARSE_TOMBSTONE_MARKER.clone(),
86 : }
87 0 : }
88 :
89 0 : pub fn decode_option(data: Option<impl AsRef<[u8]>>) -> Result<Self, DecodeError> {
90 0 : match data {
91 0 : Some(marker) if marker.as_ref() == Self::REL_EXISTS_MARKER => Ok(Self::Exists),
92 : // Any other marker is invalid
93 0 : Some(_) => Err(DecodeError),
94 0 : None => Ok(Self::Removed),
95 : }
96 0 : }
97 :
98 0 : pub fn decode(data: impl AsRef<[u8]>) -> Result<Self, DecodeError> {
99 0 : let data = data.as_ref();
100 0 : if data == Self::REL_EXISTS_MARKER {
101 0 : Ok(Self::Exists)
102 0 : } else if data == SPARSE_TOMBSTONE_MARKER {
103 0 : Ok(Self::Removed)
104 : } else {
105 0 : Err(DecodeError)
106 : }
107 0 : }
108 : }
109 :
/// A tombstone in the sparse keyspace, which is an empty buffer.
///
/// `RelDirExists::encode` emits this value for `RelDirExists::Removed`.
pub const SPARSE_TOMBSTONE_MARKER: Bytes = Bytes::from_static(b"");
112 :
113 : /// Check if the key falls in the range of metadata keys.
114 22 : pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
115 22 : key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
116 22 : }
117 :
118 : impl Key {
119 : /// Check if the key falls in the range of metadata keys.
120 319 : pub const fn is_metadata_key(&self) -> bool {
121 319 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
122 319 : }
123 :
124 : /// Encode a metadata key to a storage key.
125 21 : pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
126 21 : assert!(is_metadata_key_slice(key), "key not in metadata key range");
127 : // Metadata key space ends at 0x7F so it's fine to directly convert it to i128.
128 21 : Self::from_i128(i128::from_be_bytes(*key))
129 21 : }
130 :
131 : /// Encode a metadata key to a storage key.
132 1 : pub fn from_metadata_key(key: &[u8]) -> Self {
133 1 : Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
134 1 : }
135 :
136 : /// Get the range of metadata keys.
137 450 : pub const fn metadata_key_range() -> Range<Self> {
138 450 : Key {
139 450 : field1: METADATA_KEY_BEGIN_PREFIX,
140 450 : field2: 0,
141 450 : field3: 0,
142 450 : field4: 0,
143 450 : field5: 0,
144 450 : field6: 0,
145 450 : }..Key {
146 450 : field1: METADATA_KEY_END_PREFIX,
147 450 : field2: 0,
148 450 : field3: 0,
149 450 : field4: 0,
150 450 : field5: 0,
151 450 : field6: 0,
152 450 : }
153 450 : }
154 :
155 : /// Get the range of aux keys.
156 175 : pub fn metadata_aux_key_range() -> Range<Self> {
157 175 : Key {
158 175 : field1: AUX_KEY_PREFIX,
159 175 : field2: 0,
160 175 : field3: 0,
161 175 : field4: 0,
162 175 : field5: 0,
163 175 : field6: 0,
164 175 : }..Key {
165 175 : field1: AUX_KEY_PREFIX + 1,
166 175 : field2: 0,
167 175 : field3: 0,
168 175 : field4: 0,
169 175 : field5: 0,
170 175 : field6: 0,
171 175 : }
172 175 : }
173 :
174 169 : pub fn rel_dir_sparse_key_range() -> Range<Self> {
175 169 : Key {
176 169 : field1: REL_DIR_KEY_PREFIX,
177 169 : field2: 0,
178 169 : field3: 0,
179 169 : field4: 0,
180 169 : field5: 0,
181 169 : field6: 0,
182 169 : }..Key {
183 169 : field1: REL_DIR_KEY_PREFIX + 1,
184 169 : field2: 0,
185 169 : field3: 0,
186 169 : field4: 0,
187 169 : field5: 0,
188 169 : field6: 0,
189 169 : }
190 169 : }
191 :
192 : /// This function checks more extensively what keys we can take on the write path.
193 : /// If a key beginning with 00 does not have a global/default tablespace OID, it
194 : /// will be rejected on the write path.
195 : #[allow(dead_code)]
196 0 : pub fn is_valid_key_on_write_path_strong(&self) -> bool {
197 : use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
198 0 : if !self.is_i128_representable() {
199 0 : return false;
200 0 : }
201 0 : if self.field1 == 0
202 0 : && !(self.field2 == GLOBALTABLESPACE_OID
203 0 : || self.field2 == DEFAULTTABLESPACE_OID
204 0 : || self.field2 == 0)
205 : {
206 0 : return false; // User defined tablespaces are not supported
207 0 : }
208 0 : true
209 0 : }
210 :
211 : /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
212 : /// checks if the key is i128 representable. Note that some keys can be successfully
213 : /// ingested into the pageserver, but will cause errors on generating basebackup.
214 2408328 : pub fn is_valid_key_on_write_path(&self) -> bool {
215 2408328 : self.is_i128_representable()
216 2408328 : }
217 :
218 8934583 : pub fn is_i128_representable(&self) -> bool {
219 8934583 : self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
220 8934583 : }
221 :
222 : /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
223 : /// As long as Neon does not support tablespace (because of lack of access to local file system),
224 : /// we can assume that only some predefined namespace OIDs are used which can fit in u16
225 6526255 : pub fn to_i128(&self) -> i128 {
226 6526255 : assert!(self.is_i128_representable(), "invalid key: {self}");
227 6526255 : (((self.field1 & 0x7F) as i128) << 120)
228 6526255 : | (((self.field2 & 0xFFFF) as i128) << 104)
229 6526255 : | ((self.field3 as i128) << 72)
230 6526255 : | ((self.field4 as i128) << 40)
231 6526255 : | ((self.field5 as i128) << 32)
232 6526255 : | self.field6 as i128
233 6526255 : }
234 :
235 5908331 : pub const fn from_i128(x: i128) -> Self {
236 5908331 : Key {
237 5908331 : field1: ((x >> 120) & 0x7F) as u8,
238 5908331 : field2: ((x >> 104) & 0xFFFF) as u32,
239 5908331 : field3: (x >> 72) as u32,
240 5908331 : field4: (x >> 40) as u32,
241 5908331 : field5: (x >> 32) as u8,
242 5908331 : field6: x as u32,
243 5908331 : }
244 5908331 : }
245 :
246 3473732 : pub fn to_compact(&self) -> CompactKey {
247 3473732 : CompactKey(self.to_i128())
248 3473732 : }
249 :
250 5075056 : pub fn from_compact(k: CompactKey) -> Self {
251 5075056 : Self::from_i128(k.0)
252 5075056 : }
253 :
254 6098727 : pub const fn next(&self) -> Key {
255 6098727 : self.add(1)
256 6098727 : }
257 :
258 6165073 : pub const fn add(&self, x: u32) -> Key {
259 6165073 : let mut key = *self;
260 :
261 6165073 : let r = key.field6.overflowing_add(x);
262 6165073 : key.field6 = r.0;
263 6165073 : if r.1 {
264 270583 : let r = key.field5.overflowing_add(1);
265 270583 : key.field5 = r.0;
266 270583 : if r.1 {
267 0 : let r = key.field4.overflowing_add(1);
268 0 : key.field4 = r.0;
269 0 : if r.1 {
270 0 : let r = key.field3.overflowing_add(1);
271 0 : key.field3 = r.0;
272 0 : if r.1 {
273 0 : let r = key.field2.overflowing_add(1);
274 0 : key.field2 = r.0;
275 0 : if r.1 {
276 0 : let r = key.field1.overflowing_add(1);
277 0 : key.field1 = r.0;
278 0 : assert!(!r.1);
279 0 : }
280 0 : }
281 0 : }
282 270583 : }
283 5894490 : }
284 6165073 : key
285 6165073 : }
286 :
287 : /// Convert a 18B slice to a key. This function should not be used for 16B metadata keys because `field2` is handled differently.
288 : /// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys). There are some restrictions on `field2`,
289 : /// and therefore not all 18B slices are valid page server keys.
290 3994251 : pub fn from_slice(b: &[u8]) -> Self {
291 3994251 : Key {
292 3994251 : field1: b[0],
293 3994251 : field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
294 3994251 : field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
295 3994251 : field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
296 3994251 : field5: b[13],
297 3994251 : field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
298 3994251 : }
299 3994251 : }
300 :
301 : /// Convert a key to a 18B slice. This function should not be used for getting a 16B metadata key because `field2` is handled differently.
302 : /// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).
303 3422650 : pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
304 3422650 : buf[0] = self.field1;
305 3422650 : BE::write_u32(&mut buf[1..5], self.field2);
306 3422650 : BE::write_u32(&mut buf[5..9], self.field3);
307 3422650 : BE::write_u32(&mut buf[9..13], self.field4);
308 3422650 : buf[13] = self.field5;
309 3422650 : BE::write_u32(&mut buf[14..18], self.field6);
310 3422650 : }
311 : }
312 :
313 : impl CompactKey {
314 10 : pub fn raw(&self) -> i128 {
315 10 : self.0
316 10 : }
317 : }
318 :
319 : impl From<i128> for CompactKey {
320 5 : fn from(value: i128) -> Self {
321 5 : Self(value)
322 5 : }
323 : }
324 :
325 : impl fmt::Display for Key {
326 98062 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
327 98062 : write!(
328 98062 : f,
329 98062 : "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
330 : self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
331 : )
332 98062 : }
333 : }
334 :
335 : impl fmt::Display for CompactKey {
336 0 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
337 0 : let k = Key::from_compact(*self);
338 0 : k.fmt(f)
339 0 : }
340 : }
341 :
342 : impl Key {
343 : pub const MIN: Key = Key {
344 : field1: u8::MIN,
345 : field2: u32::MIN,
346 : field3: u32::MIN,
347 : field4: u32::MIN,
348 : field5: u8::MIN,
349 : field6: u32::MIN,
350 : };
351 : pub const MAX: Key = Key {
352 : field1: u8::MAX,
353 : field2: u32::MAX,
354 : field3: u32::MAX,
355 : field4: u32::MAX,
356 : field5: u8::MAX,
357 : field6: u32::MAX,
358 : };
359 :
360 20944 : pub fn from_hex(s: &str) -> Result<Self> {
361 20944 : if s.len() != 36 {
362 4 : bail!("parse error");
363 20940 : }
364 : Ok(Key {
365 20940 : field1: u8::from_str_radix(&s[0..2], 16)?,
366 20940 : field2: u32::from_str_radix(&s[2..10], 16)?,
367 20940 : field3: u32::from_str_radix(&s[10..18], 16)?,
368 20940 : field4: u32::from_str_radix(&s[18..26], 16)?,
369 20940 : field5: u8::from_str_radix(&s[26..28], 16)?,
370 20940 : field6: u32::from_str_radix(&s[28..36], 16)?,
371 : })
372 20944 : }
373 : }
374 :
375 : // Layout of the Key address space
376 : //
377 : // The Key struct, used to address the underlying key-value store, consists of
378 : // 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
379 : // all the data and metadata keys into those 18 bytes.
380 : //
381 : // Principles for the mapping:
382 : //
383 : // - Things that are often accessed or modified together, should be close to
384 : // each other in the key space. For example, if a relation is extended by one
385 : // block, we create a new key-value pair for the block data, and update the
386 : // relation size entry. Because of that, the RelSize key comes after all the
387 : // RelBlocks of a relation: the RelSize and the last RelBlock are always next
388 : // to each other.
389 : //
// The key space is divided into four major sections, identified by the first
// byte, and they form a hierarchy:
392 : //
393 : // 00 Relation data and metadata
394 : //
395 : // DbDir () -> (dbnode, spcnode)
396 : // Filenodemap
397 : // RelDir -> relnode forknum
398 : // RelBlocks
399 : // RelSize
400 : //
401 : // 01 SLRUs
402 : //
403 : // SlruDir kind
404 : // SlruSegBlocks segno
405 : // SlruSegSize
406 : //
407 : // 02 pg_twophase
408 : //
409 : // 03 misc
410 : // Controlfile
411 : // checkpoint
412 : // pg_version
413 : //
414 : // 04 aux files
415 : //
416 : // Below is a full list of the keyspace allocation:
417 : //
418 : // DbDir:
419 : // 00 00000000 00000000 00000000 00 00000000
420 : //
421 : // Filenodemap:
422 : // 00 SPCNODE DBNODE 00000000 00 00000000
423 : //
424 : // RelDir:
425 : // 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
426 : //
427 : // RelBlock:
428 : // 00 SPCNODE DBNODE RELNODE FORK BLKNUM
429 : //
430 : // RelSize:
431 : // 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
432 : //
433 : // SlruDir:
434 : // 01 kind 00000000 00000000 00 00000000
435 : //
436 : // SlruSegBlock:
437 : // 01 kind 00000001 SEGNO 00 BLKNUM
438 : //
439 : // SlruSegSize:
440 : // 01 kind 00000001 SEGNO 00 FFFFFFFF
441 : //
442 : // TwoPhaseDir:
443 : // 02 00000000 00000000 00000000 00 00000000
444 : //
445 : // TwoPhaseFile:
446 : //
447 : // 02 00000000 00000000 00XXXXXX XX XXXXXXXX
448 : //
449 : // \______XID_________/
450 : //
451 : // The 64-bit XID is stored a little awkwardly in field6, field5 and
452 : // field4. PostgreSQL v16 and below only stored a 32-bit XID, which
453 : // fit completely in field6, but starting with PostgreSQL v17, a full
454 : // 64-bit XID is used. Most pageserver code that accesses
455 : // TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
456 : // are just unused.
457 : //
458 : // ControlFile:
459 : // 03 00000000 00000000 00000000 00 00000000
460 : //
461 : // Checkpoint:
462 : // 03 00000000 00000000 00000000 00 00000001
463 : //
464 : // AuxFiles:
465 : // 03 00000000 00000000 00000000 00 00000002
466 : //
467 :
468 : //-- Section 01: relation data and metadata
469 :
/// Key of the DbDir entry: all fields zero.
pub const DBDIR_KEY: Key = Key {
    field1: 0x00,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
478 :
479 : #[inline(always)]
480 0 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
481 0 : Key {
482 0 : field1: 0x00,
483 0 : field2: spcnode,
484 0 : field3: dbnode,
485 0 : field4: 0,
486 0 : field5: 0,
487 0 : field6: 0,
488 0 : }..Key {
489 0 : field1: 0x00,
490 0 : field2: spcnode,
491 0 : field3: dbnode,
492 0 : field4: 0xffffffff,
493 0 : field5: 0xff,
494 0 : field6: 0xffffffff,
495 0 : }
496 0 : }
497 :
498 : #[inline(always)]
499 8 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
500 8 : Key {
501 8 : field1: 0x00,
502 8 : field2: spcnode,
503 8 : field3: dbnode,
504 8 : field4: 0,
505 8 : field5: 0,
506 8 : field6: 0,
507 8 : }
508 0 : }
509 :
510 : #[inline(always)]
511 974 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
512 974 : Key {
513 974 : field1: 0x00,
514 974 : field2: spcnode,
515 974 : field3: dbnode,
516 974 : field4: 0,
517 974 : field5: 0,
518 974 : field6: 1,
519 974 : }
520 0 : }
521 :
522 : #[inline(always)]
523 0 : pub fn rel_tag_sparse_key(spcnode: Oid, dbnode: Oid, relnode: Oid, forknum: u8) -> Key {
524 0 : Key {
525 0 : field1: REL_DIR_KEY_PREFIX,
526 0 : field2: spcnode,
527 0 : field3: dbnode,
528 0 : field4: relnode,
529 0 : field5: forknum,
530 0 : field6: 1,
531 0 : }
532 0 : }
533 :
534 0 : pub fn rel_tag_sparse_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
535 0 : Key {
536 0 : field1: REL_DIR_KEY_PREFIX,
537 0 : field2: spcnode,
538 0 : field3: dbnode,
539 0 : field4: 0,
540 0 : field5: 0,
541 0 : field6: 0,
542 0 : }..Key {
543 0 : field1: REL_DIR_KEY_PREFIX,
544 0 : field2: spcnode,
545 0 : field3: dbnode,
546 0 : field4: u32::MAX,
547 0 : field5: u8::MAX,
548 0 : field6: u32::MAX,
549 0 : } // it's fine to exclude the last key b/c we only use field6 == 1
550 0 : }
551 :
552 : #[inline(always)]
553 645958 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
554 645958 : Key {
555 645958 : field1: 0x00,
556 645958 : field2: rel.spcnode,
557 645958 : field3: rel.dbnode,
558 645958 : field4: rel.relnode,
559 645958 : field5: rel.forknum,
560 645958 : field6: blknum,
561 645958 : }
562 0 : }
563 :
564 : #[inline(always)]
565 142314 : pub fn rel_size_to_key(rel: RelTag) -> Key {
566 142314 : Key {
567 142314 : field1: 0x00,
568 142314 : field2: rel.spcnode,
569 142314 : field3: rel.dbnode,
570 142314 : field4: rel.relnode,
571 142314 : field5: rel.forknum,
572 142314 : field6: 0xffff_ffff,
573 142314 : }
574 0 : }
575 :
576 : impl Key {
577 : #[inline(always)]
578 82 : pub fn is_rel_size_key(&self) -> bool {
579 82 : self.field1 == 0 && self.field6 == u32::MAX
580 82 : }
581 : }
582 :
583 : #[inline(always)]
584 1 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
585 1 : Key {
586 1 : field1: 0x00,
587 1 : field2: rel.spcnode,
588 1 : field3: rel.dbnode,
589 1 : field4: rel.relnode,
590 1 : field5: rel.forknum,
591 1 : field6: 0,
592 1 : }..Key {
593 1 : field1: 0x00,
594 1 : field2: rel.spcnode,
595 1 : field3: rel.dbnode,
596 1 : field4: rel.relnode,
597 1 : field5: rel.forknum + 1,
598 1 : field6: 0,
599 1 : }
600 0 : }
601 :
602 : //-- Section 02: SLRUs
603 :
604 : #[inline(always)]
605 828 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
606 : Key {
607 : field1: 0x01,
608 828 : field2: match kind {
609 276 : SlruKind::Clog => 0x00,
610 276 : SlruKind::MultiXactMembers => 0x01,
611 276 : SlruKind::MultiXactOffsets => 0x02,
612 : },
613 : field3: 0,
614 : field4: 0,
615 : field5: 0,
616 : field6: 0,
617 : }
618 0 : }
619 :
620 : #[inline(always)]
621 36192 : pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
622 36192 : if key.field1 == 0x01
623 24 : && key.field3 == 0
624 24 : && key.field4 == 0
625 24 : && key.field5 == 0
626 24 : && key.field6 == 0
627 : {
628 24 : match key.field2 {
629 8 : 0 => Some(Ok(SlruKind::Clog)),
630 8 : 1 => Some(Ok(SlruKind::MultiXactMembers)),
631 8 : 2 => Some(Ok(SlruKind::MultiXactOffsets)),
632 0 : x => Some(Err(x)),
633 : }
634 : } else {
635 36168 : None
636 : }
637 36192 : }
638 :
639 : #[inline(always)]
640 7 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
641 : Key {
642 : field1: 0x01,
643 7 : field2: match kind {
644 5 : SlruKind::Clog => 0x00,
645 1 : SlruKind::MultiXactMembers => 0x01,
646 1 : SlruKind::MultiXactOffsets => 0x02,
647 : },
648 : field3: 1,
649 7 : field4: segno,
650 : field5: 0,
651 7 : field6: blknum,
652 : }
653 0 : }
654 :
655 : #[inline(always)]
656 3 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
657 : Key {
658 : field1: 0x01,
659 3 : field2: match kind {
660 1 : SlruKind::Clog => 0x00,
661 1 : SlruKind::MultiXactMembers => 0x01,
662 1 : SlruKind::MultiXactOffsets => 0x02,
663 : },
664 : field3: 1,
665 3 : field4: segno,
666 : field5: 0,
667 : field6: 0xffff_ffff,
668 : }
669 0 : }
670 :
671 : impl Key {
672 36192 : pub fn is_slru_segment_size_key(&self) -> bool {
673 36192 : self.field1 == 0x01
674 24 : && self.field2 < 0x03
675 24 : && self.field3 == 0x01
676 0 : && self.field5 == 0
677 0 : && self.field6 == u32::MAX
678 36192 : }
679 :
680 36192 : pub fn is_slru_dir_key(&self) -> bool {
681 36192 : slru_dir_kind(self).is_some()
682 36192 : }
683 : }
684 :
685 : #[inline(always)]
686 0 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
687 0 : let field2 = match kind {
688 0 : SlruKind::Clog => 0x00,
689 0 : SlruKind::MultiXactMembers => 0x01,
690 0 : SlruKind::MultiXactOffsets => 0x02,
691 : };
692 :
693 0 : Key {
694 0 : field1: 0x01,
695 0 : field2,
696 0 : field3: 1,
697 0 : field4: segno,
698 0 : field5: 0,
699 0 : field6: 0,
700 0 : }..Key {
701 0 : field1: 0x01,
702 0 : field2,
703 0 : field3: 1,
704 0 : field4: segno,
705 0 : field5: 1,
706 0 : field6: 0,
707 0 : }
708 0 : }
709 :
710 : //-- Section 03: pg_twophase
711 :
/// Key of the TwoPhaseDir entry: section 0x02 with all other fields zero.
pub const TWOPHASEDIR_KEY: Key = Key {
    field1: 0x02,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
720 :
721 : #[inline(always)]
722 0 : pub fn twophase_file_key(xid: u64) -> Key {
723 0 : Key {
724 0 : field1: 0x02,
725 0 : field2: 0,
726 0 : field3: 0,
727 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
728 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
729 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
730 0 : }
731 0 : }
732 :
733 : #[inline(always)]
734 0 : pub fn twophase_key_range(xid: u64) -> Range<Key> {
735 : // 64-bit XIDs really should not overflow
736 0 : let (next_xid, overflowed) = xid.overflowing_add(1);
737 :
738 0 : Key {
739 0 : field1: 0x02,
740 0 : field2: 0,
741 0 : field3: 0,
742 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
743 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
744 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
745 0 : }..Key {
746 0 : field1: 0x02,
747 0 : field2: 0,
748 0 : field3: u32::from(overflowed),
749 0 : field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
750 0 : field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
751 0 : field6: (next_xid & 0x00000000FFFFFFFF) as u32,
752 0 : }
753 0 : }
754 :
//-- Section 04: Control file, checkpoint and aux files
/// Key of the ControlFile entry: section 0x03, `field6 == 0`.
pub const CONTROLFILE_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};

/// Key of the Checkpoint entry: section 0x03, `field6 == 1`.
pub const CHECKPOINT_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 1,
};

/// Key of the AuxFiles entry: section 0x03, `field6 == 2`.
/// This is the only key in `NON_INHERITED_RANGE`.
pub const AUX_FILES_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 2,
};
782 :
783 : #[inline(always)]
784 3 : pub fn repl_origin_key(origin_id: RepOriginId) -> Key {
785 3 : Key {
786 3 : field1: REPL_ORIGIN_KEY_PREFIX,
787 3 : field2: 0,
788 3 : field3: 0,
789 3 : field4: 0,
790 3 : field5: 0,
791 3 : field6: origin_id as u32,
792 3 : }
793 0 : }
794 :
795 : /// Get the range of replorigin keys.
796 171 : pub fn repl_origin_key_range() -> Range<Key> {
797 171 : Key {
798 171 : field1: REPL_ORIGIN_KEY_PREFIX,
799 171 : field2: 0,
800 171 : field3: 0,
801 171 : field4: 0,
802 171 : field5: 0,
803 171 : field6: 0,
804 171 : }..Key {
805 171 : field1: REPL_ORIGIN_KEY_PREFIX,
806 171 : field2: 0,
807 171 : field3: 0,
808 171 : field4: 0,
809 171 : field5: 0,
810 171 : field6: 0x10000,
811 171 : }
812 171 : }
813 :
814 : // Reverse mappings for a few Keys.
815 : // These are needed by WAL redo manager.
816 :
/// Non inherited range for vectored get. Covers exactly one key, `AUX_FILES_KEY`.
pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
/// Same range as `Key::metadata_key_range()`.
pub const SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
821 :
822 : impl Key {
823 : // AUX_FILES currently stores only data for logical replication (slots etc), and
824 : // we don't preserve these on a branch because safekeepers can't follow timeline
825 : // switch (and generally it likely should be optional), so ignore these.
826 : #[inline(always)]
827 0 : pub fn is_inherited_key(self) -> bool {
828 0 : if self.is_sparse() {
829 0 : self.is_inherited_sparse_key()
830 : } else {
831 0 : !NON_INHERITED_RANGE.contains(&self)
832 : }
833 0 : }
834 :
835 : #[inline(always)]
836 1797835 : pub fn is_sparse(self) -> bool {
837 1797835 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
838 0 : }
839 :
840 : /// Check if the key belongs to the inherited keyspace.
841 0 : fn is_inherited_sparse_key(self) -> bool {
842 0 : debug_assert!(self.is_sparse());
843 0 : self.field1 == RELATION_SIZE_PREFIX
844 0 : }
845 :
846 425394 : pub const fn sparse_non_inherited_keyspace() -> Range<Key> {
847 : // The two keys are adjacent; if we will have non-adjancent keys in the future, we should return a keyspace
848 : const_assert!(AUX_KEY_PREFIX + 1 == REPL_ORIGIN_KEY_PREFIX);
849 425394 : Key {
850 425394 : field1: AUX_KEY_PREFIX,
851 425394 : field2: 0,
852 425394 : field3: 0,
853 425394 : field4: 0,
854 425394 : field5: 0,
855 425394 : field6: 0,
856 425394 : }..Key {
857 425394 : field1: REPL_ORIGIN_KEY_PREFIX + 1,
858 425394 : field2: 0,
859 425394 : field3: 0,
860 425394 : field4: 0,
861 425394 : field5: 0,
862 425394 : field6: 0,
863 425394 : }
864 425394 : }
865 :
866 : #[inline(always)]
867 0 : pub fn is_rel_fsm_block_key(self) -> bool {
868 0 : self.field1 == 0x00
869 0 : && self.field4 != 0
870 0 : && self.field5 == FSM_FORKNUM
871 0 : && self.field6 != 0xffffffff
872 0 : }
873 :
874 : #[inline(always)]
875 0 : pub fn is_rel_vm_block_key(self) -> bool {
876 0 : self.field1 == 0x00
877 0 : && self.field4 != 0
878 0 : && self.field5 == VISIBILITYMAP_FORKNUM
879 0 : && self.field6 != 0xffffffff
880 0 : }
881 :
882 : #[inline(always)]
883 0 : pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
884 0 : Ok(match self.field1 {
885 : 0x01 => {
886 0 : let kind = match self.field2 {
887 0 : 0x00 => SlruKind::Clog,
888 0 : 0x01 => SlruKind::MultiXactMembers,
889 0 : 0x02 => SlruKind::MultiXactOffsets,
890 0 : _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", self.field2),
891 : };
892 0 : let segno = self.field4;
893 0 : let blknum = self.field6;
894 :
895 0 : (kind, segno, blknum)
896 : }
897 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
898 : })
899 0 : }
900 :
901 : #[inline(always)]
902 322636 : pub fn is_slru_block_key(self) -> bool {
903 322636 : self.field1 == 0x01 // SLRU-related
904 367 : && self.field3 == 0x00000001 // but not SlruDir
905 10 : && self.field6 != 0xffffffff // and not SlruSegSize
906 36192 : }
907 :
908 : #[inline(always)]
909 533787 : pub fn is_rel_block_key(&self) -> bool {
910 533787 : self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
911 108416 : }
912 :
913 : #[inline(always)]
914 0 : pub fn is_rel_block_of_rel(&self, rel: Oid) -> bool {
915 0 : self.is_rel_block_key() && self.field4 == rel
916 0 : }
917 :
918 : #[inline(always)]
919 100 : pub fn is_rel_dir_key(&self) -> bool {
920 100 : self.field1 == 0x00
921 100 : && self.field2 != 0
922 0 : && self.field3 != 0
923 0 : && self.field4 == 0
924 0 : && self.field5 == 0
925 0 : && self.field6 == 1
926 0 : }
927 :
928 : #[inline(always)]
929 36292 : pub fn is_aux_file_key(&self) -> bool {
930 36292 : self.field1 == AUX_KEY_PREFIX
931 36192 : }
932 :
933 : /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
934 : #[inline(always)]
935 72824 : pub fn to_rel_block(self) -> Result<(RelTag, BlockNumber), ToRelBlockError> {
936 72824 : Ok(match self.field1 {
937 72824 : 0x00 => (
938 72824 : RelTag {
939 72824 : spcnode: self.field2,
940 72824 : dbnode: self.field3,
941 72824 : relnode: self.field4,
942 72824 : forknum: self.field5,
943 72824 : },
944 72824 : self.field6,
945 72824 : ),
946 0 : _ => return Err(ToRelBlockError(self.field1)),
947 : })
948 0 : }
949 : }
950 :
951 : impl std::str::FromStr for Key {
952 : type Err = anyhow::Error;
953 :
954 9 : fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
955 9 : Self::from_hex(s)
956 9 : }
957 : }
958 :
/// Error returned by `Key::to_rel_block`; carries the unexpected `field1` value.
#[derive(Debug)]
pub struct ToRelBlockError(u8);

impl std::error::Error for ToRelBlockError {}

impl fmt::Display for ToRelBlockError {
    /// Writes `unexpected value kind 0x..` with the value as two lower-case hex digits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ToRelBlockError(kind) = self;
        write!(f, "unexpected value kind 0x{:02x}", kind)
    }
}
969 :
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use rand::{Rng, SeedableRng};

    use super::AUX_KEY_PREFIX;
    use crate::key::{Key, is_metadata_key_slice};

    /// Formatting a key and parsing the result back must yield the same key.
    #[test]
    fn display_fromstr_bijection() {
        // Fixed seed so the generated key is the same on every run.
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);

        let key = Key {
            field1: rng.r#gen(),
            field2: rng.r#gen(),
            field3: rng.r#gen(),
            field4: rng.r#gen(),
            field5: rng.r#gen(),
            field6: rng.r#gen(),
        };

        assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
    }

    /// A 16-byte metadata key must survive the round trip through `Key` and `to_i128`,
    /// and be recognized as a metadata key in both representations.
    #[test]
    fn test_metadata_keys() {
        // AUX prefix followed by fifteen 0xFF bytes: 16 bytes in total.
        let mut metadata_key = vec![AUX_KEY_PREFIX];
        metadata_key.extend_from_slice(&[0xFF; 15]);
        let encoded_key = Key::from_metadata_key(&metadata_key);
        let output_key = encoded_key.to_i128().to_be_bytes();
        assert_eq!(metadata_key, output_key);
        assert!(encoded_key.is_metadata_key());
        assert!(is_metadata_key_slice(&metadata_key));
    }

    /// Decoding the largest positive i128 must not panic; the result is not checked.
    #[test]
    fn test_possible_largest_key() {
        Key::from_i128(0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF);
        // TODO: put this key into the system and see if anything breaks.
    }
}
|