Line data Source code
1 : use anyhow::{bail, Result};
2 : use byteorder::{ByteOrder, BE};
3 : use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
4 : use postgres_ffi::Oid;
5 : use postgres_ffi::RepOriginId;
6 : use serde::{Deserialize, Serialize};
7 : use std::{fmt, ops::Range};
8 :
9 : use crate::reltag::{BlockNumber, RelTag, SlruKind};
10 :
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
/// for what we actually store in these fields.
///
/// The derived `Ord`/`PartialOrd` compare the fields in declaration order, so `field1` is the
/// most significant part of a key and `field6` the least significant.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Key {
    /// First byte of the key; selects the section of the key space
    /// (e.g. `0x00` for relation keys, `0x01` for SLRU keys).
    pub field1: u8,
    /// Tablespace OID for relation keys, SLRU kind for SLRU keys.
    /// Only a restricted set of values survives [`Key::to_i128`].
    pub field2: u32,
    /// Database OID for relation keys.
    pub field3: u32,
    /// Relation node for relation keys, segment number for SLRU segment keys.
    pub field4: u32,
    /// Fork number for relation keys.
    pub field5: u8,
    /// Block number for block keys; `0xFFFFFFFF` marks a size key.
    pub field6: u32,
}
24 :
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
/// a struct of fields.
///
/// [`Key::to_compact`] stores the result of [`Key::to_i128`] here and [`Key::from_compact`]
/// decodes it again. Note that the `From<i128>` impl wraps any `i128` without validation.
#[derive(
    Clone, Copy, Default, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug,
)]
pub struct CompactKey(i128);
31 :
/// The storage key size, in bytes.
pub const KEY_SIZE: usize = 18;

/// The metadata key size, in bytes. 2B fewer than the storage key size because field2 is not
/// fully utilized. See [`Key::to_i128`] for more information on the encoding.
pub const METADATA_KEY_SIZE: usize = 16;

/// The first key prefix of the metadata key range. Every key whose first byte is >= 0x60 and
/// below [`METADATA_KEY_END_PREFIX`] is a metadata key.
pub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x60;
/// The end of the metadata key prefix range. The range checks in this file compare with
/// `< METADATA_KEY_END_PREFIX`, i.e. this bound is exclusive.
pub const METADATA_KEY_END_PREFIX: u8 = 0x7F;

/// The (reserved) key prefix of relation sizes.
pub const RELATION_SIZE_PREFIX: u8 = 0x61;

/// The key prefix of AUX file keys.
pub const AUX_KEY_PREFIX: u8 = 0x62;

/// The key prefix of ReplOrigin keys.
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;
51 :
52 : /// Check if the key falls in the range of metadata keys.
53 64 : pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
54 64 : key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
55 64 : }
56 :
57 : impl Key {
58 : /// Check if the key falls in the range of metadata keys.
59 133 : pub const fn is_metadata_key(&self) -> bool {
60 133 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
61 133 : }
62 :
63 : /// Encode a metadata key to a storage key.
64 63 : pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
65 63 : assert!(is_metadata_key_slice(key), "key not in metadata key range");
66 : // Metadata key space ends at 0x7F so it's fine to directly convert it to i128.
67 63 : Self::from_i128(i128::from_be_bytes(*key))
68 63 : }
69 :
70 : /// Encode a metadata key to a storage key.
71 1 : pub fn from_metadata_key(key: &[u8]) -> Self {
72 1 : Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
73 1 : }
74 :
75 : /// Get the range of metadata keys.
76 2536 : pub const fn metadata_key_range() -> Range<Self> {
77 2536 : Key {
78 2536 : field1: METADATA_KEY_BEGIN_PREFIX,
79 2536 : field2: 0,
80 2536 : field3: 0,
81 2536 : field4: 0,
82 2536 : field5: 0,
83 2536 : field6: 0,
84 2536 : }..Key {
85 2536 : field1: METADATA_KEY_END_PREFIX,
86 2536 : field2: 0,
87 2536 : field3: 0,
88 2536 : field4: 0,
89 2536 : field5: 0,
90 2536 : field6: 0,
91 2536 : }
92 2536 : }
93 :
94 : /// Get the range of aux keys.
95 664 : pub fn metadata_aux_key_range() -> Range<Self> {
96 664 : Key {
97 664 : field1: AUX_KEY_PREFIX,
98 664 : field2: 0,
99 664 : field3: 0,
100 664 : field4: 0,
101 664 : field5: 0,
102 664 : field6: 0,
103 664 : }..Key {
104 664 : field1: AUX_KEY_PREFIX + 1,
105 664 : field2: 0,
106 664 : field3: 0,
107 664 : field4: 0,
108 664 : field5: 0,
109 664 : field6: 0,
110 664 : }
111 664 : }
112 :
113 : /// This function checks more extensively what keys we can take on the write path.
114 : /// If a key beginning with 00 does not have a global/default tablespace OID, it
115 : /// will be rejected on the write path.
116 : #[allow(dead_code)]
117 0 : pub fn is_valid_key_on_write_path_strong(&self) -> bool {
118 : use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
119 0 : if !self.is_i128_representable() {
120 0 : return false;
121 0 : }
122 0 : if self.field1 == 0
123 0 : && !(self.field2 == GLOBALTABLESPACE_OID
124 0 : || self.field2 == DEFAULTTABLESPACE_OID
125 0 : || self.field2 == 0)
126 : {
127 0 : return false; // User defined tablespaces are not supported
128 0 : }
129 0 : true
130 0 : }
131 :
132 : /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
133 : /// checks if the key is i128 representable. Note that some keys can be successfully
134 : /// ingested into the pageserver, but will cause errors on generating basebackup.
135 9633289 : pub fn is_valid_key_on_write_path(&self) -> bool {
136 9633289 : self.is_i128_representable()
137 9633289 : }
138 :
139 32949035 : pub fn is_i128_representable(&self) -> bool {
140 32949035 : self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
141 32949035 : }
142 :
143 : /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
144 : /// As long as Neon does not support tablespace (because of lack of access to local file system),
145 : /// we can assume that only some predefined namespace OIDs are used which can fit in u16
146 23315746 : pub fn to_i128(&self) -> i128 {
147 23315746 : assert!(self.is_i128_representable(), "invalid key: {self}");
148 23315746 : (((self.field1 & 0x7F) as i128) << 120)
149 23315746 : | (((self.field2 & 0xFFFF) as i128) << 104)
150 23315746 : | ((self.field3 as i128) << 72)
151 23315746 : | ((self.field4 as i128) << 40)
152 23315746 : | ((self.field5 as i128) << 32)
153 23315746 : | self.field6 as i128
154 23315746 : }
155 :
156 21248230 : pub const fn from_i128(x: i128) -> Self {
157 21248230 : Key {
158 21248230 : field1: ((x >> 120) & 0x7F) as u8,
159 21248230 : field2: ((x >> 104) & 0xFFFF) as u32,
160 21248230 : field3: (x >> 72) as u32,
161 21248230 : field4: (x >> 40) as u32,
162 21248230 : field5: (x >> 32) as u8,
163 21248230 : field6: x as u32,
164 21248230 : }
165 21248230 : }
166 :
167 13753630 : pub fn to_compact(&self) -> CompactKey {
168 13753630 : CompactKey(self.to_i128())
169 13753630 : }
170 :
171 20242386 : pub fn from_compact(k: CompactKey) -> Self {
172 20242386 : Self::from_i128(k.0)
173 20242386 : }
174 :
175 16363002 : pub const fn next(&self) -> Key {
176 16363002 : self.add(1)
177 16363002 : }
178 :
179 16375463 : pub const fn add(&self, x: u32) -> Key {
180 16375463 : let mut key = *self;
181 16375463 :
182 16375463 : let r = key.field6.overflowing_add(x);
183 16375463 : key.field6 = r.0;
184 16375463 : if r.1 {
185 1102809 : let r = key.field5.overflowing_add(1);
186 1102809 : key.field5 = r.0;
187 1102809 : if r.1 {
188 0 : let r = key.field4.overflowing_add(1);
189 0 : key.field4 = r.0;
190 0 : if r.1 {
191 0 : let r = key.field3.overflowing_add(1);
192 0 : key.field3 = r.0;
193 0 : if r.1 {
194 0 : let r = key.field2.overflowing_add(1);
195 0 : key.field2 = r.0;
196 0 : if r.1 {
197 0 : let r = key.field1.overflowing_add(1);
198 0 : key.field1 = r.0;
199 0 : assert!(!r.1);
200 0 : }
201 0 : }
202 0 : }
203 1102809 : }
204 15272654 : }
205 16375463 : key
206 16375463 : }
207 :
208 : /// Convert a 18B slice to a key. This function should not be used for 16B metadata keys because `field2` is handled differently.
209 : /// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys). There are some restrictions on `field2`,
210 : /// and therefore not all 18B slices are valid page server keys.
211 11889672 : pub fn from_slice(b: &[u8]) -> Self {
212 11889672 : Key {
213 11889672 : field1: b[0],
214 11889672 : field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
215 11889672 : field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
216 11889672 : field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
217 11889672 : field5: b[13],
218 11889672 : field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
219 11889672 : }
220 11889672 : }
221 :
222 : /// Convert a key to a 18B slice. This function should not be used for getting a 16B metadata key because `field2` is handled differently.
223 : /// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).
224 14516428 : pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
225 14516428 : buf[0] = self.field1;
226 14516428 : BE::write_u32(&mut buf[1..5], self.field2);
227 14516428 : BE::write_u32(&mut buf[5..9], self.field3);
228 14516428 : BE::write_u32(&mut buf[9..13], self.field4);
229 14516428 : buf[13] = self.field5;
230 14516428 : BE::write_u32(&mut buf[14..18], self.field6);
231 14516428 : }
232 : }
233 :
234 : impl CompactKey {
235 10 : pub fn raw(&self) -> i128 {
236 10 : self.0
237 10 : }
238 : }
239 :
240 : impl From<i128> for CompactKey {
241 5 : fn from(value: i128) -> Self {
242 5 : Self(value)
243 5 : }
244 : }
245 :
246 : impl fmt::Display for Key {
247 758909 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248 758909 : write!(
249 758909 : f,
250 758909 : "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
251 758909 : self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
252 758909 : )
253 758909 : }
254 : }
255 :
256 : impl fmt::Display for CompactKey {
257 0 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
258 0 : let k = Key::from_compact(*self);
259 0 : k.fmt(f)
260 0 : }
261 : }
262 :
263 : impl Key {
264 : pub const MIN: Key = Key {
265 : field1: u8::MIN,
266 : field2: u32::MIN,
267 : field3: u32::MIN,
268 : field4: u32::MIN,
269 : field5: u8::MIN,
270 : field6: u32::MIN,
271 : };
272 : pub const MAX: Key = Key {
273 : field1: u8::MAX,
274 : field2: u32::MAX,
275 : field3: u32::MAX,
276 : field4: u32::MAX,
277 : field5: u8::MAX,
278 : field6: u32::MAX,
279 : };
280 :
281 80373 : pub fn from_hex(s: &str) -> Result<Self> {
282 80373 : if s.len() != 36 {
283 4 : bail!("parse error");
284 80369 : }
285 80369 : Ok(Key {
286 80369 : field1: u8::from_str_radix(&s[0..2], 16)?,
287 80369 : field2: u32::from_str_radix(&s[2..10], 16)?,
288 80369 : field3: u32::from_str_radix(&s[10..18], 16)?,
289 80369 : field4: u32::from_str_radix(&s[18..26], 16)?,
290 80369 : field5: u8::from_str_radix(&s[26..28], 16)?,
291 80369 : field6: u32::from_str_radix(&s[28..36], 16)?,
292 : })
293 80373 : }
294 : }
295 :
296 : // Layout of the Key address space
297 : //
298 : // The Key struct, used to address the underlying key-value store, consists of
299 : // 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
300 : // all the data and metadata keys into those 18 bytes.
301 : //
302 : // Principles for the mapping:
303 : //
304 : // - Things that are often accessed or modified together, should be close to
305 : // each other in the key space. For example, if a relation is extended by one
306 : // block, we create a new key-value pair for the block data, and update the
307 : // relation size entry. Because of that, the RelSize key comes after all the
308 : // RelBlocks of a relation: the RelSize and the last RelBlock are always next
309 : // to each other.
310 : //
311 : // The key space is divided into several major sections, identified by the first
312 : // byte, and they form a hierarchy:
313 : //
314 : // 00 Relation data and metadata
315 : //
316 : // DbDir () -> (dbnode, spcnode)
317 : // Filenodemap
318 : // RelDir -> relnode forknum
319 : // RelBlocks
320 : // RelSize
321 : //
322 : // 01 SLRUs
323 : //
324 : // SlruDir kind
325 : // SlruSegBlocks segno
326 : // SlruSegSize
327 : //
328 : // 02 pg_twophase
329 : //
330 : // 03 misc
331 : // Controlfile
332 : // checkpoint
333 : // pg_version
334 : //
335 : // 04 aux files
336 : //
337 : // Below is a full list of the keyspace allocation:
338 : //
339 : // DbDir:
340 : // 00 00000000 00000000 00000000 00 00000000
341 : //
342 : // Filenodemap:
343 : // 00 SPCNODE DBNODE 00000000 00 00000000
344 : //
345 : // RelDir:
346 : // 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
347 : //
348 : // RelBlock:
349 : // 00 SPCNODE DBNODE RELNODE FORK BLKNUM
350 : //
351 : // RelSize:
352 : // 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
353 : //
354 : // SlruDir:
355 : // 01 kind 00000000 00000000 00 00000000
356 : //
357 : // SlruSegBlock:
358 : // 01 kind 00000001 SEGNO 00 BLKNUM
359 : //
360 : // SlruSegSize:
361 : // 01 kind 00000001 SEGNO 00 FFFFFFFF
362 : //
363 : // TwoPhaseDir:
364 : // 02 00000000 00000000 00000000 00 00000000
365 : //
366 : // TwoPhaseFile:
367 : //
368 : // 02 00000000 00000000 00XXXXXX XX XXXXXXXX
369 : //
370 : // \______XID_________/
371 : //
372 : // The 64-bit XID is stored a little awkwardly in field6, field5 and
373 : // field4. PostgreSQL v16 and below only stored a 32-bit XID, which
374 : // fit completely in field6, but starting with PostgreSQL v17, a full
375 : // 64-bit XID is used. Most pageserver code that accesses
376 : // TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
377 : // are just unused.
378 : //
379 : // ControlFile:
380 : // 03 00000000 00000000 00000000 00 00000000
381 : //
382 : // Checkpoint:
383 : // 03 00000000 00000000 00000000 00 00000001
384 : //
385 : // AuxFiles:
386 : // 03 00000000 00000000 00000000 00 00000002
387 : //
388 :
389 : //-- Section 01: relation data and metadata
390 :
/// Key of the DbDir entry: section `0x00` with every other field zero.
pub const DBDIR_KEY: Key = Key {
    field1: 0x00,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
399 :
400 : #[inline(always)]
401 0 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
402 0 : Key {
403 0 : field1: 0x00,
404 0 : field2: spcnode,
405 0 : field3: dbnode,
406 0 : field4: 0,
407 0 : field5: 0,
408 0 : field6: 0,
409 0 : }..Key {
410 0 : field1: 0x00,
411 0 : field2: spcnode,
412 0 : field3: dbnode,
413 0 : field4: 0xffffffff,
414 0 : field5: 0xff,
415 0 : field6: 0xffffffff,
416 0 : }
417 0 : }
418 :
419 : #[inline(always)]
420 32 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
421 32 : Key {
422 32 : field1: 0x00,
423 32 : field2: spcnode,
424 32 : field3: dbnode,
425 32 : field4: 0,
426 32 : field5: 0,
427 32 : field6: 0,
428 32 : }
429 32 : }
430 :
431 : #[inline(always)]
432 3896 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
433 3896 : Key {
434 3896 : field1: 0x00,
435 3896 : field2: spcnode,
436 3896 : field3: dbnode,
437 3896 : field4: 0,
438 3896 : field5: 0,
439 3896 : field6: 1,
440 3896 : }
441 3896 : }
442 :
443 : #[inline(always)]
444 2583814 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
445 2583814 : Key {
446 2583814 : field1: 0x00,
447 2583814 : field2: rel.spcnode,
448 2583814 : field3: rel.dbnode,
449 2583814 : field4: rel.relnode,
450 2583814 : field5: rel.forknum,
451 2583814 : field6: blknum,
452 2583814 : }
453 2583814 : }
454 :
455 : #[inline(always)]
456 579508 : pub fn rel_size_to_key(rel: RelTag) -> Key {
457 579508 : Key {
458 579508 : field1: 0x00,
459 579508 : field2: rel.spcnode,
460 579508 : field3: rel.dbnode,
461 579508 : field4: rel.relnode,
462 579508 : field5: rel.forknum,
463 579508 : field6: 0xffff_ffff,
464 579508 : }
465 579508 : }
466 :
467 : impl Key {
468 : #[inline(always)]
469 5 : pub fn is_rel_size_key(&self) -> bool {
470 5 : self.field1 == 0 && self.field6 == u32::MAX
471 5 : }
472 : }
473 :
474 : #[inline(always)]
475 4 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
476 4 : Key {
477 4 : field1: 0x00,
478 4 : field2: rel.spcnode,
479 4 : field3: rel.dbnode,
480 4 : field4: rel.relnode,
481 4 : field5: rel.forknum,
482 4 : field6: 0,
483 4 : }..Key {
484 4 : field1: 0x00,
485 4 : field2: rel.spcnode,
486 4 : field3: rel.dbnode,
487 4 : field4: rel.relnode,
488 4 : field5: rel.forknum + 1,
489 4 : field6: 0,
490 4 : }
491 4 : }
492 :
493 : //-- Section 02: SLRUs
494 :
495 : #[inline(always)]
496 3096 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
497 3096 : Key {
498 3096 : field1: 0x01,
499 3096 : field2: match kind {
500 1032 : SlruKind::Clog => 0x00,
501 1032 : SlruKind::MultiXactMembers => 0x01,
502 1032 : SlruKind::MultiXactOffsets => 0x02,
503 : },
504 : field3: 0,
505 : field4: 0,
506 : field5: 0,
507 : field6: 0,
508 : }
509 3096 : }
510 :
511 : #[inline(always)]
512 2098633 : pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
513 2098633 : if key.field1 == 0x01
514 0 : && key.field3 == 0
515 0 : && key.field4 == 0
516 0 : && key.field5 == 0
517 0 : && key.field6 == 0
518 : {
519 0 : match key.field2 {
520 0 : 0 => Some(Ok(SlruKind::Clog)),
521 0 : 1 => Some(Ok(SlruKind::MultiXactMembers)),
522 0 : 2 => Some(Ok(SlruKind::MultiXactOffsets)),
523 0 : x => Some(Err(x)),
524 : }
525 : } else {
526 2098633 : None
527 : }
528 2098633 : }
529 :
530 : #[inline(always)]
531 28 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
532 28 : Key {
533 28 : field1: 0x01,
534 28 : field2: match kind {
535 20 : SlruKind::Clog => 0x00,
536 4 : SlruKind::MultiXactMembers => 0x01,
537 4 : SlruKind::MultiXactOffsets => 0x02,
538 : },
539 : field3: 1,
540 28 : field4: segno,
541 28 : field5: 0,
542 28 : field6: blknum,
543 28 : }
544 28 : }
545 :
546 : #[inline(always)]
547 12 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
548 12 : Key {
549 12 : field1: 0x01,
550 12 : field2: match kind {
551 4 : SlruKind::Clog => 0x00,
552 4 : SlruKind::MultiXactMembers => 0x01,
553 4 : SlruKind::MultiXactOffsets => 0x02,
554 : },
555 : field3: 1,
556 12 : field4: segno,
557 12 : field5: 0,
558 12 : field6: 0xffff_ffff,
559 12 : }
560 12 : }
561 :
562 : impl Key {
563 2098633 : pub fn is_slru_segment_size_key(&self) -> bool {
564 2098633 : self.field1 == 0x01
565 0 : && self.field2 < 0x03
566 0 : && self.field3 == 0x01
567 0 : && self.field5 == 0
568 0 : && self.field6 == u32::MAX
569 2098633 : }
570 :
571 2098633 : pub fn is_slru_dir_key(&self) -> bool {
572 2098633 : slru_dir_kind(self).is_some()
573 2098633 : }
574 : }
575 :
576 : #[inline(always)]
577 0 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
578 0 : let field2 = match kind {
579 0 : SlruKind::Clog => 0x00,
580 0 : SlruKind::MultiXactMembers => 0x01,
581 0 : SlruKind::MultiXactOffsets => 0x02,
582 : };
583 :
584 0 : Key {
585 0 : field1: 0x01,
586 0 : field2,
587 0 : field3: 1,
588 0 : field4: segno,
589 0 : field5: 0,
590 0 : field6: 0,
591 0 : }..Key {
592 0 : field1: 0x01,
593 0 : field2,
594 0 : field3: 1,
595 0 : field4: segno,
596 0 : field5: 1,
597 0 : field6: 0,
598 0 : }
599 0 : }
600 :
601 : //-- Section 03: pg_twophase
602 :
/// Key of the TwoPhaseDir entry: section `0x02` with every other field zero.
pub const TWOPHASEDIR_KEY: Key = Key {
    field1: 0x02,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
611 :
612 : #[inline(always)]
613 0 : pub fn twophase_file_key(xid: u64) -> Key {
614 0 : Key {
615 0 : field1: 0x02,
616 0 : field2: 0,
617 0 : field3: 0,
618 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
619 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
620 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
621 0 : }
622 0 : }
623 :
624 : #[inline(always)]
625 0 : pub fn twophase_key_range(xid: u64) -> Range<Key> {
626 0 : // 64-bit XIDs really should not overflow
627 0 : let (next_xid, overflowed) = xid.overflowing_add(1);
628 0 :
629 0 : Key {
630 0 : field1: 0x02,
631 0 : field2: 0,
632 0 : field3: 0,
633 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
634 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
635 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
636 0 : }..Key {
637 0 : field1: 0x02,
638 0 : field2: 0,
639 0 : field3: u32::from(overflowed),
640 0 : field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
641 0 : field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
642 0 : field6: (next_xid & 0x00000000FFFFFFFF) as u32,
643 0 : }
644 0 : }
645 :
646 : //-- Section 04: Control file
/// Key of the control file: section `0x03`, `field6 == 0`.
pub const CONTROLFILE_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};

/// Key of the checkpoint: section `0x03`, `field6 == 1`.
pub const CHECKPOINT_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 1,
};

/// Key of the aux files entry: section `0x03`, `field6 == 2`.
/// It is the only key contained in [`NON_INHERITED_RANGE`].
pub const AUX_FILES_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 2,
};
673 :
674 : #[inline(always)]
675 0 : pub fn repl_origin_key(origin_id: RepOriginId) -> Key {
676 0 : Key {
677 0 : field1: REPL_ORIGIN_KEY_PREFIX,
678 0 : field2: 0,
679 0 : field3: 0,
680 0 : field4: 0,
681 0 : field5: 0,
682 0 : field6: origin_id as u32,
683 0 : }
684 0 : }
685 :
686 : /// Get the range of replorigin keys.
687 640 : pub fn repl_origin_key_range() -> Range<Key> {
688 640 : Key {
689 640 : field1: REPL_ORIGIN_KEY_PREFIX,
690 640 : field2: 0,
691 640 : field3: 0,
692 640 : field4: 0,
693 640 : field5: 0,
694 640 : field6: 0,
695 640 : }..Key {
696 640 : field1: REPL_ORIGIN_KEY_PREFIX,
697 640 : field2: 0,
698 640 : field3: 0,
699 640 : field4: 0,
700 640 : field5: 0,
701 640 : field6: 0x10000,
702 640 : }
703 640 : }
704 :
705 : // Reverse mappings for a few Keys.
706 : // These are needed by WAL redo manager.
707 :
/// Non inherited range for vectored get. Contains exactly one key, [`AUX_FILES_KEY`]:
/// the range ends (exclusively) at the key that follows it.
pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
/// This is the metadata key range returned by [`Key::metadata_key_range`].
pub const SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
712 :
713 : impl Key {
714 : // AUX_FILES currently stores only data for logical replication (slots etc), and
715 : // we don't preserve these on a branch because safekeepers can't follow timeline
716 : // switch (and generally it likely should be optional), so ignore these.
717 : #[inline(always)]
718 0 : pub fn is_inherited_key(self) -> bool {
719 0 : if self.is_sparse() {
720 0 : self.is_inherited_sparse_key()
721 : } else {
722 0 : !NON_INHERITED_RANGE.contains(&self)
723 : }
724 0 : }
725 :
726 : #[inline(always)]
727 1482389 : pub fn is_sparse(self) -> bool {
728 1482389 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
729 1482389 : }
730 :
731 : /// Check if the key belongs to the inherited keyspace.
732 0 : fn is_inherited_sparse_key(self) -> bool {
733 0 : debug_assert!(self.is_sparse());
734 0 : self.field1 == RELATION_SIZE_PREFIX
735 0 : }
736 :
737 1706158 : pub fn sparse_non_inherited_keyspace() -> Range<Key> {
738 1706158 : // The two keys are adjacent; if we will have non-adjancent keys in the future, we should return a keyspace
739 1706158 : debug_assert_eq!(AUX_KEY_PREFIX + 1, REPL_ORIGIN_KEY_PREFIX);
740 1706158 : Key {
741 1706158 : field1: AUX_KEY_PREFIX,
742 1706158 : field2: 0,
743 1706158 : field3: 0,
744 1706158 : field4: 0,
745 1706158 : field5: 0,
746 1706158 : field6: 0,
747 1706158 : }..Key {
748 1706158 : field1: REPL_ORIGIN_KEY_PREFIX + 1,
749 1706158 : field2: 0,
750 1706158 : field3: 0,
751 1706158 : field4: 0,
752 1706158 : field5: 0,
753 1706158 : field6: 0,
754 1706158 : }
755 1706158 : }
756 :
757 : #[inline(always)]
758 0 : pub fn is_rel_fsm_block_key(self) -> bool {
759 0 : self.field1 == 0x00
760 0 : && self.field4 != 0
761 0 : && self.field5 == FSM_FORKNUM
762 0 : && self.field6 != 0xffffffff
763 0 : }
764 :
765 : #[inline(always)]
766 0 : pub fn is_rel_vm_block_key(self) -> bool {
767 0 : self.field1 == 0x00
768 0 : && self.field4 != 0
769 0 : && self.field5 == VISIBILITYMAP_FORKNUM
770 0 : && self.field6 != 0xffffffff
771 0 : }
772 :
773 : #[inline(always)]
774 0 : pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
775 0 : Ok(match self.field1 {
776 : 0x01 => {
777 0 : let kind = match self.field2 {
778 0 : 0x00 => SlruKind::Clog,
779 0 : 0x01 => SlruKind::MultiXactMembers,
780 0 : 0x02 => SlruKind::MultiXactOffsets,
781 0 : _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", self.field2),
782 : };
783 0 : let segno = self.field4;
784 0 : let blknum = self.field6;
785 0 :
786 0 : (kind, segno, blknum)
787 : }
788 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
789 : })
790 0 : }
791 :
792 : #[inline(always)]
793 3244145 : pub fn is_slru_block_key(self) -> bool {
794 3244145 : self.field1 == 0x01 // SLRU-related
795 1264 : && self.field3 == 0x00000001 // but not SlruDir
796 40 : && self.field6 != 0xffffffff // and not SlruSegSize
797 3244145 : }
798 :
799 : #[inline(always)]
800 7997154 : pub fn is_rel_block_key(&self) -> bool {
801 7997154 : self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
802 7997154 : }
803 :
804 : #[inline(always)]
805 400 : pub fn is_rel_dir_key(&self) -> bool {
806 400 : self.field1 == 0x00
807 400 : && self.field2 != 0
808 0 : && self.field3 != 0
809 0 : && self.field4 == 0
810 0 : && self.field5 == 0
811 0 : && self.field6 == 1
812 400 : }
813 :
814 : #[inline(always)]
815 2098633 : pub fn is_aux_file_key(&self) -> bool {
816 2098633 : self.field1 == AUX_KEY_PREFIX
817 2098633 : }
818 :
819 : /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
820 : #[inline(always)]
821 291296 : pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
822 291296 : Ok(match self.field1 {
823 291296 : 0x00 => (
824 291296 : RelTag {
825 291296 : spcnode: self.field2,
826 291296 : dbnode: self.field3,
827 291296 : relnode: self.field4,
828 291296 : forknum: self.field5,
829 291296 : },
830 291296 : self.field6,
831 291296 : ),
832 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
833 : })
834 291296 : }
835 : }
836 :
837 : impl std::str::FromStr for Key {
838 : type Err = anyhow::Error;
839 :
840 9 : fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
841 9 : Self::from_hex(s)
842 9 : }
843 : }
844 :
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use rand::Rng;
    use rand::SeedableRng;

    use super::{is_metadata_key_slice, Key, AUX_KEY_PREFIX};

    /// Formatting a key and parsing the result yields the original key.
    #[test]
    fn display_fromstr_bijection() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);

        // Draw the fields in declaration order so the seeded sequence is stable.
        let field1 = rng.gen();
        let field2 = rng.gen();
        let field3 = rng.gen();
        let field4 = rng.gen();
        let field5 = rng.gen();
        let field6 = rng.gen();
        let key = Key {
            field1,
            field2,
            field3,
            field4,
            field5,
            field6,
        };

        let rendered = key.to_string();
        assert_eq!(key, Key::from_str(&rendered).unwrap());
    }

    /// A 16-byte metadata key survives the round trip through `Key` and `to_i128`.
    #[test]
    fn test_metadata_keys() {
        let mut metadata_key = [0xFF_u8; 16];
        metadata_key[0] = AUX_KEY_PREFIX;

        let encoded_key = Key::from_metadata_key(&metadata_key);
        let output_key = encoded_key.to_i128().to_be_bytes();

        assert_eq!(metadata_key, output_key);
        assert!(encoded_key.is_metadata_key());
        assert!(is_metadata_key_slice(&metadata_key));
    }

    /// Decoding the largest positive `i128` must not panic.
    #[test]
    fn test_possible_largest_key() {
        let _largest = Key::from_i128(i128::MAX);
        // TODO: put this key into the system and see if anything breaks.
    }
}
|