Line data Source code
1 : use anyhow::{bail, Result};
2 : use byteorder::{ByteOrder, BE};
3 : use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
4 : use postgres_ffi::Oid;
5 : use postgres_ffi::RepOriginId;
6 : use serde::{Deserialize, Serialize};
7 : use std::{fmt, ops::Range};
8 :
9 : use crate::reltag::{BlockNumber, RelTag, SlruKind};
10 :
/// Key used in the Repository kv-store.
///
/// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
/// for what we actually store in these fields.
///
/// The six fields occupy 1 + 4 + 4 + 4 + 1 + 4 = 18 bytes when written with
/// [`Key::write_to_byte_slice`]. The derived `Ord` compares the fields in declaration order.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
pub struct Key {
    /// First byte of the key; selects the key-space section (e.g. 0x00 for relation keys,
    /// 0x01 for SLRU keys, 0x60..0x7F for metadata keys).
    pub field1: u8,
    /// Tablespace OID for relation keys, SLRU kind for SLRU keys.
    pub field2: u32,
    /// Database OID for relation keys.
    pub field3: u32,
    /// Relation node for relation keys, segment number for SLRU block keys.
    pub field4: u32,
    /// Fork number for relation keys.
    pub field5: u8,
    /// Block number for block keys; `0xFFFF_FFFF` marks a size key.
    pub field6: u32,
}
24 :
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
/// a struct of fields.
///
/// Wraps the value produced by [`Key::to_i128`]; convert with [`Key::to_compact`] and
/// [`Key::from_compact`].
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug)]
pub struct CompactKey(i128);
29 :
/// The storage key size.
pub const KEY_SIZE: usize = 18;

/// The metadata key size. 2B fewer than the storage key size because field2 is not fully utilized.
/// See [`Key::to_i128`] for more information on the encoding.
pub const METADATA_KEY_SIZE: usize = 16;

/// First prefix byte (inclusive) of the metadata key range. Every key whose first byte is in
/// `METADATA_KEY_BEGIN_PREFIX..METADATA_KEY_END_PREFIX` is a metadata key.
pub const METADATA_KEY_BEGIN_PREFIX: u8 = 0x60;
/// Prefix byte at which the metadata key range ends (exclusive).
pub const METADATA_KEY_END_PREFIX: u8 = 0x7F;

/// The (reserved) key prefix of relation sizes.
pub const RELATION_SIZE_PREFIX: u8 = 0x61;

/// The key prefix of AUX file keys.
pub const AUX_KEY_PREFIX: u8 = 0x62;

/// The key prefix of ReplOrigin keys.
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;

/// Check if the key falls in the range of metadata keys.
///
/// Only the first byte is inspected. An empty slice has no prefix byte and is
/// therefore reported as "not a metadata key" instead of panicking.
pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
    if key.is_empty() {
        return false;
    }
    let prefix = key[0];
    prefix >= METADATA_KEY_BEGIN_PREFIX && prefix < METADATA_KEY_END_PREFIX
}
54 :
55 : impl Key {
56 : /// Check if the key falls in the range of metadata keys.
57 67 : pub const fn is_metadata_key(&self) -> bool {
58 67 : self.field1 >= METADATA_KEY_BEGIN_PREFIX && self.field1 < METADATA_KEY_END_PREFIX
59 67 : }
60 :
61 : /// Encode a metadata key to a storage key.
62 35 : pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
63 35 : assert!(is_metadata_key_slice(key), "key not in metadata key range");
64 : // Metadata key space ends at 0x7F so it's fine to directly convert it to i128.
65 35 : Self::from_i128(i128::from_be_bytes(*key))
66 35 : }
67 :
68 : /// Encode a metadata key to a storage key.
69 1 : pub fn from_metadata_key(key: &[u8]) -> Self {
70 1 : Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
71 1 : }
72 :
73 : /// Get the range of metadata keys.
74 1216 : pub const fn metadata_key_range() -> Range<Self> {
75 1216 : Key {
76 1216 : field1: METADATA_KEY_BEGIN_PREFIX,
77 1216 : field2: 0,
78 1216 : field3: 0,
79 1216 : field4: 0,
80 1216 : field5: 0,
81 1216 : field6: 0,
82 1216 : }..Key {
83 1216 : field1: METADATA_KEY_END_PREFIX,
84 1216 : field2: 0,
85 1216 : field3: 0,
86 1216 : field4: 0,
87 1216 : field5: 0,
88 1216 : field6: 0,
89 1216 : }
90 1216 : }
91 :
92 : /// Get the range of aux keys.
93 308 : pub fn metadata_aux_key_range() -> Range<Self> {
94 308 : Key {
95 308 : field1: AUX_KEY_PREFIX,
96 308 : field2: 0,
97 308 : field3: 0,
98 308 : field4: 0,
99 308 : field5: 0,
100 308 : field6: 0,
101 308 : }..Key {
102 308 : field1: AUX_KEY_PREFIX + 1,
103 308 : field2: 0,
104 308 : field3: 0,
105 308 : field4: 0,
106 308 : field5: 0,
107 308 : field6: 0,
108 308 : }
109 308 : }
110 :
111 : /// This function checks more extensively what keys we can take on the write path.
112 : /// If a key beginning with 00 does not have a global/default tablespace OID, it
113 : /// will be rejected on the write path.
114 : #[allow(dead_code)]
115 0 : pub fn is_valid_key_on_write_path_strong(&self) -> bool {
116 : use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
117 0 : if !self.is_i128_representable() {
118 0 : return false;
119 0 : }
120 0 : if self.field1 == 0
121 0 : && !(self.field2 == GLOBALTABLESPACE_OID
122 0 : || self.field2 == DEFAULTTABLESPACE_OID
123 0 : || self.field2 == 0)
124 : {
125 0 : return false; // User defined tablespaces are not supported
126 0 : }
127 0 : true
128 0 : }
129 :
130 : /// This is a weaker version of `is_valid_key_on_write_path_strong` that simply
131 : /// checks if the key is i128 representable. Note that some keys can be successfully
132 : /// ingested into the pageserver, but will cause errors on generating basebackup.
133 4816647 : pub fn is_valid_key_on_write_path(&self) -> bool {
134 4816647 : self.is_i128_representable()
135 4816647 : }
136 :
137 16476657 : pub fn is_i128_representable(&self) -> bool {
138 16476657 : self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222
139 16476657 : }
140 :
141 : /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
142 : /// As long as Neon does not support tablespace (because of lack of access to local file system),
143 : /// we can assume that only some predefined namespace OIDs are used which can fit in u16
144 11660010 : pub fn to_i128(&self) -> i128 {
145 11660010 : assert!(self.is_i128_representable(), "invalid key: {self}");
146 11660010 : (((self.field1 & 0x7F) as i128) << 120)
147 11660010 : | (((self.field2 & 0xFFFF) as i128) << 104)
148 11660010 : | ((self.field3 as i128) << 72)
149 11660010 : | ((self.field4 as i128) << 40)
150 11660010 : | ((self.field5 as i128) << 32)
151 11660010 : | self.field6 as i128
152 11660010 : }
153 :
154 10626454 : pub const fn from_i128(x: i128) -> Self {
155 10626454 : Key {
156 10626454 : field1: ((x >> 120) & 0x7F) as u8,
157 10626454 : field2: ((x >> 104) & 0xFFFF) as u32,
158 10626454 : field3: (x >> 72) as u32,
159 10626454 : field4: (x >> 40) as u32,
160 10626454 : field5: (x >> 32) as u8,
161 10626454 : field6: x as u32,
162 10626454 : }
163 10626454 : }
164 :
165 6876182 : pub fn to_compact(&self) -> CompactKey {
166 6876182 : CompactKey(self.to_i128())
167 6876182 : }
168 :
169 10120596 : pub fn from_compact(k: CompactKey) -> Self {
170 10120596 : Self::from_i128(k.0)
171 10120596 : }
172 :
173 8885112 : pub const fn next(&self) -> Key {
174 8885112 : self.add(1)
175 8885112 : }
176 :
177 8895027 : pub const fn add(&self, x: u32) -> Key {
178 8895027 : let mut key = *self;
179 8895027 :
180 8895027 : let r = key.field6.overflowing_add(x);
181 8895027 : key.field6 = r.0;
182 8895027 : if r.1 {
183 827101 : let r = key.field5.overflowing_add(1);
184 827101 : key.field5 = r.0;
185 827101 : if r.1 {
186 0 : let r = key.field4.overflowing_add(1);
187 0 : key.field4 = r.0;
188 0 : if r.1 {
189 0 : let r = key.field3.overflowing_add(1);
190 0 : key.field3 = r.0;
191 0 : if r.1 {
192 0 : let r = key.field2.overflowing_add(1);
193 0 : key.field2 = r.0;
194 0 : if r.1 {
195 0 : let r = key.field1.overflowing_add(1);
196 0 : key.field1 = r.0;
197 0 : assert!(!r.1);
198 0 : }
199 0 : }
200 0 : }
201 827101 : }
202 8067926 : }
203 8895027 : key
204 8895027 : }
205 :
206 : /// Convert a 18B slice to a key. This function should not be used for 16B metadata keys because `field2` is handled differently.
207 : /// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys). There are some restrictions on `field2`,
208 : /// and therefore not all 18B slices are valid page server keys.
209 5945597 : pub fn from_slice(b: &[u8]) -> Self {
210 5945597 : Key {
211 5945597 : field1: b[0],
212 5945597 : field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
213 5945597 : field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
214 5945597 : field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
215 5945597 : field5: b[13],
216 5945597 : field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
217 5945597 : }
218 5945597 : }
219 :
220 : /// Convert a key to a 18B slice. This function should not be used for getting a 16B metadata key because `field2` is handled differently.
221 : /// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).
222 7259471 : pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
223 7259471 : buf[0] = self.field1;
224 7259471 : BE::write_u32(&mut buf[1..5], self.field2);
225 7259471 : BE::write_u32(&mut buf[5..9], self.field3);
226 7259471 : BE::write_u32(&mut buf[9..13], self.field4);
227 7259471 : buf[13] = self.field5;
228 7259471 : BE::write_u32(&mut buf[14..18], self.field6);
229 7259471 : }
230 : }
231 :
232 : impl CompactKey {
233 10 : pub fn raw(&self) -> i128 {
234 10 : self.0
235 10 : }
236 : }
237 :
238 : impl From<i128> for CompactKey {
239 5 : fn from(value: i128) -> Self {
240 5 : Self(value)
241 5 : }
242 : }
243 :
244 : impl fmt::Display for Key {
245 376919 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
246 376919 : write!(
247 376919 : f,
248 376919 : "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
249 376919 : self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
250 376919 : )
251 376919 : }
252 : }
253 :
254 : impl fmt::Display for CompactKey {
255 0 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
256 0 : let k = Key::from_compact(*self);
257 0 : k.fmt(f)
258 0 : }
259 : }
260 :
261 : impl Key {
262 : pub const MIN: Key = Key {
263 : field1: u8::MIN,
264 : field2: u32::MIN,
265 : field3: u32::MIN,
266 : field4: u32::MIN,
267 : field5: u8::MIN,
268 : field6: u32::MIN,
269 : };
270 : pub const MAX: Key = Key {
271 : field1: u8::MAX,
272 : field2: u32::MAX,
273 : field3: u32::MAX,
274 : field4: u32::MAX,
275 : field5: u8::MAX,
276 : field6: u32::MAX,
277 : };
278 :
279 40473 : pub fn from_hex(s: &str) -> Result<Self> {
280 40473 : if s.len() != 36 {
281 4 : bail!("parse error");
282 40469 : }
283 40469 : Ok(Key {
284 40469 : field1: u8::from_str_radix(&s[0..2], 16)?,
285 40469 : field2: u32::from_str_radix(&s[2..10], 16)?,
286 40469 : field3: u32::from_str_radix(&s[10..18], 16)?,
287 40469 : field4: u32::from_str_radix(&s[18..26], 16)?,
288 40469 : field5: u8::from_str_radix(&s[26..28], 16)?,
289 40469 : field6: u32::from_str_radix(&s[28..36], 16)?,
290 : })
291 40473 : }
292 : }
293 :
294 : // Layout of the Key address space
295 : //
296 : // The Key struct, used to address the underlying key-value store, consists of
297 : // 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
298 : // all the data and metadata keys into those 18 bytes.
299 : //
300 : // Principles for the mapping:
301 : //
302 : // - Things that are often accessed or modified together, should be close to
303 : // each other in the key space. For example, if a relation is extended by one
304 : // block, we create a new key-value pair for the block data, and update the
305 : // relation size entry. Because of that, the RelSize key comes after all the
306 : // RelBlocks of a relation: the RelSize and the last RelBlock are always next
307 : // to each other.
308 : //
309 : // The key space is divided into several major sections, identified by the first
310 : // byte, and they form a hierarchy:
311 : //
312 : // 00 Relation data and metadata
313 : //
314 : // DbDir () -> (dbnode, spcnode)
315 : // Filenodemap
316 : // RelDir -> relnode forknum
317 : // RelBlocks
318 : // RelSize
319 : //
320 : // 01 SLRUs
321 : //
322 : // SlruDir kind
323 : // SlruSegBlocks segno
324 : // SlruSegSize
325 : //
326 : // 02 pg_twophase
327 : //
328 : // 03 misc
329 : // Controlfile
330 : // checkpoint
331 : // pg_version
332 : //
333 : // 04 aux files
334 : //
335 : // Below is a full list of the keyspace allocation:
336 : //
337 : // DbDir:
338 : // 00 00000000 00000000 00000000 00 00000000
339 : //
340 : // Filenodemap:
341 : // 00 SPCNODE DBNODE 00000000 00 00000000
342 : //
343 : // RelDir:
344 : // 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
345 : //
346 : // RelBlock:
347 : // 00 SPCNODE DBNODE RELNODE FORK BLKNUM
348 : //
349 : // RelSize:
350 : // 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
351 : //
352 : // SlruDir:
353 : // 01 kind 00000000 00000000 00 00000000
354 : //
355 : // SlruSegBlock:
356 : // 01 kind 00000001 SEGNO 00 BLKNUM
357 : //
358 : // SlruSegSize:
359 : // 01 kind 00000001 SEGNO 00 FFFFFFFF
360 : //
361 : // TwoPhaseDir:
362 : // 02 00000000 00000000 00000000 00 00000000
363 : //
364 : // TwoPhaseFile:
365 : //
366 : // 02 00000000 00000000 00XXXXXX XX XXXXXXXX
367 : //
368 : // \______XID_________/
369 : //
370 : // The 64-bit XID is stored a little awkwardly in field6, field5 and
371 : // field4. PostgreSQL v16 and below only stored a 32-bit XID, which
372 : // fit completely in field6, but starting with PostgreSQL v17, a full
373 : // 64-bit XID is used. Most pageserver code that accesses
374 : // TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
375 : // are just unused.
376 : //
377 : // ControlFile:
378 : // 03 00000000 00000000 00000000 00 00000000
379 : //
380 : // Checkpoint:
381 : // 03 00000000 00000000 00000000 00 00000001
382 : //
383 : // AuxFiles:
384 : // 03 00000000 00000000 00000000 00 00000002
385 : //
386 :
387 : //-- Section 01: relation data and metadata
388 :
/// Key of the DbDir entry: all fields zero.
pub const DBDIR_KEY: Key = Key {
    field1: 0x00,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
397 :
398 : #[inline(always)]
399 0 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
400 0 : Key {
401 0 : field1: 0x00,
402 0 : field2: spcnode,
403 0 : field3: dbnode,
404 0 : field4: 0,
405 0 : field5: 0,
406 0 : field6: 0,
407 0 : }..Key {
408 0 : field1: 0x00,
409 0 : field2: spcnode,
410 0 : field3: dbnode,
411 0 : field4: 0xffffffff,
412 0 : field5: 0xff,
413 0 : field6: 0xffffffff,
414 0 : }
415 0 : }
416 :
417 : #[inline(always)]
418 16 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
419 16 : Key {
420 16 : field1: 0x00,
421 16 : field2: spcnode,
422 16 : field3: dbnode,
423 16 : field4: 0,
424 16 : field5: 0,
425 16 : field6: 0,
426 16 : }
427 16 : }
428 :
429 : #[inline(always)]
430 1948 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
431 1948 : Key {
432 1948 : field1: 0x00,
433 1948 : field2: spcnode,
434 1948 : field3: dbnode,
435 1948 : field4: 0,
436 1948 : field5: 0,
437 1948 : field6: 1,
438 1948 : }
439 1948 : }
440 :
441 : #[inline(always)]
442 1291910 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
443 1291910 : Key {
444 1291910 : field1: 0x00,
445 1291910 : field2: rel.spcnode,
446 1291910 : field3: rel.dbnode,
447 1291910 : field4: rel.relnode,
448 1291910 : field5: rel.forknum,
449 1291910 : field6: blknum,
450 1291910 : }
451 1291910 : }
452 :
453 : #[inline(always)]
454 289754 : pub fn rel_size_to_key(rel: RelTag) -> Key {
455 289754 : Key {
456 289754 : field1: 0x00,
457 289754 : field2: rel.spcnode,
458 289754 : field3: rel.dbnode,
459 289754 : field4: rel.relnode,
460 289754 : field5: rel.forknum,
461 289754 : field6: 0xffff_ffff,
462 289754 : }
463 289754 : }
464 :
465 : impl Key {
466 : #[inline(always)]
467 5 : pub fn is_rel_size_key(&self) -> bool {
468 5 : self.field1 == 0 && self.field6 == u32::MAX
469 5 : }
470 : }
471 :
472 : #[inline(always)]
473 2 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
474 2 : Key {
475 2 : field1: 0x00,
476 2 : field2: rel.spcnode,
477 2 : field3: rel.dbnode,
478 2 : field4: rel.relnode,
479 2 : field5: rel.forknum,
480 2 : field6: 0,
481 2 : }..Key {
482 2 : field1: 0x00,
483 2 : field2: rel.spcnode,
484 2 : field3: rel.dbnode,
485 2 : field4: rel.relnode,
486 2 : field5: rel.forknum + 1,
487 2 : field6: 0,
488 2 : }
489 2 : }
490 :
491 : //-- Section 02: SLRUs
492 :
493 : #[inline(always)]
494 1404 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
495 1404 : Key {
496 1404 : field1: 0x01,
497 1404 : field2: match kind {
498 468 : SlruKind::Clog => 0x00,
499 468 : SlruKind::MultiXactMembers => 0x01,
500 468 : SlruKind::MultiXactOffsets => 0x02,
501 : },
502 : field3: 0,
503 : field4: 0,
504 : field5: 0,
505 : field6: 0,
506 : }
507 1404 : }
508 :
509 : #[inline(always)]
510 1050057 : pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
511 1050057 : if key.field1 == 0x01
512 0 : && key.field3 == 0
513 0 : && key.field4 == 0
514 0 : && key.field5 == 0
515 0 : && key.field6 == 0
516 : {
517 0 : match key.field2 {
518 0 : 0 => Some(Ok(SlruKind::Clog)),
519 0 : 1 => Some(Ok(SlruKind::MultiXactMembers)),
520 0 : 2 => Some(Ok(SlruKind::MultiXactOffsets)),
521 0 : x => Some(Err(x)),
522 : }
523 : } else {
524 1050057 : None
525 : }
526 1050057 : }
527 :
528 : #[inline(always)]
529 14 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
530 14 : Key {
531 14 : field1: 0x01,
532 14 : field2: match kind {
533 10 : SlruKind::Clog => 0x00,
534 2 : SlruKind::MultiXactMembers => 0x01,
535 2 : SlruKind::MultiXactOffsets => 0x02,
536 : },
537 : field3: 1,
538 14 : field4: segno,
539 14 : field5: 0,
540 14 : field6: blknum,
541 14 : }
542 14 : }
543 :
544 : #[inline(always)]
545 6 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
546 6 : Key {
547 6 : field1: 0x01,
548 6 : field2: match kind {
549 2 : SlruKind::Clog => 0x00,
550 2 : SlruKind::MultiXactMembers => 0x01,
551 2 : SlruKind::MultiXactOffsets => 0x02,
552 : },
553 : field3: 1,
554 6 : field4: segno,
555 6 : field5: 0,
556 6 : field6: 0xffff_ffff,
557 6 : }
558 6 : }
559 :
560 : impl Key {
561 1050057 : pub fn is_slru_segment_size_key(&self) -> bool {
562 1050057 : self.field1 == 0x01
563 0 : && self.field2 < 0x03
564 0 : && self.field3 == 0x01
565 0 : && self.field5 == 0
566 0 : && self.field6 == u32::MAX
567 1050057 : }
568 :
569 1050057 : pub fn is_slru_dir_key(&self) -> bool {
570 1050057 : slru_dir_kind(self).is_some()
571 1050057 : }
572 : }
573 :
574 : #[inline(always)]
575 0 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
576 0 : let field2 = match kind {
577 0 : SlruKind::Clog => 0x00,
578 0 : SlruKind::MultiXactMembers => 0x01,
579 0 : SlruKind::MultiXactOffsets => 0x02,
580 : };
581 :
582 0 : Key {
583 0 : field1: 0x01,
584 0 : field2,
585 0 : field3: 1,
586 0 : field4: segno,
587 0 : field5: 0,
588 0 : field6: 0,
589 0 : }..Key {
590 0 : field1: 0x01,
591 0 : field2,
592 0 : field3: 1,
593 0 : field4: segno,
594 0 : field5: 1,
595 0 : field6: 0,
596 0 : }
597 0 : }
598 :
599 : //-- Section 03: pg_twophase
600 :
/// Key of the TwoPhaseDir entry: first byte 0x02, all other fields zero.
pub const TWOPHASEDIR_KEY: Key = Key {
    field1: 0x02,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};
609 :
610 : #[inline(always)]
611 0 : pub fn twophase_file_key(xid: u64) -> Key {
612 0 : Key {
613 0 : field1: 0x02,
614 0 : field2: 0,
615 0 : field3: 0,
616 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
617 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
618 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
619 0 : }
620 0 : }
621 :
622 : #[inline(always)]
623 0 : pub fn twophase_key_range(xid: u64) -> Range<Key> {
624 0 : // 64-bit XIDs really should not overflow
625 0 : let (next_xid, overflowed) = xid.overflowing_add(1);
626 0 :
627 0 : Key {
628 0 : field1: 0x02,
629 0 : field2: 0,
630 0 : field3: 0,
631 0 : field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
632 0 : field5: ((xid & 0x000000FF00000000) >> 32) as u8,
633 0 : field6: (xid & 0x00000000FFFFFFFF) as u32,
634 0 : }..Key {
635 0 : field1: 0x02,
636 0 : field2: 0,
637 0 : field3: u32::from(overflowed),
638 0 : field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
639 0 : field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
640 0 : field6: (next_xid & 0x00000000FFFFFFFF) as u32,
641 0 : }
642 0 : }
643 :
644 : //-- Section 04: Control file
/// Key of the control file: first byte 0x03, `field6 == 0`.
pub const CONTROLFILE_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 0,
};

/// Key of the checkpoint: first byte 0x03, `field6 == 1`.
pub const CHECKPOINT_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 1,
};

/// Key of the aux files entry: first byte 0x03, `field6 == 2`.
pub const AUX_FILES_KEY: Key = Key {
    field1: 0x03,
    field2: 0,
    field3: 0,
    field4: 0,
    field5: 0,
    field6: 2,
};
671 :
672 : #[inline(always)]
673 0 : pub fn repl_origin_key(origin_id: RepOriginId) -> Key {
674 0 : Key {
675 0 : field1: REPL_ORIGIN_KEY_PREFIX,
676 0 : field2: 0,
677 0 : field3: 0,
678 0 : field4: 0,
679 0 : field5: 0,
680 0 : field6: origin_id as u32,
681 0 : }
682 0 : }
683 :
684 : /// Get the range of replorigin keys.
685 296 : pub fn repl_origin_key_range() -> Range<Key> {
686 296 : Key {
687 296 : field1: REPL_ORIGIN_KEY_PREFIX,
688 296 : field2: 0,
689 296 : field3: 0,
690 296 : field4: 0,
691 296 : field5: 0,
692 296 : field6: 0,
693 296 : }..Key {
694 296 : field1: REPL_ORIGIN_KEY_PREFIX,
695 296 : field2: 0,
696 296 : field3: 0,
697 296 : field4: 0,
698 296 : field5: 0,
699 296 : field6: 0x10000,
700 296 : }
701 296 : }
702 :
703 : // Reverse mappings for a few Keys.
704 : // These are needed by WAL redo manager.
705 :
/// Non inherited range for vectored get. Contains exactly one key, `AUX_FILES_KEY`.
pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
/// Equal to the full metadata key range.
pub const NON_INHERITED_SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
710 :
711 : impl Key {
712 : // AUX_FILES currently stores only data for logical replication (slots etc), and
713 : // we don't preserve these on a branch because safekeepers can't follow timeline
714 : // switch (and generally it likely should be optional), so ignore these.
715 : #[inline(always)]
716 0 : pub fn is_inherited_key(self) -> bool {
717 0 : !NON_INHERITED_RANGE.contains(&self) && !NON_INHERITED_SPARSE_RANGE.contains(&self)
718 0 : }
719 :
720 : #[inline(always)]
721 0 : pub fn is_rel_fsm_block_key(self) -> bool {
722 0 : self.field1 == 0x00
723 0 : && self.field4 != 0
724 0 : && self.field5 == FSM_FORKNUM
725 0 : && self.field6 != 0xffffffff
726 0 : }
727 :
728 : #[inline(always)]
729 0 : pub fn is_rel_vm_block_key(self) -> bool {
730 0 : self.field1 == 0x00
731 0 : && self.field4 != 0
732 0 : && self.field5 == VISIBILITYMAP_FORKNUM
733 0 : && self.field6 != 0xffffffff
734 0 : }
735 :
736 : #[inline(always)]
737 0 : pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
738 0 : Ok(match self.field1 {
739 : 0x01 => {
740 0 : let kind = match self.field2 {
741 0 : 0x00 => SlruKind::Clog,
742 0 : 0x01 => SlruKind::MultiXactMembers,
743 0 : 0x02 => SlruKind::MultiXactOffsets,
744 0 : _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", self.field2),
745 : };
746 0 : let segno = self.field4;
747 0 : let blknum = self.field6;
748 0 :
749 0 : (kind, segno, blknum)
750 : }
751 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
752 : })
753 0 : }
754 :
755 : #[inline(always)]
756 1622645 : pub fn is_slru_block_key(self) -> bool {
757 1622645 : self.field1 == 0x01 // SLRU-related
758 560 : && self.field3 == 0x00000001 // but not SlruDir
759 20 : && self.field6 != 0xffffffff // and not SlruSegSize
760 1622645 : }
761 :
762 : #[inline(always)]
763 4000626 : pub fn is_rel_block_key(&self) -> bool {
764 4000626 : self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
765 4000626 : }
766 :
767 : #[inline(always)]
768 200 : pub fn is_rel_dir_key(&self) -> bool {
769 200 : self.field1 == 0x00
770 200 : && self.field2 != 0
771 0 : && self.field3 != 0
772 0 : && self.field4 == 0
773 0 : && self.field5 == 0
774 0 : && self.field6 == 1
775 200 : }
776 :
777 : #[inline(always)]
778 1050057 : pub fn is_aux_file_key(&self) -> bool {
779 1050057 : self.field1 == AUX_KEY_PREFIX
780 1050057 : }
781 :
782 : /// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
783 : #[inline(always)]
784 145648 : pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
785 145648 : Ok(match self.field1 {
786 145648 : 0x00 => (
787 145648 : RelTag {
788 145648 : spcnode: self.field2,
789 145648 : dbnode: self.field3,
790 145648 : relnode: self.field4,
791 145648 : forknum: self.field5,
792 145648 : },
793 145648 : self.field6,
794 145648 : ),
795 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
796 : })
797 145648 : }
798 : }
799 :
800 : impl std::str::FromStr for Key {
801 : type Err = anyhow::Error;
802 :
803 9 : fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
804 9 : Self::from_hex(s)
805 9 : }
806 : }
807 :
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::key::is_metadata_key_slice;
    use crate::key::Key;

    use rand::Rng;
    use rand::SeedableRng;

    use super::AUX_KEY_PREFIX;

    /// Formatting a key with `Display` and parsing it back with `FromStr` must yield
    /// the same key.
    #[test]
    fn display_fromstr_bijection() {
        // Fixed seed so the generated key is reproducible.
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);

        let key = Key {
            field1: rng.gen(),
            field2: rng.gen(),
            field3: rng.gen(),
            field4: rng.gen(),
            field5: rng.gen(),
            field6: rng.gen(),
        };

        assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
    }

    /// A 16-byte metadata key must survive the round trip through `Key` and `to_i128`.
    #[test]
    fn test_metadata_keys() {
        // Aux prefix followed by 15 bytes of 0xFF: 16 bytes in total.
        let mut metadata_key = vec![AUX_KEY_PREFIX];
        metadata_key.extend_from_slice(&[0xFF; 15]);
        let encoded_key = Key::from_metadata_key(&metadata_key);
        let output_key = encoded_key.to_i128().to_be_bytes();
        assert_eq!(metadata_key, output_key);
        assert!(encoded_key.is_metadata_key());
        assert!(is_metadata_key_slice(&metadata_key));
    }

    /// Decoding the largest positive `i128` must not panic.
    #[test]
    fn test_possible_largest_key() {
        Key::from_i128(0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF);
        // TODO: put this key into the system and see if anything breaks.
    }
}
|