Line data Source code
1 : use anyhow::{bail, Result};
2 : use byteorder::{ByteOrder, BE};
3 : use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
4 : use postgres_ffi::{Oid, TransactionId};
5 : use serde::{Deserialize, Serialize};
6 : use std::{fmt, ops::Range};
7 :
8 : use crate::reltag::{BlockNumber, RelTag, SlruKind};
9 :
10 : /// Key used in the Repository kv-store.
11 : ///
12 : /// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
13 : /// for what we actually store in these fields.
14 258576984 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
15 : pub struct Key {
16 : pub field1: u8,
17 : pub field2: u32,
18 : pub field3: u32,
19 : pub field4: u32,
20 : pub field5: u8,
21 : pub field6: u32,
22 : }
23 :
24 : pub const KEY_SIZE: usize = 18;
25 :
26 : impl Key {
27 : /// 'field2' is used to store tablespaceid for relations and small enum numbers for other relish.
28 : /// As long as Neon does not support tablespace (because of lack of access to local file system),
29 : /// we can assume that only some predefined namespace OIDs are used which can fit in u16
30 97976722 : pub fn to_i128(&self) -> i128 {
31 97976722 : assert!(self.field2 < 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
32 97976722 : (((self.field1 & 0xf) as i128) << 120)
33 97976722 : | (((self.field2 & 0xFFFF) as i128) << 104)
34 97976722 : | ((self.field3 as i128) << 72)
35 97976722 : | ((self.field4 as i128) << 40)
36 97976722 : | ((self.field5 as i128) << 32)
37 97976722 : | self.field6 as i128
38 97976722 : }
39 :
40 13674925 : pub const fn from_i128(x: i128) -> Self {
41 13674925 : Key {
42 13674925 : field1: ((x >> 120) & 0xf) as u8,
43 13674925 : field2: ((x >> 104) & 0xFFFF) as u32,
44 13674925 : field3: (x >> 72) as u32,
45 13674925 : field4: (x >> 40) as u32,
46 13674925 : field5: (x >> 32) as u8,
47 13674925 : field6: x as u32,
48 13674925 : }
49 13674925 : }
50 :
51 20319480 : pub fn next(&self) -> Key {
52 20319480 : self.add(1)
53 20319480 : }
54 :
55 20326387 : pub fn add(&self, x: u32) -> Key {
56 20326387 : let mut key = *self;
57 20326387 :
58 20326387 : let r = key.field6.overflowing_add(x);
59 20326387 : key.field6 = r.0;
60 20326387 : if r.1 {
61 1619094 : let r = key.field5.overflowing_add(1);
62 1619094 : key.field5 = r.0;
63 1619094 : if r.1 {
64 0 : let r = key.field4.overflowing_add(1);
65 0 : key.field4 = r.0;
66 0 : if r.1 {
67 0 : let r = key.field3.overflowing_add(1);
68 0 : key.field3 = r.0;
69 0 : if r.1 {
70 0 : let r = key.field2.overflowing_add(1);
71 0 : key.field2 = r.0;
72 0 : if r.1 {
73 0 : let r = key.field1.overflowing_add(1);
74 0 : key.field1 = r.0;
75 0 : assert!(!r.1);
76 0 : }
77 0 : }
78 0 : }
79 1619094 : }
80 18707293 : }
81 20326387 : key
82 20326387 : }
83 :
84 17514736 : pub fn from_slice(b: &[u8]) -> Self {
85 17514736 : Key {
86 17514736 : field1: b[0],
87 17514736 : field2: u32::from_be_bytes(b[1..5].try_into().unwrap()),
88 17514736 : field3: u32::from_be_bytes(b[5..9].try_into().unwrap()),
89 17514736 : field4: u32::from_be_bytes(b[9..13].try_into().unwrap()),
90 17514736 : field5: b[13],
91 17514736 : field6: u32::from_be_bytes(b[14..18].try_into().unwrap()),
92 17514736 : }
93 17514736 : }
94 :
95 76703151 : pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
96 76703151 : buf[0] = self.field1;
97 76703151 : BE::write_u32(&mut buf[1..5], self.field2);
98 76703151 : BE::write_u32(&mut buf[5..9], self.field3);
99 76703151 : BE::write_u32(&mut buf[9..13], self.field4);
100 76703151 : buf[13] = self.field5;
101 76703151 : BE::write_u32(&mut buf[14..18], self.field6);
102 76703151 : }
103 : }
104 :
105 : impl fmt::Display for Key {
106 3220934 : fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107 3220934 : write!(
108 3220934 : f,
109 3220934 : "{:02X}{:08X}{:08X}{:08X}{:02X}{:08X}",
110 3220934 : self.field1, self.field2, self.field3, self.field4, self.field5, self.field6
111 3220934 : )
112 3220934 : }
113 : }
114 :
115 : impl Key {
116 : pub const MIN: Key = Key {
117 : field1: u8::MIN,
118 : field2: u32::MIN,
119 : field3: u32::MIN,
120 : field4: u32::MIN,
121 : field5: u8::MIN,
122 : field6: u32::MIN,
123 : };
124 : pub const MAX: Key = Key {
125 : field1: u8::MAX,
126 : field2: u32::MAX,
127 : field3: u32::MAX,
128 : field4: u32::MAX,
129 : field5: u8::MAX,
130 : field6: u32::MAX,
131 : };
132 :
133 202480 : pub fn from_hex(s: &str) -> Result<Self> {
134 202480 : if s.len() != 36 {
135 0 : bail!("parse error");
136 202480 : }
137 202480 : Ok(Key {
138 202480 : field1: u8::from_str_radix(&s[0..2], 16)?,
139 202480 : field2: u32::from_str_radix(&s[2..10], 16)?,
140 202480 : field3: u32::from_str_radix(&s[10..18], 16)?,
141 202480 : field4: u32::from_str_radix(&s[18..26], 16)?,
142 202480 : field5: u8::from_str_radix(&s[26..28], 16)?,
143 202480 : field6: u32::from_str_radix(&s[28..36], 16)?,
144 : })
145 202480 : }
146 : }
147 :
148 : // Layout of the Key address space
149 : //
150 : // The Key struct, used to address the underlying key-value store, consists of
151 : // 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
152 : // all the data and metadata keys into those 18 bytes.
153 : //
154 : // Principles for the mapping:
155 : //
156 : // - Things that are often accessed or modified together, should be close to
157 : // each other in the key space. For example, if a relation is extended by one
158 : // block, we create a new key-value pair for the block data, and update the
159 : // relation size entry. Because of that, the RelSize key comes after all the
160 : // RelBlocks of a relation: the RelSize and the last RelBlock are always next
161 : // to each other.
162 : //
163 : // The key space is divided into four major sections, identified by the first
164 : // byte, and they form a hierarchy:
165 : //
166 : // 00 Relation data and metadata
167 : //
168 : // DbDir () -> (dbnode, spcnode)
169 : // Filenodemap
170 : // RelDir -> relnode forknum
171 : // RelBlocks
172 : // RelSize
173 : //
174 : // 01 SLRUs
175 : //
176 : // SlruDir kind
177 : // SlruSegBlocks segno
178 : // SlruSegSize
179 : //
180 : // 02 pg_twophase
181 : //
182 : // 03 misc
183 : // Controlfile
184 : // checkpoint
185 : // pg_version
186 : //
187 : // 04 aux files
188 : //
189 : // Below is a full list of the keyspace allocation:
190 : //
191 : // DbDir:
192 : // 00 00000000 00000000 00000000 00 00000000
193 : //
194 : // Filenodemap:
195 : // 00 SPCNODE DBNODE 00000000 00 00000000
196 : //
197 : // RelDir:
198 : // 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
199 : //
200 : // RelBlock:
201 : // 00 SPCNODE DBNODE RELNODE FORK BLKNUM
202 : //
203 : // RelSize:
204 : // 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
205 : //
206 : // SlruDir:
207 : // 01 kind 00000000 00000000 00 00000000
208 : //
209 : // SlruSegBlock:
210 : // 01 kind 00000001 SEGNO 00 BLKNUM
211 : //
212 : // SlruSegSize:
213 : // 01 kind 00000001 SEGNO 00 FFFFFFFF
214 : //
215 : // TwoPhaseDir:
216 : // 02 00000000 00000000 00000000 00 00000000
217 : //
218 : // TwoPhaseFile:
219 : // 02 00000000 00000000 00000000 00 XID
220 : //
221 : // ControlFile:
222 : // 03 00000000 00000000 00000000 00 00000000
223 : //
224 : // Checkpoint:
225 : // 03 00000000 00000000 00000000 00 00000001
226 : //
227 : // AuxFiles:
228 : // 03 00000000 00000000 00000000 00 00000002
229 : //
230 :
231 : //-- Section 00: relation data and metadata
232 :
233 : pub const DBDIR_KEY: Key = Key {
234 : field1: 0x00,
235 : field2: 0,
236 : field3: 0,
237 : field4: 0,
238 : field5: 0,
239 : field6: 0,
240 : };
241 :
242 : #[inline(always)]
243 3 : pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
244 3 : Key {
245 3 : field1: 0x00,
246 3 : field2: spcnode,
247 3 : field3: dbnode,
248 3 : field4: 0,
249 3 : field5: 0,
250 3 : field6: 0,
251 3 : }..Key {
252 3 : field1: 0x00,
253 3 : field2: spcnode,
254 3 : field3: dbnode,
255 3 : field4: 0xffffffff,
256 3 : field5: 0xff,
257 3 : field6: 0xffffffff,
258 3 : }
259 3 : }
260 :
261 : #[inline(always)]
262 7919 : pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
263 7919 : Key {
264 7919 : field1: 0x00,
265 7919 : field2: spcnode,
266 7919 : field3: dbnode,
267 7919 : field4: 0,
268 7919 : field5: 0,
269 7919 : field6: 0,
270 7919 : }
271 7919 : }
272 :
273 : #[inline(always)]
274 988533 : pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
275 988533 : Key {
276 988533 : field1: 0x00,
277 988533 : field2: spcnode,
278 988533 : field3: dbnode,
279 988533 : field4: 0,
280 988533 : field5: 0,
281 988533 : field6: 1,
282 988533 : }
283 988533 : }
284 :
285 : #[inline(always)]
286 131857985 : pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
287 131857985 : Key {
288 131857985 : field1: 0x00,
289 131857985 : field2: rel.spcnode,
290 131857985 : field3: rel.dbnode,
291 131857985 : field4: rel.relnode,
292 131857985 : field5: rel.forknum,
293 131857985 : field6: blknum,
294 131857985 : }
295 131857985 : }
296 :
297 : #[inline(always)]
298 4324136 : pub fn rel_size_to_key(rel: RelTag) -> Key {
299 4324136 : Key {
300 4324136 : field1: 0x00,
301 4324136 : field2: rel.spcnode,
302 4324136 : field3: rel.dbnode,
303 4324136 : field4: rel.relnode,
304 4324136 : field5: rel.forknum,
305 4324136 : field6: 0xffffffff,
306 4324136 : }
307 4324136 : }
308 :
309 : #[inline(always)]
310 67233 : pub fn rel_key_range(rel: RelTag) -> Range<Key> {
311 67233 : Key {
312 67233 : field1: 0x00,
313 67233 : field2: rel.spcnode,
314 67233 : field3: rel.dbnode,
315 67233 : field4: rel.relnode,
316 67233 : field5: rel.forknum,
317 67233 : field6: 0,
318 67233 : }..Key {
319 67233 : field1: 0x00,
320 67233 : field2: rel.spcnode,
321 67233 : field3: rel.dbnode,
322 67233 : field4: rel.relnode,
323 67233 : field5: rel.forknum + 1,
324 67233 : field6: 0,
325 67233 : }
326 67233 : }
327 :
328 : //-- Section 01: SLRUs
329 :
330 : #[inline(always)]
331 12882 : pub fn slru_dir_to_key(kind: SlruKind) -> Key {
332 12882 : Key {
333 12882 : field1: 0x01,
334 12882 : field2: match kind {
335 7008 : SlruKind::Clog => 0x00,
336 3080 : SlruKind::MultiXactMembers => 0x01,
337 2794 : SlruKind::MultiXactOffsets => 0x02,
338 : },
339 : field3: 0,
340 : field4: 0,
341 : field5: 0,
342 : field6: 0,
343 : }
344 12882 : }
345 :
346 : #[inline(always)]
347 6178166 : pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
348 6178166 : Key {
349 6178166 : field1: 0x01,
350 6178166 : field2: match kind {
351 6121361 : SlruKind::Clog => 0x00,
352 28718 : SlruKind::MultiXactMembers => 0x01,
353 28087 : SlruKind::MultiXactOffsets => 0x02,
354 : },
355 : field3: 1,
356 6178166 : field4: segno,
357 6178166 : field5: 0,
358 6178166 : field6: blknum,
359 6178166 : }
360 6178166 : }
361 :
362 : #[inline(always)]
363 11666 : pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
364 11666 : Key {
365 11666 : field1: 0x01,
366 11666 : field2: match kind {
367 7152 : SlruKind::Clog => 0x00,
368 2554 : SlruKind::MultiXactMembers => 0x01,
369 1960 : SlruKind::MultiXactOffsets => 0x02,
370 : },
371 : field3: 1,
372 11666 : field4: segno,
373 11666 : field5: 0,
374 11666 : field6: 0xffffffff,
375 11666 : }
376 11666 : }
377 :
378 : #[inline(always)]
379 10 : pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
380 10 : let field2 = match kind {
381 10 : SlruKind::Clog => 0x00,
382 0 : SlruKind::MultiXactMembers => 0x01,
383 0 : SlruKind::MultiXactOffsets => 0x02,
384 : };
385 :
386 10 : Key {
387 10 : field1: 0x01,
388 10 : field2,
389 10 : field3: 1,
390 10 : field4: segno,
391 10 : field5: 0,
392 10 : field6: 0,
393 10 : }..Key {
394 10 : field1: 0x01,
395 10 : field2,
396 10 : field3: 1,
397 10 : field4: segno,
398 10 : field5: 1,
399 10 : field6: 0,
400 10 : }
401 10 : }
402 :
403 : //-- Section 02: pg_twophase
404 :
405 : pub const TWOPHASEDIR_KEY: Key = Key {
406 : field1: 0x02,
407 : field2: 0,
408 : field3: 0,
409 : field4: 0,
410 : field5: 0,
411 : field6: 0,
412 : };
413 :
414 : #[inline(always)]
415 6 : pub fn twophase_file_key(xid: TransactionId) -> Key {
416 6 : Key {
417 6 : field1: 0x02,
418 6 : field2: 0,
419 6 : field3: 0,
420 6 : field4: 0,
421 6 : field5: 0,
422 6 : field6: xid,
423 6 : }
424 6 : }
425 :
426 : #[inline(always)]
427 2 : pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
428 2 : let (next_xid, overflowed) = xid.overflowing_add(1);
429 2 :
430 2 : Key {
431 2 : field1: 0x02,
432 2 : field2: 0,
433 2 : field3: 0,
434 2 : field4: 0,
435 2 : field5: 0,
436 2 : field6: xid,
437 2 : }..Key {
438 2 : field1: 0x02,
439 2 : field2: 0,
440 2 : field3: 0,
441 2 : field4: 0,
442 2 : field5: u8::from(overflowed),
443 2 : field6: next_xid,
444 2 : }
445 2 : }
446 :
447 : //-- Section 03: Control file
448 : pub const CONTROLFILE_KEY: Key = Key {
449 : field1: 0x03,
450 : field2: 0,
451 : field3: 0,
452 : field4: 0,
453 : field5: 0,
454 : field6: 0,
455 : };
456 :
457 : pub const CHECKPOINT_KEY: Key = Key {
458 : field1: 0x03,
459 : field2: 0,
460 : field3: 0,
461 : field4: 0,
462 : field5: 0,
463 : field6: 1,
464 : };
465 :
466 : pub const AUX_FILES_KEY: Key = Key {
467 : field1: 0x03,
468 : field2: 0,
469 : field3: 0,
470 : field4: 0,
471 : field5: 0,
472 : field6: 2,
473 : };
474 :
475 : // Reverse mappings for a few Keys.
476 : // These are needed by WAL redo manager.
477 :
478 : // AUX_FILES currently stores only data for logical replication (slots etc), and
479 : // we don't preserve these on a branch because safekeepers can't follow timeline
480 : // switch (and generally it likely should be optional), so ignore these.
481 : #[inline(always)]
482 32334675 : pub fn is_inherited_key(key: Key) -> bool {
483 32334675 : key != AUX_FILES_KEY
484 32334675 : }
485 :
486 : #[inline(always)]
487 0 : pub fn is_rel_fsm_block_key(key: Key) -> bool {
488 0 : key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
489 0 : }
490 :
491 : #[inline(always)]
492 0 : pub fn is_rel_vm_block_key(key: Key) -> bool {
493 0 : key.field1 == 0x00
494 0 : && key.field4 != 0
495 0 : && key.field5 == VISIBILITYMAP_FORKNUM
496 0 : && key.field6 != 0xffffffff
497 0 : }
498 :
499 : #[inline(always)]
500 18923991 : pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
501 18923991 : Ok(match key.field1 {
502 : 0x01 => {
503 18923991 : let kind = match key.field2 {
504 18826309 : 0x00 => SlruKind::Clog,
505 49392 : 0x01 => SlruKind::MultiXactMembers,
506 48290 : 0x02 => SlruKind::MultiXactOffsets,
507 0 : _ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
508 : };
509 18923991 : let segno = key.field4;
510 18923991 : let blknum = key.field6;
511 18923991 :
512 18923991 : (kind, segno, blknum)
513 : }
514 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
515 : })
516 18923991 : }
517 :
518 : #[inline(always)]
519 0 : pub fn is_slru_block_key(key: Key) -> bool {
520 0 : key.field1 == 0x01 // SLRU-related
521 0 : && key.field3 == 0x00000001 // but not SlruDir
522 0 : && key.field6 != 0xffffffff // and not SlruSegSize
523 0 : }
524 :
525 : #[inline(always)]
526 65237179 : pub fn is_rel_block_key(key: &Key) -> bool {
527 65237179 : key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
528 65237179 : }
529 :
530 : /// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
531 : #[inline(always)]
532 2861198 : pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
533 2861198 : Ok(match key.field1 {
534 2861198 : 0x00 => (
535 2861198 : RelTag {
536 2861198 : spcnode: key.field2,
537 2861198 : dbnode: key.field3,
538 2861198 : relnode: key.field4,
539 2861198 : forknum: key.field5,
540 2861198 : },
541 2861198 : key.field6,
542 2861198 : ),
543 0 : _ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
544 : })
545 2861198 : }
546 :
547 : impl std::str::FromStr for Key {
548 : type Err = anyhow::Error;
549 :
550 14 : fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
551 14 : Self::from_hex(s)
552 14 : }
553 : }
554 :
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use crate::key::Key;

    use rand::Rng;
    use rand::SeedableRng;

    /// Formatting a key with `Display` and parsing the text back with
    /// `FromStr` must give the original key. The key is drawn from a
    /// fixed-seed RNG so the test is reproducible.
    #[test]
    fn display_fromstr_bijection() {
        let mut rng = rand::rngs::StdRng::seed_from_u64(42);

        let original = Key {
            field1: rng.gen(),
            field2: rng.gen(),
            field3: rng.gen(),
            field4: rng.gen(),
            field5: rng.gen(),
            field6: rng.gen(),
        };

        let rendered = original.to_string();
        let reparsed = Key::from_str(&rendered).unwrap();

        assert_eq!(original, reparsed);
    }
}
|