Line data Source code
1 : //!
2 : //! Common utilities for dealing with PostgreSQL relation files.
3 : //!
4 : use once_cell::sync::OnceCell;
5 : use regex::Regex;
6 :
7 : //
8 : // Fork numbers, from relpath.h
9 : //
/// Fork number of the main fork. The main fork has no suffix in relation
/// file names; it is implied when no fork name is present.
pub const MAIN_FORKNUM: u8 = 0;
/// Fork number of the fork whose files carry the `fsm` suffix.
pub const FSM_FORKNUM: u8 = 1;
/// Fork number of the visibility map fork, whose files carry the `vm` suffix.
pub const VISIBILITYMAP_FORKNUM: u8 = 2;
/// Fork number of the fork whose files carry the `init` suffix.
pub const INIT_FORKNUM: u8 = 3;
14 :
/// Errors produced while interpreting the name of a relation data file.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum FilePathError {
    /// The fork suffix is present but is not one of the recognized fork names.
    #[error("invalid relation fork name")]
    InvalidForkName,
    /// The file name does not have the expected shape, or one of its numeric
    /// components could not be parsed as an integer.
    #[error("invalid relation data file name")]
    InvalidFileName,
}
22 :
23 : impl From<core::num::ParseIntError> for FilePathError {
24 2 : fn from(_e: core::num::ParseIntError) -> Self {
25 2 : FilePathError::InvalidFileName
26 2 : }
27 : }
28 :
29 : /// Convert Postgres relation file's fork suffix to fork number.
30 562476 : pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
31 562476 : match forkname {
32 : // "main" is not in filenames, it's implicit if the fork name is not present
33 417545 : None => Ok(MAIN_FORKNUM),
34 144931 : Some("fsm") => Ok(FSM_FORKNUM),
35 70672 : Some("vm") => Ok(VISIBILITYMAP_FORKNUM),
36 6 : Some("init") => Ok(INIT_FORKNUM),
37 2 : Some(_) => Err(FilePathError::InvalidForkName),
38 : }
39 562476 : }
40 :
41 : /// Convert Postgres fork number to the right suffix of the relation data file.
42 5632367 : pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
43 5632367 : match forknum {
44 5522754 : MAIN_FORKNUM => None,
45 86015 : FSM_FORKNUM => Some("fsm"),
46 23560 : VISIBILITYMAP_FORKNUM => Some("vm"),
47 38 : INIT_FORKNUM => Some("init"),
48 0 : _ => Some("UNKNOWN FORKNUM"),
49 : }
50 5632367 : }
51 :
52 : /// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.
53 : ///
54 : /// Formats:
55 : ///
56 : /// ```text
57 : /// <oid>
58 : /// <oid>_<fork name>
59 : /// <oid>.<segment number>
60 : /// <oid>_<fork name>.<segment number>
61 : /// ```
62 : ///
63 : /// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
64 : ///
65 562486 : pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
66 562486 : static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
67 562486 : RELFILE_RE.get_or_init(|| {
68 384 : Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
69 562486 : });
70 :
71 562486 : let caps = RELFILE_RE
72 562486 : .get()
73 562486 : .unwrap()
74 562486 : .captures(fname)
75 562486 : .ok_or(FilePathError::InvalidFileName)?;
76 :
77 562478 : let relnode_str = caps.name("relnode").unwrap().as_str();
78 562478 : let relnode = relnode_str.parse::<u32>()?;
79 :
80 562476 : let forkname = caps.name("forkname").map(|f| f.as_str());
81 562476 : let forknum = forkname_to_number(forkname)?;
82 :
83 562474 : let segno_match = caps.name("segno");
84 562474 : let segno = if segno_match.is_none() {
85 562464 : 0
86 : } else {
87 10 : segno_match.unwrap().as_str().parse::<u32>()?
88 : };
89 :
90 562474 : Ok((relnode, forknum, segno))
91 562486 : }
92 :
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed names: every fork, with and without a segment number.
    #[test]
    fn test_parse_valid_relfilenames() {
        let cases: &[(&str, (u32, u8, u32))] = &[
            ("1234", (1234, 0, 0)),
            ("1234_fsm", (1234, 1, 0)),
            ("1234_vm", (1234, 2, 0)),
            ("1234_init", (1234, 3, 0)),
            ("1234.12", (1234, 0, 12)),
            ("1234_fsm.12", (1234, 1, 12)),
            ("1234_vm.12", (1234, 2, 12)),
            ("1234_init.12", (1234, 3, 12)),
            // relfilenode is unsigned, so it can go up to 2^32-1
            ("3147483648", (3147483648, 0, 0)),
        ];

        for &(fname, expected) in cases {
            assert_eq!(parse_relfilename(fname), Ok(expected));
        }
    }

    /// Malformed names and the specific error each one must produce.
    #[test]
    fn test_parse_invalid_relfilenames() {
        let cases: &[(&str, FilePathError)] = &[
            ("foo", FilePathError::InvalidFileName),
            ("1.2.3", FilePathError::InvalidFileName),
            ("1234_invalid", FilePathError::InvalidForkName),
            ("1234_", FilePathError::InvalidFileName),
            // too large for u32
            ("12345678901", FilePathError::InvalidFileName),
            ("-1234", FilePathError::InvalidFileName),
        ];

        for (fname, expected) in cases.iter() {
            assert_eq!(parse_relfilename(fname), Err(expected.clone()));
        }
    }

    /// Names that PostgreSQL should never produce but that are still accepted.
    #[test]
    fn test_parse_weird_relfilenames() {
        // we accept 0 for the relfilenode, but PostgreSQL should never do that.
        let zero_relnode = parse_relfilename("0");
        assert_eq!(zero_relnode, Ok((0, 0, 0)));

        // PostgreSQL has a limit of 2^32-2 blocks in a table. With 8k block size and
        // 1 GB segments, the max segment number is 32767. But we accept larger values
        // currently.
        let huge_segno = parse_relfilename("1.123456");
        assert_eq!(huge_segno, Ok((1, 0, 123456)));
    }
}
|