Line data Source code
1 : //!
2 : //! Common utilities for dealing with PostgreSQL relation files.
3 : //!
4 : use once_cell::sync::OnceCell;
5 : use regex::Regex;
6 :
7 : //
8 : // Fork numbers, from relpath.h
9 : //
/// Main fork. It has no suffix in relation file names.
pub const MAIN_FORKNUM: u8 = 0;
/// Fork whose relation files carry the `fsm` suffix.
pub const FSM_FORKNUM: u8 = 1;
/// Fork whose relation files carry the `vm` suffix.
pub const VISIBILITYMAP_FORKNUM: u8 = 2;
/// Fork whose relation files carry the `init` suffix.
pub const INIT_FORKNUM: u8 = 3;
14 :
/// Errors reported while interpreting the name of a relation data file.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum FilePathError {
    /// The fork suffix is not one of the recognized names (`fsm`, `vm`, `init`).
    #[error("invalid relation fork name")]
    InvalidForkName,
    /// The file name does not have the expected shape, or one of its numeric
    /// parts could not be parsed as an integer.
    #[error("invalid relation data file name")]
    InvalidFileName,
}
22 :
23 : impl From<core::num::ParseIntError> for FilePathError {
24 1 : fn from(_e: core::num::ParseIntError) -> Self {
25 1 : FilePathError::InvalidFileName
26 1 : }
27 : }
28 :
29 : /// Convert Postgres relation file's fork suffix to fork number.
30 1911 : pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
31 1911 : match forkname {
32 : // "main" is not in filenames, it's implicit if the fork name is not present
33 1433 : None => Ok(MAIN_FORKNUM),
34 478 : Some("fsm") => Ok(FSM_FORKNUM),
35 239 : Some("vm") => Ok(VISIBILITYMAP_FORKNUM),
36 5 : Some("init") => Ok(INIT_FORKNUM),
37 3 : Some(_) => Err(FilePathError::InvalidForkName),
38 : }
39 1911 : }
40 :
41 : /// Convert Postgres fork number to the right suffix of the relation data file.
42 0 : pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
43 0 : match forknum {
44 0 : MAIN_FORKNUM => None,
45 0 : FSM_FORKNUM => Some("fsm"),
46 0 : VISIBILITYMAP_FORKNUM => Some("vm"),
47 0 : INIT_FORKNUM => Some("init"),
48 0 : _ => Some("UNKNOWN FORKNUM"),
49 : }
50 0 : }
51 :
52 : /// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.
53 : ///
54 : /// Formats:
55 : ///
56 : /// ```text
57 : /// <oid>
58 : /// <oid>_<fork name>
59 : /// <oid>.<segment number>
60 : /// <oid>_<fork name>.<segment number>
61 : /// ```
62 : ///
63 : /// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
64 : ///
65 1909 : pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
66 : static RELFILE_RE: OnceCell<Regex> = OnceCell::new();
67 1909 : RELFILE_RE.get_or_init(|| {
68 5 : Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap()
69 1909 : });
70 :
71 1909 : let caps = RELFILE_RE
72 1909 : .get()
73 1909 : .unwrap()
74 1909 : .captures(fname)
75 1909 : .ok_or(FilePathError::InvalidFileName)?;
76 :
77 1905 : let relnode_str = caps.name("relnode").unwrap().as_str();
78 1905 : let relnode = relnode_str.parse::<u32>()?;
79 :
80 1904 : let forkname = caps.name("forkname").map(|f| f.as_str());
81 1904 : let forknum = forkname_to_number(forkname)?;
82 :
83 1903 : let segno_match = caps.name("segno");
84 1903 : let segno = if segno_match.is_none() {
85 1898 : 0
86 : } else {
87 5 : segno_match.unwrap().as_str().parse::<u32>()?
88 : };
89 :
90 1903 : Ok((relnode, forknum, segno))
91 1909 : }
92 :
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_relfilename` on every file name in `cases` and compare the
    /// outcome with the expected result listed next to it.
    fn check_all(cases: &[(&str, Result<(u32, u8, u32), FilePathError>)]) {
        for (fname, expected) in cases {
            assert_eq!(
                &parse_relfilename(fname),
                expected,
                "file name: {:?}",
                fname
            );
        }
    }

    #[test]
    fn test_parse_valid_relfilenames() {
        check_all(&[
            // No segment number.
            ("1234", Ok((1234, 0, 0))),
            ("1234_fsm", Ok((1234, 1, 0))),
            ("1234_vm", Ok((1234, 2, 0))),
            ("1234_init", Ok((1234, 3, 0))),
            // With a segment number.
            ("1234.12", Ok((1234, 0, 12))),
            ("1234_fsm.12", Ok((1234, 1, 12))),
            ("1234_vm.12", Ok((1234, 2, 12))),
            ("1234_init.12", Ok((1234, 3, 12))),
            // relfilenode is unsigned, so it can go up to 2^32-1
            ("3147483648", Ok((3147483648, 0, 0))),
        ]);
    }

    #[test]
    fn test_parse_invalid_relfilenames() {
        check_all(&[
            ("foo", Err(FilePathError::InvalidFileName)),
            ("1.2.3", Err(FilePathError::InvalidFileName)),
            ("1234_invalid", Err(FilePathError::InvalidForkName)),
            ("1234_", Err(FilePathError::InvalidFileName)),
            // too large for u32
            ("12345678901", Err(FilePathError::InvalidFileName)),
            ("-1234", Err(FilePathError::InvalidFileName)),
        ]);
    }

    #[test]
    fn test_parse_weird_relfilenames() {
        check_all(&[
            // we accept 0 for the relfilenode, but PostgreSQL should never do that.
            ("0", Ok((0, 0, 0))),
            // PostgreSQL has a limit of 2^32-2 blocks in a table. With 8k block size and
            // 1 GB segments, the max segment number is 32767. But we accept larger values
            // currently.
            ("1.123456", Ok((1, 0, 123456))),
        ]);
    }
}
|