Line data Source code
1 : #![allow(non_upper_case_globals)]
2 : #![allow(non_camel_case_types)]
3 : #![allow(non_snake_case)]
4 : // bindgen creates some unsafe code with no doc comments.
5 : #![allow(clippy::missing_safety_doc)]
// As of 1.63, bindgen output contains many u32 -> u32 transmutes.
7 : #![allow(clippy::useless_transmute)]
// Modules included via the postgres_ffi macro depend on the types of one specific version,
// and including them once per version triggers an overly eager lint.
10 : #![allow(clippy::duplicate_mod)]
11 :
12 : use bytes::Bytes;
13 : use utils::bin_ser::SerializeError;
14 : use utils::lsn::Lsn;
15 :
/// Declares a public module named after `$version` that bundles everything
/// specific to one PostgreSQL major version.
///
/// The generated module contains:
/// * `bindings`: the bindgen output included from `OUT_DIR` plus the
///   `pg_constants_<version>.rs` file included by relative path;
/// * the helper submodules declared below, which (because of `#[path = "."]`)
///   are expected to resolve to the source files next to this one for every
///   version;
/// * `PG_MAJORVERSION`, the version identifier as a string;
/// * re-exports of a few frequently used binding symbols.
macro_rules! postgres_ffi {
    ($version:ident) => {
        #[path = "."]
        pub mod $version {
            pub const PG_MAJORVERSION: &str = stringify!($version);

            pub mod bindings {
                // bindgen generates bindings for a lot of stuff we don't need
                #![allow(dead_code)]

                use serde::{Deserialize, Serialize};

                // Bindings emitted into OUT_DIR for this version.
                include!(concat!(
                    env!("OUT_DIR"),
                    "/bindings_",
                    stringify!($version),
                    ".rs"
                ));

                // Constants that belong to this version only.
                include!(concat!("pg_constants_", stringify!($version), ".rs"));
            }

            pub mod controlfile_utils;
            pub mod nonrelfile_utils;
            pub mod wal_craft_test_export;
            pub mod waldecoder_handler;
            pub mod xlog_utils;

            // Re-export some symbols from bindings
            pub use bindings::{CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, XLogRecord};
        }
    };
}
48 :
/// Invokes the macro named by the argument once for each PostgreSQL version
/// listed here (`v14`, then `v15`), passing the version identifier as that
/// macro's only argument.
#[macro_export]
macro_rules! for_all_postgres_versions {
    ($callback:tt) => {
        $callback!(v14);
        $callback!(v15);
    };
}
56 :
57 : for_all_postgres_versions! { postgres_ffi }
58 :
59 : pub mod pg_constants;
60 : pub mod relfile_utils;
61 :
62 : // Export some widely used datatypes that are unlikely to change across Postgres versions
63 : pub use v14::bindings::{uint32, uint64, Oid};
64 : pub use v14::bindings::{BlockNumber, OffsetNumber};
65 : pub use v14::bindings::{MultiXactId, TransactionId};
66 : pub use v14::bindings::{TimeLineID, TimestampTz, XLogRecPtr, XLogSegNo};
67 :
68 : // Likewise for these, although the assumption that these don't change is a little more iffy.
69 : pub use v14::bindings::{MultiXactOffset, MultiXactStatus};
70 : pub use v14::bindings::{PageHeaderData, XLogRecord};
71 : pub use v14::xlog_utils::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
72 :
73 : pub use v14::bindings::{CheckPoint, ControlFileData};
74 :
// from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and
// --with-segsize=SEGSIZE, but assume the defaults for now.

/// Default block size in bytes.
pub const BLCKSZ: u16 = 8192;
/// Number of `BLCKSZ`-sized blocks that fit in 1 GiB.
pub const RELSEG_SIZE: u32 = (1 << 30) / (BLCKSZ as u32);
/// Default WAL block size in bytes.
pub const XLOG_BLCKSZ: usize = 8192;
/// WAL segment size in bytes (16 MiB).
pub const WAL_SEGMENT_SIZE: usize = 16 << 20;

/// Sixteen WAL blocks, in bytes.
pub const MAX_SEND_SIZE: usize = 16 * XLOG_BLCKSZ;
83 :
84 : // Export some version independent functions that are used outside of this mod
85 : pub use v14::xlog_utils::encode_logical_message;
86 : pub use v14::xlog_utils::get_current_timestamp;
87 : pub use v14::xlog_utils::to_pg_timestamp;
88 : pub use v14::xlog_utils::XLogFileName;
89 :
90 : pub use v14::bindings::DBState_DB_SHUTDOWNED;
91 :
92 314422 : pub fn bkpimage_is_compressed(bimg_info: u8, version: u32) -> anyhow::Result<bool> {
93 314422 : match version {
94 314422 : 14 => Ok(bimg_info & v14::bindings::BKPIMAGE_IS_COMPRESSED != 0),
95 0 : 15 => Ok(bimg_info & v15::bindings::BKPIMAGE_COMPRESS_PGLZ != 0
96 0 : || bimg_info & v15::bindings::BKPIMAGE_COMPRESS_LZ4 != 0
97 0 : || bimg_info & v15::bindings::BKPIMAGE_COMPRESS_ZSTD != 0),
98 0 : _ => anyhow::bail!("Unknown version {}", version),
99 : }
100 314422 : }
101 :
102 661 : pub fn generate_wal_segment(
103 661 : segno: u64,
104 661 : system_id: u64,
105 661 : pg_version: u32,
106 661 : lsn: Lsn,
107 661 : ) -> Result<Bytes, SerializeError> {
108 661 : assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE));
109 :
110 661 : match pg_version {
111 661 : 14 => v14::xlog_utils::generate_wal_segment(segno, system_id, lsn),
112 0 : 15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn),
113 0 : _ => Err(SerializeError::BadInput),
114 : }
115 661 : }
116 :
117 660 : pub fn generate_pg_control(
118 660 : pg_control_bytes: &[u8],
119 660 : checkpoint_bytes: &[u8],
120 660 : lsn: Lsn,
121 660 : pg_version: u32,
122 660 : ) -> anyhow::Result<(Bytes, u64)> {
123 660 : match pg_version {
124 660 : 14 => v14::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
125 0 : 15 => v15::xlog_utils::generate_pg_control(pg_control_bytes, checkpoint_bytes, lsn),
126 0 : _ => anyhow::bail!("Unknown version {}", pg_version),
127 : }
128 660 : }
129 :
130 : // PG timeline is always 1, changing it doesn't have any useful meaning in Neon.
131 : //
132 : // NOTE: this is not to be confused with Neon timelines; different concept!
133 : //
134 : // It's a shaky assumption, that it's always 1. We might import a
135 : // PostgreSQL data directory that has gone through timeline bumps,
136 : // for example. FIXME later.
137 : pub const PG_TLI: u32 = 1;
138 :
139 : // See TransactionIdIsNormal in transam.h
140 80 : pub const fn transaction_id_is_normal(id: TransactionId) -> bool {
141 80 : id > pg_constants::FIRST_NORMAL_TRANSACTION_ID
142 80 : }
143 :
144 : // See TransactionIdPrecedes in transam.c
145 40 : pub const fn transaction_id_precedes(id1: TransactionId, id2: TransactionId) -> bool {
146 40 : /*
147 40 : * If either ID is a permanent XID then we can just do unsigned
148 40 : * comparison. If both are normal, do a modulo-2^32 comparison.
149 40 : */
150 40 :
151 40 : if !(transaction_id_is_normal(id1)) || !transaction_id_is_normal(id2) {
152 0 : return id1 < id2;
153 40 : }
154 40 :
155 40 : let diff = id1.wrapping_sub(id2) as i32;
156 40 : diff < 0
157 40 : }
158 :
/// Checks whether a page is not yet initialized (port of the Postgres
/// `PageIsNew()` macro).
///
/// The page counts as new when bytes 14 and 15 of the header (`pd_upper`) are
/// both zero.
///
/// # Panics
/// Panics if `pg` is too short for the bytes that get inspected.
pub fn page_is_new(pg: &[u8]) -> bool {
    // pd_upper == 0; the second byte is only looked at when the first is zero.
    let upper_first_byte = pg[14];
    if upper_first_byte != 0 {
        return false;
    }
    pg[15] == 0
}
163 :
164 : // ExtractLSN from page header
165 0 : pub fn page_get_lsn(pg: &[u8]) -> Lsn {
166 0 : Lsn(
167 0 : ((u32::from_le_bytes(pg[0..4].try_into().unwrap()) as u64) << 32)
168 0 : | u32::from_le_bytes(pg[4..8].try_into().unwrap()) as u64,
169 0 : )
170 0 : }
171 :
172 138540 : pub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {
173 138540 : pg[0..4].copy_from_slice(&((lsn.0 >> 32) as u32).to_le_bytes());
174 138540 : pg[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());
175 138540 : }
176 :
177 : // This is port of function with the same name from freespace.c.
178 : // The only difference is that it does not have "level" parameter because XLogRecordPageWithFreeSpace
179 : // always call it with level=FSM_BOTTOM_LEVEL
180 45 : pub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber {
181 45 : let mut leafno = addr;
182 45 : const FSM_TREE_DEPTH: u32 = if pg_constants::SLOTS_PER_FSM_PAGE >= 1626 {
183 45 : 3
184 45 : } else {
185 45 : 4
186 45 : };
187 45 :
188 45 : /* Count upper level nodes required to address the leaf page */
189 45 : let mut pages: BlockNumber = 0;
190 180 : for _l in 0..FSM_TREE_DEPTH {
191 135 : pages += leafno + 1;
192 135 : leafno /= pg_constants::SLOTS_PER_FSM_PAGE;
193 135 : }
194 : /* Turn the page count into 0-based block number */
195 45 : pages - 1
196 45 : }
197 :
198 : pub mod waldecoder {
199 :
200 : use crate::{v14, v15};
201 : use bytes::{Buf, Bytes, BytesMut};
202 : use std::num::NonZeroU32;
203 : use thiserror::Error;
204 : use utils::lsn::Lsn;
205 :
206 : pub enum State {
207 : WaitingForRecord,
208 : ReassemblingRecord {
209 : recordbuf: BytesMut,
210 : contlen: NonZeroU32,
211 : },
212 : SkippingEverything {
213 : skip_until_lsn: Lsn,
214 : },
215 : }
216 :
217 : pub struct WalStreamDecoder {
218 : pub lsn: Lsn,
219 : pub pg_version: u32,
220 : pub inputbuf: BytesMut,
221 : pub state: State,
222 : }
223 :
224 7 : #[derive(Error, Debug, Clone)]
225 : #[error("{msg} at {lsn}")]
226 : pub struct WalDecodeError {
227 : pub msg: String,
228 : pub lsn: Lsn,
229 : }
230 :
231 : impl WalStreamDecoder {
232 2597 : pub fn new(lsn: Lsn, pg_version: u32) -> WalStreamDecoder {
233 2597 : WalStreamDecoder {
234 2597 : lsn,
235 2597 : pg_version,
236 2597 : inputbuf: BytesMut::new(),
237 2597 : state: State::WaitingForRecord,
238 2597 : }
239 2597 : }
240 :
241 : // The latest LSN position fed to the decoder.
242 1460894 : pub fn available(&self) -> Lsn {
243 1460894 : self.lsn + self.inputbuf.remaining() as u64
244 1460894 : }
245 :
246 2254791 : pub fn feed_bytes(&mut self, buf: &[u8]) {
247 2254791 : self.inputbuf.extend_from_slice(buf);
248 2254791 : }
249 :
250 182719081 : pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
251 182719081 : match self.pg_version {
252 : // This is a trick to support both versions simultaneously.
253 : // See WalStreamDecoderHandler comments.
254 : 14 => {
255 : use self::v14::waldecoder_handler::WalStreamDecoderHandler;
256 182714917 : self.poll_decode_internal()
257 : }
258 : 15 => {
259 : use self::v15::waldecoder_handler::WalStreamDecoderHandler;
260 4164 : self.poll_decode_internal()
261 : }
262 0 : _ => Err(WalDecodeError {
263 0 : msg: format!("Unknown version {}", self.pg_version),
264 0 : lsn: self.lsn,
265 0 : }),
266 : }
267 182719081 : }
268 : }
269 : }
|