Line data Source code
1 : //
2 : // This file contains common utilities for dealing with PostgreSQL WAL files and
3 : // LSNs.
4 : //
5 : // Many of these functions have been copied from PostgreSQL, and rewritten in
6 : // Rust. That's why they don't follow the usual Rust naming conventions, they
7 : // have been named the same as the corresponding PostgreSQL functions instead.
8 : //
9 :
10 : use super::super::waldecoder::WalStreamDecoder;
11 : use super::bindings::{
12 : CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, FullTransactionId, TimeLineID, TimestampTz,
13 : XLogLongPageHeaderData, XLogPageHeaderData, XLogRecPtr, XLogRecord, XLogSegNo, XLOG_PAGE_MAGIC,
14 : };
15 : use super::wal_generator::LogicalMessageGenerator;
16 : use super::PG_MAJORVERSION;
17 : use crate::pg_constants;
18 : use crate::PG_TLI;
19 : use crate::{uint32, uint64, Oid};
20 : use crate::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
21 :
22 : use bytes::BytesMut;
23 : use bytes::{Buf, Bytes};
24 :
25 : use log::*;
26 :
27 : use serde::Serialize;
28 : use std::ffi::{CString, OsStr};
29 : use std::fs::File;
30 : use std::io::prelude::*;
31 : use std::io::ErrorKind;
32 : use std::io::SeekFrom;
33 : use std::path::Path;
34 : use std::time::SystemTime;
35 : use utils::bin_ser::DeserializeError;
36 : use utils::bin_ser::SerializeError;
37 :
38 : use utils::lsn::Lsn;
39 :
40 : pub const XLOG_FNAME_LEN: usize = 24;
41 : pub const XLP_BKP_REMOVABLE: u16 = 0x0004;
42 : pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
43 : pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
44 : pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
45 :
46 : pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = size_of::<XLogPageHeaderData>();
47 : pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = size_of::<XLogLongPageHeaderData>();
48 : pub const XLOG_SIZE_OF_XLOG_RECORD: usize = size_of::<XLogRecord>();
49 : #[allow(clippy::identity_op)]
50 : pub const SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT: usize = 1 * 2;
51 :
/// Granularity, in transaction IDs, at which the persisted checkpoint metadata
/// is refreshed.
///
/// The stored `checkpoint.nextXid` must stay larger than every XID seen in WAL.
/// Persisting the checkpoint for every transaction would be too expensive, so
/// `nextXid` is rounded *up* to a multiple of this interval and the metadata is
/// rewritten only once per `XID_CHECKPOINT_INTERVAL` transactions.
///
/// The value must not exceed `BLCKSZ * CLOG_XACTS_PER_BYTE`, otherwise the
/// CLOG_TRUNCATE mechanism cannot extend CLOG correctly. It is a power of two
/// because the rounding is done with a bit mask.
const XID_CHECKPOINT_INTERVAL: u32 = 1 << 10;
60 :
61 185 : pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
62 185 : (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo
63 185 : }
64 :
65 59 : pub fn XLogSegNoOffsetToRecPtr(
66 59 : segno: XLogSegNo,
67 59 : offset: u32,
68 59 : wal_segsz_bytes: usize,
69 59 : ) -> XLogRecPtr {
70 59 : segno * (wal_segsz_bytes as u64) + (offset as u64)
71 59 : }
72 :
73 63 : pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
74 63 : format!(
75 63 : "{:>08X}{:>08X}{:>08X}",
76 63 : tli,
77 63 : logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
78 63 : logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
79 63 : )
80 63 : }
81 :
82 59 : pub fn XLogFromFileName(
83 59 : fname: &OsStr,
84 59 : wal_seg_size: usize,
85 59 : ) -> anyhow::Result<(XLogSegNo, TimeLineID)> {
86 59 : if let Some(fname_str) = fname.to_str() {
87 59 : let tli = u32::from_str_radix(&fname_str[0..8], 16)?;
88 59 : let log = u32::from_str_radix(&fname_str[8..16], 16)? as XLogSegNo;
89 59 : let seg = u32::from_str_radix(&fname_str[16..24], 16)? as XLogSegNo;
90 59 : Ok((log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli))
91 : } else {
92 0 : anyhow::bail!("non-ut8 filename: {:?}", fname);
93 : }
94 59 : }
95 :
96 140 : pub fn IsXLogFileName(fname: &OsStr) -> bool {
97 140 : if let Some(fname) = fname.to_str() {
98 1896 : fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit())
99 : } else {
100 0 : false
101 : }
102 140 : }
103 :
104 6 : pub fn IsPartialXLogFileName(fname: &OsStr) -> bool {
105 6 : if let Some(fname) = fname.to_str() {
106 6 : fname.ends_with(".partial") && IsXLogFileName(OsStr::new(&fname[0..fname.len() - 8]))
107 : } else {
108 0 : false
109 : }
110 6 : }
111 :
112 : /// If LSN points to the beginning of the page, then shift it to first record,
113 : /// otherwise align on 8-bytes boundary (required for WAL records)
114 28 : pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
115 28 : if lsn.0 % XLOG_BLCKSZ as u64 == 0 {
116 0 : let hdr_size = if lsn.0 % seg_sz as u64 == 0 {
117 0 : XLOG_SIZE_OF_XLOG_LONG_PHD
118 : } else {
119 0 : XLOG_SIZE_OF_XLOG_SHORT_PHD
120 : };
121 0 : lsn + hdr_size as u64
122 : } else {
123 28 : lsn.align()
124 : }
125 28 : }
126 :
127 : /// Generate a pg_control file, for a basebackup for starting up Postgres at the given LSN
128 : ///
129 : /// 'pg_control_bytes' and 'checkpoint_bytes' are the contents of those keys persisted in
130 : /// the pageserver. They use the same format as the PostgreSQL control file and the
131 : /// checkpoint record, but see walingest.rs for how exactly they are kept up to date.
132 : /// 'lsn' is the LSN at which we're starting up.
133 : ///
134 : /// Returns:
135 : /// - pg_control file contents
136 : /// - system_identifier, extracted from the persisted information
137 : /// - true, if we're starting up from a "clean shutdown", i.e. if there was a shutdown
138 : /// checkpoint at the given LSN
139 0 : pub fn generate_pg_control(
140 0 : pg_control_bytes: &[u8],
141 0 : checkpoint_bytes: &[u8],
142 0 : lsn: Lsn,
143 0 : ) -> anyhow::Result<(Bytes, u64, bool)> {
144 0 : let mut pg_control = ControlFileData::decode(pg_control_bytes)?;
145 0 : let mut checkpoint = CheckPoint::decode(checkpoint_bytes)?;
146 :
147 : // Generate new pg_control needed for bootstrap
148 : //
149 : // NB: In the checkpoint struct that we persist in the pageserver, we have a different
150 : // convention for the 'redo' field than in PostgreSQL: On a shutdown checkpoint,
151 : // 'redo' points the *end* of the checkpoint WAL record. On PostgreSQL, it points to
152 : // the beginning. Furthermore, on an online checkpoint, 'redo' is set to 0.
153 : //
154 : // We didn't always have this convention however, and old persisted records will have
155 : // old REDO values that point to some old LSN.
156 : //
157 : // The upshot is that if 'redo' is equal to the "current" LSN, there was a shutdown
158 : // checkpoint record at that point in WAL, with no new WAL records after it. That case
159 : // can be treated as starting from a clean shutdown. All other cases are treated as
160 : // non-clean shutdown. In Neon, we don't do WAL replay at startup in either case, so
161 : // that distinction doesn't matter very much. As of this writing, it only affects
162 : // whether the persisted pg_stats information can be used or not.
163 : //
164 : // In the Checkpoint struct in the returned pg_control file, the redo pointer is
165 : // always set to the LSN we're starting at, to hint that no WAL replay is required.
166 : // (There's some neon-specific code in Postgres startup to make that work, though.
167 : // Just setting the redo pointer is not sufficient.)
168 0 : let was_shutdown = Lsn(checkpoint.redo) == lsn;
169 0 : checkpoint.redo = normalize_lsn(lsn, WAL_SEGMENT_SIZE).0;
170 0 :
171 0 : // We use DBState_DB_SHUTDOWNED even if it was not a clean shutdown. The
172 0 : // neon-specific code at postgres startup ignores the state stored in the control
173 0 : // file, similar to archive recovery in standalone PostgreSQL. Similarly, the
174 0 : // checkPoint pointer is ignored, so just set it to 0.
175 0 : pg_control.checkPoint = 0;
176 0 : pg_control.checkPointCopy = checkpoint;
177 0 : pg_control.state = DBState_DB_SHUTDOWNED;
178 0 :
179 0 : Ok((pg_control.encode(), pg_control.system_identifier, was_shutdown))
180 0 : }
181 :
182 4 : pub fn get_current_timestamp() -> TimestampTz {
183 4 : to_pg_timestamp(SystemTime::now())
184 4 : }
185 :
186 : // Module to reduce the scope of the constants
187 : mod timestamp_conversions {
188 : use std::time::Duration;
189 :
190 : use anyhow::Context;
191 :
192 : use super::*;
193 :
194 : const UNIX_EPOCH_JDATE: u64 = 2440588; // == date2j(1970, 1, 1)
195 : const POSTGRES_EPOCH_JDATE: u64 = 2451545; // == date2j(2000, 1, 1)
196 : const SECS_PER_DAY: u64 = 86400;
197 : const USECS_PER_SEC: u64 = 1000000;
198 : const SECS_DIFF_UNIX_TO_POSTGRES_EPOCH: u64 =
199 : (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY;
200 :
201 12 : pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
202 12 : match time.duration_since(SystemTime::UNIX_EPOCH) {
203 12 : Ok(n) => {
204 12 : ((n.as_secs() - SECS_DIFF_UNIX_TO_POSTGRES_EPOCH) * USECS_PER_SEC
205 12 : + n.subsec_micros() as u64) as i64
206 : }
207 0 : Err(_) => panic!("SystemTime before UNIX EPOCH!"),
208 : }
209 12 : }
210 :
211 24 : pub fn try_from_pg_timestamp(time: TimestampTz) -> anyhow::Result<SystemTime> {
212 24 : let time: u64 = time
213 24 : .try_into()
214 24 : .context("timestamp before millenium (postgres epoch)")?;
215 24 : let since_unix_epoch = time + SECS_DIFF_UNIX_TO_POSTGRES_EPOCH * USECS_PER_SEC;
216 24 : SystemTime::UNIX_EPOCH
217 24 : .checked_add(Duration::from_micros(since_unix_epoch))
218 24 : .context("SystemTime overflow")
219 24 : }
220 : }
221 :
222 : pub use timestamp_conversions::{to_pg_timestamp, try_from_pg_timestamp};
223 :
224 : // Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
225 : // start_lsn must point to some previously known record boundary (beginning of
226 : // the next record). If no valid record after is found, start_lsn is returned
227 : // back.
228 32 : pub fn find_end_of_wal(
229 32 : data_dir: &Path,
230 32 : wal_seg_size: usize,
231 32 : start_lsn: Lsn, // start reading WAL at this point; must point at record start_lsn.
232 32 : ) -> anyhow::Result<Lsn> {
233 32 : let mut result = start_lsn;
234 32 : let mut curr_lsn = start_lsn;
235 32 : let mut buf = [0u8; XLOG_BLCKSZ];
236 32 : let pg_version = PG_MAJORVERSION[1..3].parse::<u32>().unwrap();
237 32 : debug!("find_end_of_wal PG_VERSION: {}", pg_version);
238 :
239 32 : let mut decoder = WalStreamDecoder::new(start_lsn, pg_version);
240 :
241 : // loop over segments
242 : loop {
243 40 : let segno = curr_lsn.segment_number(wal_seg_size);
244 40 : let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);
245 40 : let seg_file_path = data_dir.join(seg_file_name);
246 40 : match open_wal_segment(&seg_file_path)? {
247 : None => {
248 : // no more segments
249 0 : debug!(
250 0 : "find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
251 : result, seg_file_path
252 : );
253 0 : return Ok(result);
254 : }
255 40 : Some(mut segment) => {
256 40 : let seg_offs = curr_lsn.segment_offset(wal_seg_size);
257 40 : segment.seek(SeekFrom::Start(seg_offs as u64))?;
258 : // loop inside segment
259 16504 : while curr_lsn.segment_number(wal_seg_size) == segno {
260 16496 : let bytes_read = segment.read(&mut buf)?;
261 16496 : if bytes_read == 0 {
262 0 : debug!(
263 0 : "find_end_of_wal reached end at {:?}, EOF in segment {:?} at offset {}",
264 0 : result,
265 0 : seg_file_path,
266 0 : curr_lsn.segment_offset(wal_seg_size)
267 : );
268 0 : return Ok(result);
269 16496 : }
270 16496 : curr_lsn += bytes_read as u64;
271 16496 : decoder.feed_bytes(&buf[0..bytes_read]);
272 :
273 : // advance result past all completely read records
274 : loop {
275 16664 : match decoder.poll_decode() {
276 168 : Ok(Some(record)) => result = record.0,
277 32 : Err(e) => {
278 32 : debug!(
279 32 : "find_end_of_wal reached end at {:?}, decode error: {:?}",
280 : result, e
281 : );
282 32 : return Ok(result);
283 : }
284 16464 : Ok(None) => break, // need more data
285 : }
286 : }
287 : }
288 : }
289 : }
290 : }
291 32 : }
292 :
293 : // Open .partial or full WAL segment file, if present.
294 40 : fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
295 40 : let mut partial_path = seg_file_path.to_owned();
296 40 : partial_path.set_extension("partial");
297 40 : match File::open(partial_path) {
298 32 : Ok(file) => Ok(Some(file)),
299 8 : Err(e) => match e.kind() {
300 : ErrorKind::NotFound => {
301 : // .partial not found, try full
302 8 : match File::open(seg_file_path) {
303 8 : Ok(file) => Ok(Some(file)),
304 0 : Err(e) => match e.kind() {
305 0 : ErrorKind::NotFound => Ok(None),
306 0 : _ => Err(e.into()),
307 : },
308 : }
309 : }
310 0 : _ => Err(e.into()),
311 : },
312 : }
313 40 : }
314 :
315 : impl XLogRecord {
316 316737 : pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {
317 : use utils::bin_ser::LeSer;
318 316737 : XLogRecord::des(buf)
319 316737 : }
320 :
321 292300 : pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogRecord, DeserializeError> {
322 : use utils::bin_ser::LeSer;
323 292300 : XLogRecord::des_from(&mut buf.reader())
324 292300 : }
325 :
326 23854 : pub fn encode(&self) -> Result<Bytes, SerializeError> {
327 : use utils::bin_ser::LeSer;
328 23854 : Ok(self.ser()?.into())
329 23854 : }
330 :
331 : // Is this record an XLOG_SWITCH record? They need some special processing,
332 316737 : pub fn is_xlog_switch_record(&self) -> bool {
333 316737 : self.xl_info == pg_constants::XLOG_SWITCH && self.xl_rmid == pg_constants::RM_XLOG_ID
334 316737 : }
335 : }
336 :
337 : impl XLogPageHeaderData {
338 23616 : pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogPageHeaderData, DeserializeError> {
339 : use utils::bin_ser::LeSer;
340 23616 : XLogPageHeaderData::des_from(&mut buf.reader())
341 23616 : }
342 :
343 689 : pub fn encode(&self) -> Result<Bytes, SerializeError> {
344 : use utils::bin_ser::LeSer;
345 689 : self.ser().map(|b| b.into())
346 689 : }
347 : }
348 :
349 : impl XLogLongPageHeaderData {
350 8 : pub fn from_bytes<B: Buf>(buf: &mut B) -> Result<XLogLongPageHeaderData, DeserializeError> {
351 : use utils::bin_ser::LeSer;
352 8 : XLogLongPageHeaderData::des_from(&mut buf.reader())
353 8 : }
354 :
355 3 : pub fn encode(&self) -> Result<Bytes, SerializeError> {
356 : use utils::bin_ser::LeSer;
357 3 : self.ser().map(|b| b.into())
358 3 : }
359 : }
360 :
361 : pub const SIZEOF_CHECKPOINT: usize = size_of::<CheckPoint>();
362 :
363 : impl CheckPoint {
364 16 : pub fn encode(&self) -> Result<Bytes, SerializeError> {
365 : use utils::bin_ser::LeSer;
366 16 : Ok(self.ser()?.into())
367 16 : }
368 :
369 48 : pub fn decode(buf: &[u8]) -> Result<CheckPoint, DeserializeError> {
370 : use utils::bin_ser::LeSer;
371 48 : CheckPoint::des(buf)
372 48 : }
373 :
374 : /// Update next XID based on provided new_xid and stored epoch.
375 : /// Next XID should be greater than new_xid. This handles 32-bit
376 : /// XID wraparound correctly.
377 : ///
378 : /// Returns 'true' if the XID was updated.
379 291684 : pub fn update_next_xid(&mut self, xid: u32) -> bool {
380 291684 : // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparround.
381 291684 : let mut new_xid = std::cmp::max(
382 291684 : xid.wrapping_add(1),
383 291684 : pg_constants::FIRST_NORMAL_TRANSACTION_ID,
384 291684 : );
385 291684 : // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
386 291684 : // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
387 291684 : new_xid =
388 291684 : new_xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
389 291684 : let full_xid = self.nextXid.value;
390 291684 : let old_xid = full_xid as u32;
391 291684 : if new_xid.wrapping_sub(old_xid) as i32 > 0 {
392 12 : let mut epoch = full_xid >> 32;
393 12 : if new_xid < old_xid {
394 0 : // wrap-around
395 0 : epoch += 1;
396 12 : }
397 12 : let nextXid = (epoch << 32) | new_xid as u64;
398 12 :
399 12 : if nextXid != self.nextXid.value {
400 12 : self.nextXid = FullTransactionId { value: nextXid };
401 12 : return true;
402 0 : }
403 291672 : }
404 291672 : false
405 291684 : }
406 :
407 : /// Advance next multi-XID/offset to those given in arguments.
408 : ///
409 : /// It's important that this handles wraparound correctly. This should match the
410 : /// MultiXactAdvanceNextMXact() logic in PostgreSQL's xlog_redo() function.
411 : ///
412 : /// Returns 'true' if the Checkpoint was updated.
413 24 : pub fn update_next_multixid(&mut self, multi_xid: u32, multi_offset: u32) -> bool {
414 24 : let mut modified = false;
415 24 :
416 24 : if multi_xid.wrapping_sub(self.nextMulti) as i32 > 0 {
417 16 : self.nextMulti = multi_xid;
418 16 : modified = true;
419 16 : }
420 :
421 24 : if multi_offset.wrapping_sub(self.nextMultiOffset) as i32 > 0 {
422 20 : self.nextMultiOffset = multi_offset;
423 20 : modified = true;
424 20 : }
425 :
426 24 : modified
427 24 : }
428 : }
429 :
430 : /// Generate new, empty WAL segment, with correct block headers at the first
431 : /// page of the segment and the page that contains the given LSN.
432 : /// We need this segment to start compute node.
433 3 : pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Bytes, SerializeError> {
434 3 : let mut seg_buf = BytesMut::with_capacity(WAL_SEGMENT_SIZE);
435 3 :
436 3 : let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
437 3 :
438 3 : let page_off = lsn.block_offset();
439 3 : let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE);
440 3 :
441 3 : let first_page_only = seg_off < XLOG_BLCKSZ;
442 : // If first records starts in the middle of the page, pretend in page header
443 : // there is a fake record which ends where first real record starts. This
444 : // makes pg_waldump etc happy.
445 3 : let (shdr_rem_len, infoflags) = if first_page_only && seg_off > 0 {
446 0 : assert!(seg_off >= XLOG_SIZE_OF_XLOG_LONG_PHD);
447 : // xlp_rem_len doesn't include page header, hence the subtraction.
448 0 : (
449 0 : seg_off - XLOG_SIZE_OF_XLOG_LONG_PHD,
450 0 : pg_constants::XLP_FIRST_IS_CONTRECORD,
451 0 : )
452 : } else {
453 3 : (0, 0)
454 : };
455 :
456 3 : let hdr = XLogLongPageHeaderData {
457 3 : std: {
458 3 : XLogPageHeaderData {
459 3 : xlp_magic: XLOG_PAGE_MAGIC as u16,
460 3 : xlp_info: pg_constants::XLP_LONG_HEADER | infoflags,
461 3 : xlp_tli: PG_TLI,
462 3 : xlp_pageaddr: pageaddr,
463 3 : xlp_rem_len: shdr_rem_len as u32,
464 3 : ..Default::default() // Put 0 in padding fields.
465 3 : }
466 3 : },
467 3 : xlp_sysid: system_id,
468 3 : xlp_seg_size: WAL_SEGMENT_SIZE as u32,
469 3 : xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
470 3 : };
471 :
472 3 : let hdr_bytes = hdr.encode()?;
473 3 : seg_buf.extend_from_slice(&hdr_bytes);
474 3 :
475 3 : //zero out the rest of the file
476 3 : seg_buf.resize(WAL_SEGMENT_SIZE, 0);
477 3 :
478 3 : if !first_page_only {
479 3 : let block_offset = lsn.page_offset_in_segment(WAL_SEGMENT_SIZE) as usize;
480 : // see comments above about XLP_FIRST_IS_CONTRECORD and xlp_rem_len.
481 3 : let (xlp_rem_len, xlp_info) = if page_off > 0 {
482 3 : assert!(page_off >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);
483 3 : (
484 3 : (page_off - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64) as u32,
485 3 : pg_constants::XLP_FIRST_IS_CONTRECORD,
486 3 : )
487 : } else {
488 0 : (0, 0)
489 : };
490 3 : let header = XLogPageHeaderData {
491 3 : xlp_magic: XLOG_PAGE_MAGIC as u16,
492 3 : xlp_info,
493 3 : xlp_tli: PG_TLI,
494 3 : xlp_pageaddr: lsn.page_lsn().0,
495 3 : xlp_rem_len,
496 3 : ..Default::default() // Put 0 in padding fields.
497 3 : };
498 3 : let hdr_bytes = header.encode()?;
499 :
500 3 : debug_assert!(seg_buf.len() > block_offset + hdr_bytes.len());
501 3 : debug_assert_ne!(block_offset, 0);
502 :
503 3 : seg_buf[block_offset..block_offset + hdr_bytes.len()].copy_from_slice(&hdr_bytes[..]);
504 0 : }
505 :
506 3 : Ok(seg_buf.freeze())
507 3 : }
508 :
509 : #[repr(C)]
510 : #[derive(Serialize)]
511 : pub struct XlLogicalMessage {
512 : pub db_id: Oid,
513 : pub transactional: uint32, // bool, takes 4 bytes due to alignment in C structures
514 : pub prefix_size: uint64,
515 : pub message_size: uint64,
516 : }
517 :
518 : impl XlLogicalMessage {
519 11927 : pub fn encode(&self) -> Bytes {
520 : use utils::bin_ser::LeSer;
521 11927 : self.ser().unwrap().into()
522 11927 : }
523 : }
524 :
525 : /// Create new WAL record for non-transactional logical message.
526 : /// Used for creating artificial WAL for tests, as LogicalMessage
527 : /// record is basically no-op.
528 4 : pub fn encode_logical_message(prefix: &str, message: &str) -> Bytes {
529 4 : // This function can take untrusted input, so discard any NUL bytes in the prefix string.
530 4 : let prefix = CString::new(prefix.replace('\0', "")).expect("no NULs");
531 4 : let message = message.as_bytes();
532 4 : LogicalMessageGenerator::new(&prefix, message)
533 4 : .next()
534 4 : .unwrap()
535 4 : .encode(Lsn(0))
536 4 : }
537 :
#[cfg(test)]
mod tests {
    use super::*;

    /// Timestamps must survive a round trip through the Postgres
    /// representation, in both directions, at microsecond precision.
    #[test]
    fn test_ts_conversion() {
        let micros_since_unix_epoch = |t: SystemTime| {
            t.duration_since(SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_micros()
        };

        // SystemTime -> Postgres -> SystemTime
        let wall_clock = SystemTime::now();
        let restored = try_from_pg_timestamp(to_pg_timestamp(wall_clock)).unwrap();
        assert_eq!(
            micros_since_unix_epoch(wall_clock),
            micros_since_unix_epoch(restored)
        );

        // Postgres -> SystemTime -> Postgres
        let pg_now = get_current_timestamp();
        let pg_restored = to_pg_timestamp(try_from_pg_timestamp(pg_now).unwrap());
        assert_eq!(pg_now, pg_restored);
    }

    // If you need to craft WAL and write tests for this module, put it at wal_craft crate.
}
|