Line data Source code
1 : use std::{ffi::CString, sync::Arc};
2 :
3 : use byteorder::{LittleEndian, WriteBytesExt};
4 : use crc32c::crc32c_append;
5 : use parking_lot::{Mutex, MutexGuard};
6 : use postgres_ffi::{
7 : pg_constants::{
8 : RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE, XLP_LONG_HEADER, XLR_BLOCK_ID_DATA_LONG,
9 : XLR_BLOCK_ID_DATA_SHORT,
10 : },
11 : v16::{
12 : wal_craft_test_export::{XLogLongPageHeaderData, XLogPageHeaderData, XLOG_PAGE_MAGIC},
13 : xlog_utils::{
14 : XLogSegNoOffsetToRecPtr, XlLogicalMessage, XLOG_RECORD_CRC_OFFS,
15 : XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
16 : XLP_FIRST_IS_CONTRECORD,
17 : },
18 : XLogRecord,
19 : },
20 : WAL_SEGMENT_SIZE, XLOG_BLCKSZ,
21 : };
22 : use utils::lsn::Lsn;
23 :
24 : use super::block_storage::BlockStorage;
25 :
26 : /// Simulation implementation of walproposer WAL storage.
27 : pub struct DiskWalProposer {
28 : state: Mutex<State>,
29 : }
30 :
31 : impl DiskWalProposer {
32 74171 : pub fn new() -> Arc<DiskWalProposer> {
33 74171 : Arc::new(DiskWalProposer {
34 74171 : state: Mutex::new(State {
35 74171 : internal_available_lsn: Lsn(0),
36 74171 : prev_lsn: Lsn(0),
37 74171 : disk: BlockStorage::new(),
38 74171 : }),
39 74171 : })
40 74171 : }
41 :
42 70952 : pub fn lock(&self) -> MutexGuard<State> {
43 70952 : self.state.lock()
44 70952 : }
45 : }
46 :
47 : pub struct State {
48 : // flush_lsn
49 : internal_available_lsn: Lsn,
50 : // needed for WAL generation
51 : prev_lsn: Lsn,
52 : // actual WAL storage
53 : disk: BlockStorage,
54 : }
55 :
56 : impl State {
57 24876 : pub fn read(&self, pos: u64, buf: &mut [u8]) {
58 24876 : self.disk.read(pos, buf);
59 24876 : // TODO: fail on reading uninitialized data
60 24876 : }
61 :
62 129295 : pub fn write(&mut self, pos: u64, buf: &[u8]) {
63 129295 : self.disk.write(pos, buf);
64 129295 : }
65 :
66 : /// Update the internal available LSN to the given value.
67 2574 : pub fn reset_to(&mut self, lsn: Lsn) {
68 2574 : self.internal_available_lsn = lsn;
69 2574 : }
70 :
71 : /// Get current LSN.
72 10186 : pub fn flush_rec_ptr(&self) -> Lsn {
73 10186 : self.internal_available_lsn
74 10186 : }
75 :
76 : /// Generate a new WAL record at the current LSN.
77 31875 : pub fn insert_logical_message(&mut self, prefix: &str, msg: &[u8]) -> anyhow::Result<()> {
78 31875 : let prefix_cstr = CString::new(prefix)?;
79 31875 : let prefix_bytes = prefix_cstr.as_bytes_with_nul();
80 31875 :
81 31875 : let lm = XlLogicalMessage {
82 31875 : db_id: 0,
83 31875 : transactional: 0,
84 31875 : prefix_size: prefix_bytes.len() as ::std::os::raw::c_ulong,
85 31875 : message_size: msg.len() as ::std::os::raw::c_ulong,
86 31875 : };
87 31875 :
88 31875 : let record_bytes = lm.encode();
89 31875 : let rdatas: Vec<&[u8]> = vec![&record_bytes, prefix_bytes, msg];
90 31875 : insert_wal_record(self, rdatas, RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE)
91 31875 : }
92 : }
93 :
94 31875 : fn insert_wal_record(
95 31875 : state: &mut State,
96 31875 : rdatas: Vec<&[u8]>,
97 31875 : rmid: u8,
98 31875 : info: u8,
99 31875 : ) -> anyhow::Result<()> {
100 31875 : // bytes right after the header, in the same rdata block
101 31875 : let mut scratch = Vec::new();
102 95625 : let mainrdata_len: usize = rdatas.iter().map(|rdata| rdata.len()).sum();
103 31875 :
104 31875 : if mainrdata_len > 0 {
105 31875 : if mainrdata_len > 255 {
106 0 : scratch.push(XLR_BLOCK_ID_DATA_LONG);
107 0 : // TODO: verify endiness
108 0 : let _ = scratch.write_u32::<LittleEndian>(mainrdata_len as u32);
109 31875 : } else {
110 31875 : scratch.push(XLR_BLOCK_ID_DATA_SHORT);
111 31875 : scratch.push(mainrdata_len as u8);
112 31875 : }
113 0 : }
114 :
115 31875 : let total_len: u32 = (XLOG_SIZE_OF_XLOG_RECORD + scratch.len() + mainrdata_len) as u32;
116 31875 : let size = maxalign(total_len);
117 31875 : assert!(size as usize > XLOG_SIZE_OF_XLOG_RECORD);
118 :
119 31875 : let start_bytepos = recptr_to_bytepos(state.internal_available_lsn);
120 31875 : let end_bytepos = start_bytepos + size as u64;
121 31875 :
122 31875 : let start_recptr = bytepos_to_recptr(start_bytepos);
123 31875 : let end_recptr = bytepos_to_recptr(end_bytepos);
124 31875 :
125 31875 : assert!(recptr_to_bytepos(start_recptr) == start_bytepos);
126 31875 : assert!(recptr_to_bytepos(end_recptr) == end_bytepos);
127 :
128 31875 : let mut crc = crc32c_append(0, &scratch);
129 127500 : for rdata in &rdatas {
130 95625 : crc = crc32c_append(crc, rdata);
131 95625 : }
132 :
133 31875 : let mut header = XLogRecord {
134 31875 : xl_tot_len: total_len,
135 31875 : xl_xid: 0,
136 31875 : xl_prev: state.prev_lsn.0,
137 31875 : xl_info: info,
138 31875 : xl_rmid: rmid,
139 31875 : __bindgen_padding_0: [0u8; 2usize],
140 31875 : xl_crc: crc,
141 31875 : };
142 :
143 : // now we have the header and can finish the crc
144 31875 : let header_bytes = header.encode()?;
145 31875 : let crc = crc32c_append(crc, &header_bytes[0..XLOG_RECORD_CRC_OFFS]);
146 31875 : header.xl_crc = crc;
147 :
148 31875 : let mut header_bytes = header.encode()?.to_vec();
149 31875 : assert!(header_bytes.len() == XLOG_SIZE_OF_XLOG_RECORD);
150 :
151 31875 : header_bytes.extend_from_slice(&scratch);
152 31875 :
153 31875 : // finish rdatas
154 31875 : let mut rdatas = rdatas;
155 31875 : rdatas.insert(0, &header_bytes);
156 31875 :
157 31875 : write_walrecord_to_disk(state, total_len as u64, rdatas, start_recptr, end_recptr)?;
158 :
159 31875 : state.internal_available_lsn = end_recptr;
160 31875 : state.prev_lsn = start_recptr;
161 31875 : Ok(())
162 31875 : }
163 :
164 31875 : fn write_walrecord_to_disk(
165 31875 : state: &mut State,
166 31875 : total_len: u64,
167 31875 : rdatas: Vec<&[u8]>,
168 31875 : start: Lsn,
169 31875 : end: Lsn,
170 31875 : ) -> anyhow::Result<()> {
171 31875 : let mut curr_ptr = start;
172 31875 : let mut freespace = insert_freespace(curr_ptr);
173 31875 : let mut written: usize = 0;
174 31875 :
175 31875 : assert!(freespace >= std::mem::size_of::<u32>());
176 :
177 159375 : for mut rdata in rdatas {
178 127677 : while rdata.len() >= freespace {
179 177 : assert!(
180 177 : curr_ptr.segment_offset(WAL_SEGMENT_SIZE) >= XLOG_SIZE_OF_XLOG_SHORT_PHD
181 0 : || freespace == 0
182 : );
183 :
184 177 : state.write(curr_ptr.0, &rdata[..freespace]);
185 177 : rdata = &rdata[freespace..];
186 177 : written += freespace;
187 177 : curr_ptr = Lsn(curr_ptr.0 + freespace as u64);
188 177 :
189 177 : let mut new_page = XLogPageHeaderData {
190 177 : xlp_magic: XLOG_PAGE_MAGIC as u16,
191 177 : xlp_info: XLP_BKP_REMOVABLE,
192 177 : xlp_tli: 1,
193 177 : xlp_pageaddr: curr_ptr.0,
194 177 : xlp_rem_len: (total_len - written as u64) as u32,
195 177 : ..Default::default() // Put 0 in padding fields.
196 177 : };
197 177 : if new_page.xlp_rem_len > 0 {
198 159 : new_page.xlp_info |= XLP_FIRST_IS_CONTRECORD;
199 159 : }
200 :
201 177 : if curr_ptr.segment_offset(WAL_SEGMENT_SIZE) == 0 {
202 0 : new_page.xlp_info |= XLP_LONG_HEADER;
203 0 : let long_page = XLogLongPageHeaderData {
204 0 : std: new_page,
205 0 : xlp_sysid: 0,
206 0 : xlp_seg_size: WAL_SEGMENT_SIZE as u32,
207 0 : xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
208 0 : };
209 0 : let header_bytes = long_page.encode()?;
210 0 : assert!(header_bytes.len() == XLOG_SIZE_OF_XLOG_LONG_PHD);
211 0 : state.write(curr_ptr.0, &header_bytes);
212 0 : curr_ptr = Lsn(curr_ptr.0 + header_bytes.len() as u64);
213 : } else {
214 177 : let header_bytes = new_page.encode()?;
215 177 : assert!(header_bytes.len() == XLOG_SIZE_OF_XLOG_SHORT_PHD);
216 177 : state.write(curr_ptr.0, &header_bytes);
217 177 : curr_ptr = Lsn(curr_ptr.0 + header_bytes.len() as u64);
218 : }
219 177 : freespace = insert_freespace(curr_ptr);
220 : }
221 :
222 127500 : assert!(
223 127500 : curr_ptr.segment_offset(WAL_SEGMENT_SIZE) >= XLOG_SIZE_OF_XLOG_SHORT_PHD
224 0 : || rdata.is_empty()
225 : );
226 127500 : state.write(curr_ptr.0, rdata);
227 127500 : curr_ptr = Lsn(curr_ptr.0 + rdata.len() as u64);
228 127500 : written += rdata.len();
229 127500 : freespace -= rdata.len();
230 : }
231 :
232 31875 : assert!(written == total_len as usize);
233 31875 : curr_ptr.0 = maxalign(curr_ptr.0);
234 31875 : assert!(curr_ptr == end);
235 31875 : Ok(())
236 31875 : }
237 :
/// Rounds `size` up to the next multiple of eight; multiples of eight are
/// returned unchanged.
fn maxalign<T>(size: T) -> T
where
    T: std::ops::BitAnd<Output = T>
        + std::ops::Add<Output = T>
        + std::ops::Not<Output = T>
        + From<u8>,
{
    // Adding 7 carries into the next multiple unless already aligned; the
    // mask then clears the three low bits.
    let bumped = size + T::from(7);
    let low_bits_cleared = !T::from(7);
    bumped & low_bits_cleared
}
247 :
248 32052 : fn insert_freespace(ptr: Lsn) -> usize {
249 32052 : if ptr.block_offset() == 0 {
250 0 : 0
251 : } else {
252 32052 : (XLOG_BLCKSZ as u64 - ptr.block_offset()) as usize
253 : }
254 32052 : }
255 :
256 : const XLP_BKP_REMOVABLE: u16 = 0x0004;
257 : const USABLE_BYTES_IN_PAGE: u64 = (XLOG_BLCKSZ - XLOG_SIZE_OF_XLOG_SHORT_PHD) as u64;
258 : const USABLE_BYTES_IN_SEGMENT: u64 = ((WAL_SEGMENT_SIZE / XLOG_BLCKSZ) as u64
259 : * USABLE_BYTES_IN_PAGE)
260 : - (XLOG_SIZE_OF_XLOG_RECORD - XLOG_SIZE_OF_XLOG_SHORT_PHD) as u64;
261 :
262 63750 : fn bytepos_to_recptr(bytepos: u64) -> Lsn {
263 63750 : let fullsegs = bytepos / USABLE_BYTES_IN_SEGMENT;
264 63750 : let mut bytesleft = bytepos % USABLE_BYTES_IN_SEGMENT;
265 :
266 63750 : let seg_offset = if bytesleft < (XLOG_BLCKSZ - XLOG_SIZE_OF_XLOG_SHORT_PHD) as u64 {
267 : // fits on first page of segment
268 0 : bytesleft + XLOG_SIZE_OF_XLOG_SHORT_PHD as u64
269 : } else {
270 : // account for the first page on segment with long header
271 63750 : bytesleft -= (XLOG_BLCKSZ - XLOG_SIZE_OF_XLOG_SHORT_PHD) as u64;
272 63750 : let fullpages = bytesleft / USABLE_BYTES_IN_PAGE;
273 63750 : bytesleft %= USABLE_BYTES_IN_PAGE;
274 63750 :
275 63750 : XLOG_BLCKSZ as u64
276 63750 : + fullpages * XLOG_BLCKSZ as u64
277 63750 : + bytesleft
278 63750 : + XLOG_SIZE_OF_XLOG_SHORT_PHD as u64
279 : };
280 :
281 63750 : Lsn(XLogSegNoOffsetToRecPtr(
282 63750 : fullsegs,
283 63750 : seg_offset as u32,
284 63750 : WAL_SEGMENT_SIZE,
285 63750 : ))
286 63750 : }
287 :
288 95625 : fn recptr_to_bytepos(ptr: Lsn) -> u64 {
289 95625 : let fullsegs = ptr.segment_number(WAL_SEGMENT_SIZE);
290 95625 : let offset = ptr.segment_offset(WAL_SEGMENT_SIZE) as u64;
291 95625 :
292 95625 : let fullpages = offset / XLOG_BLCKSZ as u64;
293 95625 : let offset = offset % XLOG_BLCKSZ as u64;
294 95625 :
295 95625 : if fullpages == 0 {
296 0 : fullsegs * USABLE_BYTES_IN_SEGMENT
297 0 : + if offset > 0 {
298 0 : assert!(offset >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);
299 0 : offset - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64
300 : } else {
301 0 : 0
302 : }
303 : } else {
304 95625 : fullsegs * USABLE_BYTES_IN_SEGMENT
305 95625 : + (XLOG_BLCKSZ - XLOG_SIZE_OF_XLOG_SHORT_PHD) as u64
306 95625 : + (fullpages - 1) * USABLE_BYTES_IN_PAGE
307 95625 : + if offset > 0 {
308 95625 : assert!(offset >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);
309 95625 : offset - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64
310 : } else {
311 0 : 0
312 : }
313 : }
314 95625 : }
|