TLA Line data Source code
1 : //! Every image of a certain timeline from [`crate::tenant::Tenant`]
2 : //! has a metadata that needs to be stored persistently.
3 : //!
4 : //! Later, the file gets used in [`remote_timeline_client`] as a part of
5 : //! external storage import and export operations.
6 : //!
7 : //! The module contains all structs and related helper methods related to timeline metadata.
8 : //!
9 : //! [`remote_timeline_client`]: super::remote_timeline_client
10 :
11 : use std::io::{self};
12 :
13 : use anyhow::{ensure, Context};
14 : use serde::{de::Error, Deserialize, Serialize, Serializer};
15 : use thiserror::Error;
16 : use utils::bin_ser::SerializeError;
17 : use utils::crashsafe::path_with_suffix_extension;
18 : use utils::{
19 : bin_ser::BeSer,
20 : id::{TenantId, TimelineId},
21 : lsn::Lsn,
22 : };
23 :
24 : use crate::config::PageServerConf;
25 : use crate::virtual_file::VirtualFile;
26 : use crate::TEMP_FILE_SUFFIX;
27 :
/// Format version written into the header of every metadata image produced
/// by `TimelineMetadata::to_bytes`.
///
/// A dedicated version number lets newer code recognise images written by
/// older code and stay backward compatible with them.
const METADATA_FORMAT_VERSION: u16 = 4;

/// The previous format version. It is the only non-current version that
/// `TimelineMetadata::from_bytes` still accepts; such images are upgraded to
/// the current format while loading.
const METADATA_OLD_FORMAT_VERSION: u16 = 3;

/// Exact length, in bytes, of a serialized metadata image (header + body,
/// zero-padded at the end).
///
/// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic.
///
/// This is the same assumption that PostgreSQL makes with the control file,
/// see PG_CONTROL_MAX_SAFE_SIZE
const METADATA_MAX_SIZE: usize = 512;
39 :
/// Metadata stored on disk for each timeline
///
/// The fields correspond to the values we hold in memory, in Timeline.
///
/// The value is split into a small fixed header (`hdr`) that describes the
/// serialized image and a `body` that carries the actual timeline values.
/// Only the body is compared by this module's unit tests; the header is
/// rebuilt from the body every time the metadata is serialized.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimelineMetadata {
    // Describes the serialized image: checksum, size and format version.
    hdr: TimelineMetadataHeader,
    // Timeline values, always held in the current (V2) layout in memory.
    body: TimelineMetadataBodyV2,
}
48 :
/// Fixed-size prefix of a serialized metadata image.
///
/// It is written in front of the body and is read first when decoding, to
/// learn how long the body is, which format it uses and what its checksum
/// should be.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataHeader {
    checksum: u32,       // CRC of serialized metadata body
    size: u16,           // size of serialized metadata (header + body, without padding)
    format_version: u16, // metadata format version (used for compatibility checks)
}
// Number of bytes at the start of an image that hold the serialized header.
// NOTE(review): this is the *in-memory* size of the struct, used as the
// *serialized* size. The two agree only while the struct has no padding and
// the serializer uses fixed-width integers -- confirm before changing fields.
const METADATA_HDR_SIZE: usize = std::mem::size_of::<TimelineMetadataHeader>();
56 :
/// Body of a metadata image in the current format
/// (`METADATA_FORMAT_VERSION`).
///
/// Compared with `TimelineMetadataBodyV1` it carries one extra trailing
/// field, `pg_version`.
// NOTE(review): the serialized layout presumably follows field declaration
// order, so fields must not be reordered -- confirm against the serializer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV2 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'. That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
    // Not present in the V1 body; images upgraded from V1 get the value 14.
    pg_version: u32,
}
77 :
/// Body of a metadata image in the previous format
/// (`METADATA_OLD_FORMAT_VERSION`).
///
/// Kept only so that old images can still be decoded and converted into
/// `TimelineMetadataBodyV2`; it has the same fields minus `pg_version`.
// NOTE(review): the serialized layout presumably follows field declaration
// order, so fields must not be reordered -- confirm against the serializer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV1 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'. That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
}
97 :
98 : impl TimelineMetadata {
99 6344 : pub fn new(
100 6344 : disk_consistent_lsn: Lsn,
101 6344 : prev_record_lsn: Option<Lsn>,
102 6344 : ancestor_timeline: Option<TimelineId>,
103 6344 : ancestor_lsn: Lsn,
104 6344 : latest_gc_cutoff_lsn: Lsn,
105 6344 : initdb_lsn: Lsn,
106 6344 : pg_version: u32,
107 6344 : ) -> Self {
108 6344 : Self {
109 6344 : hdr: TimelineMetadataHeader {
110 6344 : checksum: 0,
111 6344 : size: 0,
112 6344 : format_version: METADATA_FORMAT_VERSION,
113 6344 : },
114 6344 : body: TimelineMetadataBodyV2 {
115 6344 : disk_consistent_lsn,
116 6344 : prev_record_lsn,
117 6344 : ancestor_timeline,
118 6344 : ancestor_lsn,
119 6344 : latest_gc_cutoff_lsn,
120 6344 : initdb_lsn,
121 6344 : pg_version,
122 6344 : },
123 6344 : }
124 6344 : }
125 :
126 1 : fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
127 1 : let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
128 :
129 : // backward compatible only up to this version
130 1 : ensure!(
131 1 : hdr.format_version == METADATA_OLD_FORMAT_VERSION,
132 UBC 0 : "unsupported metadata format version {}",
133 : hdr.format_version
134 : );
135 :
136 CBC 1 : let metadata_size = hdr.size as usize;
137 :
138 1 : let body: TimelineMetadataBodyV1 =
139 1 : TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
140 :
141 1 : let body = TimelineMetadataBodyV2 {
142 1 : disk_consistent_lsn: body.disk_consistent_lsn,
143 1 : prev_record_lsn: body.prev_record_lsn,
144 1 : ancestor_timeline: body.ancestor_timeline,
145 1 : ancestor_lsn: body.ancestor_lsn,
146 1 : latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn,
147 1 : initdb_lsn: body.initdb_lsn,
148 1 : pg_version: 14, // All timelines created before this version had pg_version 14
149 1 : };
150 1 :
151 1 : hdr.format_version = METADATA_FORMAT_VERSION;
152 1 :
153 1 : Ok(Self { hdr, body })
154 1 : }
155 :
156 645 : pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
157 645 : ensure!(
158 645 : metadata_bytes.len() == METADATA_MAX_SIZE,
159 1 : "metadata bytes size is wrong"
160 : );
161 644 : let hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
162 :
163 644 : let metadata_size = hdr.size as usize;
164 644 : ensure!(
165 644 : metadata_size <= METADATA_MAX_SIZE,
166 UBC 0 : "corrupted metadata file"
167 : );
168 CBC 644 : let calculated_checksum = crc32c::crc32c(&metadata_bytes[METADATA_HDR_SIZE..metadata_size]);
169 644 : ensure!(
170 644 : hdr.checksum == calculated_checksum,
171 1 : "metadata checksum mismatch"
172 : );
173 :
174 643 : if hdr.format_version != METADATA_FORMAT_VERSION {
175 : // If metadata has the old format,
176 : // upgrade it and return the result
177 1 : TimelineMetadata::upgrade_timeline_metadata(metadata_bytes)
178 : } else {
179 642 : let body =
180 642 : TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
181 642 : ensure!(
182 642 : body.disk_consistent_lsn.is_aligned(),
183 UBC 0 : "disk_consistent_lsn is not aligned"
184 : );
185 CBC 642 : Ok(TimelineMetadata { hdr, body })
186 : }
187 645 : }
188 :
189 13628 : pub fn to_bytes(&self) -> Result<Vec<u8>, SerializeError> {
190 13628 : let body_bytes = self.body.ser()?;
191 13628 : let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
192 13628 : let hdr = TimelineMetadataHeader {
193 13628 : size: metadata_size as u16,
194 13628 : format_version: METADATA_FORMAT_VERSION,
195 13628 : checksum: crc32c::crc32c(&body_bytes),
196 13628 : };
197 13628 : let hdr_bytes = hdr.ser()?;
198 13628 : let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];
199 13628 : metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);
200 13628 : metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);
201 13628 : Ok(metadata_bytes)
202 13628 : }
203 :
204 : /// [`Lsn`] that corresponds to the corresponding timeline directory
205 : /// contents, stored locally in the pageserver workdir.
206 8881 : pub fn disk_consistent_lsn(&self) -> Lsn {
207 8881 : self.body.disk_consistent_lsn
208 8881 : }
209 :
210 1302 : pub fn prev_record_lsn(&self) -> Option<Lsn> {
211 1302 : self.body.prev_record_lsn
212 1302 : }
213 :
214 1912 : pub fn ancestor_timeline(&self) -> Option<TimelineId> {
215 1912 : self.body.ancestor_timeline
216 1912 : }
217 :
218 1302 : pub fn ancestor_lsn(&self) -> Lsn {
219 1302 : self.body.ancestor_lsn
220 1302 : }
221 :
222 1302 : pub fn latest_gc_cutoff_lsn(&self) -> Lsn {
223 1302 : self.body.latest_gc_cutoff_lsn
224 1302 : }
225 :
226 1302 : pub fn initdb_lsn(&self) -> Lsn {
227 1302 : self.body.initdb_lsn
228 1302 : }
229 :
230 1302 : pub fn pg_version(&self) -> u32 {
231 1302 : self.body.pg_version
232 1302 : }
233 :
234 : // Checksums make it awkward to build a valid instance by hand. This helper
235 : // provides a TimelineMetadata with a valid checksum in its header.
236 : #[cfg(test)]
237 6 : pub fn example() -> Self {
238 6 : let instance = Self::new(
239 6 : "0/16960E8".parse::<Lsn>().unwrap(),
240 6 : None,
241 6 : None,
242 6 : Lsn::from_hex("00000000").unwrap(),
243 6 : Lsn::from_hex("00000000").unwrap(),
244 6 : Lsn::from_hex("00000000").unwrap(),
245 6 : 0,
246 6 : );
247 6 : let bytes = instance.to_bytes().unwrap();
248 6 : Self::from_bytes(&bytes).unwrap()
249 6 : }
250 : }
251 :
252 : impl<'de> Deserialize<'de> for TimelineMetadata {
253 347 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
254 347 : where
255 347 : D: serde::Deserializer<'de>,
256 347 : {
257 347 : let bytes = Vec::<u8>::deserialize(deserializer)?;
258 347 : Self::from_bytes(bytes.as_slice()).map_err(|e| D::Error::custom(format!("{e}")))
259 347 : }
260 : }
261 :
262 : impl Serialize for TimelineMetadata {
263 6972 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
264 6972 : where
265 6972 : S: Serializer,
266 6972 : {
267 6972 : let bytes = self
268 6972 : .to_bytes()
269 6972 : .map_err(|e| serde::ser::Error::custom(format!("{e}")))?;
270 6972 : bytes.serialize(serializer)
271 6972 : }
272 : }
273 :
274 : /// Save timeline metadata to file
275 26592 : #[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
276 : pub async fn save_metadata(
277 : conf: &'static PageServerConf,
278 : tenant_id: &TenantId,
279 : timeline_id: &TimelineId,
280 : data: &TimelineMetadata,
281 : ) -> anyhow::Result<()> {
282 : let path = conf.metadata_path(tenant_id, timeline_id);
283 : let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
284 : let metadata_bytes = data.to_bytes().context("serialize metadata")?;
285 : VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
286 : .await
287 : .context("write metadata")?;
288 : Ok(())
289 : }
290 :
/// Reasons why `load_metadata` can fail.
///
/// Both variants are transparent wrappers: their message and source are
/// those of the wrapped error.
#[derive(Error, Debug)]
pub enum LoadMetadataError {
    /// The metadata file could not be read from disk.
    #[error(transparent)]
    Read(#[from] io::Error),

    /// The file was read, but its contents were rejected by
    /// `TimelineMetadata::from_bytes`.
    #[error(transparent)]
    Decode(#[from] anyhow::Error),
}
299 :
300 291 : pub fn load_metadata(
301 291 : conf: &'static PageServerConf,
302 291 : tenant_id: &TenantId,
303 291 : timeline_id: &TimelineId,
304 291 : ) -> Result<TimelineMetadata, LoadMetadataError> {
305 291 : let metadata_path = conf.metadata_path(tenant_id, timeline_id);
306 291 : let metadata_bytes = std::fs::read(metadata_path)?;
307 :
308 284 : Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
309 291 : }
310 :
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tenant::harness::TIMELINE_ID;

    /// A current-format value must survive a `to_bytes` / `from_bytes` round
    /// trip with its body unchanged.
    #[test]
    fn metadata_serializes_correctly() {
        let original_metadata = TimelineMetadata::new(
            Lsn(0x200),
            Some(Lsn(0x100)),
            Some(TIMELINE_ID),
            Lsn(0),
            Lsn(0),
            Lsn(0),
            // Any version will do here, so use the default
            crate::DEFAULT_PG_VERSION,
        );

        let image = original_metadata
            .to_bytes()
            .expect("Should serialize correct metadata to bytes");
        let deserialized_metadata =
            TimelineMetadata::from_bytes(&image).expect("Should deserialize its own bytes");

        assert_eq!(
            deserialized_metadata.body, original_metadata.body,
            "Metadata that was serialized to bytes and deserialized back should not change"
        );
    }

    // Generate old version metadata and read it with current code.
    // Ensure that it is upgraded correctly
    #[test]
    fn test_metadata_upgrade() {
        // Stand-in for the metadata type as it looked in the old format.
        #[derive(Debug, Clone, PartialEq, Eq)]
        struct TimelineMetadataV1 {
            hdr: TimelineMetadataHeader,
            body: TimelineMetadataBodyV1,
        }

        impl TimelineMetadataV1 {
            // Mirrors `TimelineMetadata::to_bytes`, but stamps the old
            // format version into the header.
            pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
                let body_bytes = self.body.ser()?;
                let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
                let hdr_bytes = TimelineMetadataHeader {
                    size: metadata_size as u16,
                    format_version: METADATA_OLD_FORMAT_VERSION,
                    checksum: crc32c::crc32c(&body_bytes),
                }
                .ser()?;

                let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];
                let (hdr_area, rest) = metadata_bytes.split_at_mut(METADATA_HDR_SIZE);
                hdr_area.copy_from_slice(&hdr_bytes);
                rest[..body_bytes.len()].copy_from_slice(&body_bytes);
                Ok(metadata_bytes)
            }
        }

        let metadata_v1 = TimelineMetadataV1 {
            hdr: TimelineMetadataHeader {
                checksum: 0,
                size: 0,
                format_version: METADATA_OLD_FORMAT_VERSION,
            },
            body: TimelineMetadataBodyV1 {
                disk_consistent_lsn: Lsn(0x200),
                prev_record_lsn: Some(Lsn(0x100)),
                ancestor_timeline: Some(TIMELINE_ID),
                ancestor_lsn: Lsn(0),
                latest_gc_cutoff_lsn: Lsn(0),
                initdb_lsn: Lsn(0),
            },
        };

        let old_image = metadata_v1
            .to_bytes()
            .expect("Should serialize correct metadata to bytes");

        // This should deserialize to the latest version format
        let deserialized_metadata =
            TimelineMetadata::from_bytes(&old_image).expect("Should deserialize its own bytes");

        let expected_metadata = TimelineMetadata::new(
            Lsn(0x200),
            Some(Lsn(0x100)),
            Some(TIMELINE_ID),
            Lsn(0),
            Lsn(0),
            Lsn(0),
            14, // All timelines created before this version had pg_version 14
        );

        assert_eq!(
            deserialized_metadata.body, expected_metadata.body,
            "Metadata of the old version {} should be upgraded to the latest version {}",
            METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION
        );
    }
}
|