Line data Source code
//! Every image of a certain timeline from [`crate::tenant::Tenant`]
//! has metadata that needs to be stored persistently.
3 : //!
4 : //! Later, the file gets used in [`remote_timeline_client`] as a part of
5 : //! external storage import and export operations.
6 : //!
7 : //! The module contains all structs and related helper methods related to timeline metadata.
8 : //!
9 : //! [`remote_timeline_client`]: super::remote_timeline_client
10 :
11 : use std::io::{self};
12 :
13 : use anyhow::{ensure, Context};
14 : use serde::{de::Error, Deserialize, Serialize, Serializer};
15 : use thiserror::Error;
16 : use utils::bin_ser::SerializeError;
17 : use utils::crashsafe::path_with_suffix_extension;
18 : use utils::{
19 : bin_ser::BeSer,
20 : id::{TenantId, TimelineId},
21 : lsn::Lsn,
22 : };
23 :
24 : use crate::config::PageServerConf;
25 : use crate::virtual_file::VirtualFile;
26 : use crate::TEMP_FILE_SUFFIX;
27 :
/// Format version written into the header of every serialized metadata blob.
///
/// Stored explicitly so that blobs written in an older format can be
/// recognized and upgraded when they are read back.
const METADATA_FORMAT_VERSION: u16 = 4;

/// The previous format version; the only older version that can still be
/// read (it is upgraded to [`METADATA_FORMAT_VERSION`] on load).
const METADATA_OLD_FORMAT_VERSION: u16 = 3;

/// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic.
///
/// This is the same assumption that PostgreSQL makes with the control file,
/// see PG_CONTROL_MAX_SAFE_SIZE
///
/// Serialized metadata is always exactly this many bytes long: the header and
/// body are written at the front and the remainder is zero padding.
const METADATA_MAX_SIZE: usize = 512;
39 :
/// Metadata stored on disk for each timeline
///
/// The fields correspond to the values we hold in memory, in Timeline.
///
/// The value is split into a small fixed header (checksum, size, format
/// version) and a versioned body holding the actual timeline attributes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TimelineMetadata {
    // Describes the serialized form: checksum, size and format version.
    hdr: TimelineMetadataHeader,
    // Timeline attributes, always kept in the current (V2) layout in memory.
    body: TimelineMetadataBodyV2,
}
48 :
/// Fixed-size header placed in front of the serialized metadata body.
///
/// The order of the fields is the order in which they are serialized, so it
/// is part of the on-disk format.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataHeader {
    checksum: u32,       // CRC of serialized metadata body
    size: u16,           // size of serialized metadata
    format_version: u16, // metadata format version (used for compatibility checks)
}
/// Number of bytes reserved for the header at the start of a metadata blob.
///
/// NOTE(review): this is the in-memory size of the struct and is used as the
/// length of the *serialized* header; that presumes the serializer emits the
/// three integers at fixed width (4 + 2 + 2 bytes) — confirm against `BeSer`.
const METADATA_HDR_SIZE: usize = std::mem::size_of::<TimelineMetadataHeader>();
56 :
/// Body of the metadata in the current format ([`METADATA_FORMAT_VERSION`]).
///
/// Identical to [`TimelineMetadataBodyV1`] plus the trailing `pg_version`
/// field. The order of the fields is the order in which they are serialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV2 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'. That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
    // Not present in the V1 body; metadata upgraded from V1 gets the value 14.
    pg_version: u32,
}
77 :
/// Body of the metadata in the previous format
/// ([`METADATA_OLD_FORMAT_VERSION`]).
///
/// Only used to read old metadata, which is then converted into
/// [`TimelineMetadataBodyV2`]. The order of the fields is the order in which
/// they are serialized.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
struct TimelineMetadataBodyV1 {
    disk_consistent_lsn: Lsn,
    // This is only set if we know it. We track it in memory when the page
    // server is running, but we only track the value corresponding to
    // 'last_record_lsn', not 'disk_consistent_lsn' which can lag behind by a
    // lot. We only store it in the metadata file when we flush *all* the
    // in-memory data so that 'last_record_lsn' is the same as
    // 'disk_consistent_lsn'. That's OK, because after page server restart, as
    // soon as we reprocess at least one record, we will have a valid
    // 'prev_record_lsn' value in memory again. This is only really needed when
    // doing a clean shutdown, so that there is no more WAL beyond
    // 'disk_consistent_lsn'
    prev_record_lsn: Option<Lsn>,
    ancestor_timeline: Option<TimelineId>,
    ancestor_lsn: Lsn,
    latest_gc_cutoff_lsn: Lsn,
    initdb_lsn: Lsn,
}
97 :
98 : impl TimelineMetadata {
99 7566 : pub fn new(
100 7566 : disk_consistent_lsn: Lsn,
101 7566 : prev_record_lsn: Option<Lsn>,
102 7566 : ancestor_timeline: Option<TimelineId>,
103 7566 : ancestor_lsn: Lsn,
104 7566 : latest_gc_cutoff_lsn: Lsn,
105 7566 : initdb_lsn: Lsn,
106 7566 : pg_version: u32,
107 7566 : ) -> Self {
108 7566 : Self {
109 7566 : hdr: TimelineMetadataHeader {
110 7566 : checksum: 0,
111 7566 : size: 0,
112 7566 : format_version: METADATA_FORMAT_VERSION,
113 7566 : },
114 7566 : body: TimelineMetadataBodyV2 {
115 7566 : disk_consistent_lsn,
116 7566 : prev_record_lsn,
117 7566 : ancestor_timeline,
118 7566 : ancestor_lsn,
119 7566 : latest_gc_cutoff_lsn,
120 7566 : initdb_lsn,
121 7566 : pg_version,
122 7566 : },
123 7566 : }
124 7566 : }
125 :
126 1 : fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
127 1 : let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
128 :
129 : // backward compatible only up to this version
130 1 : ensure!(
131 1 : hdr.format_version == METADATA_OLD_FORMAT_VERSION,
132 0 : "unsupported metadata format version {}",
133 : hdr.format_version
134 : );
135 :
136 1 : let metadata_size = hdr.size as usize;
137 :
138 1 : let body: TimelineMetadataBodyV1 =
139 1 : TimelineMetadataBodyV1::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
140 :
141 1 : let body = TimelineMetadataBodyV2 {
142 1 : disk_consistent_lsn: body.disk_consistent_lsn,
143 1 : prev_record_lsn: body.prev_record_lsn,
144 1 : ancestor_timeline: body.ancestor_timeline,
145 1 : ancestor_lsn: body.ancestor_lsn,
146 1 : latest_gc_cutoff_lsn: body.latest_gc_cutoff_lsn,
147 1 : initdb_lsn: body.initdb_lsn,
148 1 : pg_version: 14, // All timelines created before this version had pg_version 14
149 1 : };
150 1 :
151 1 : hdr.format_version = METADATA_FORMAT_VERSION;
152 1 :
153 1 : Ok(Self { hdr, body })
154 1 : }
155 :
156 515 : pub fn from_bytes(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
157 515 : ensure!(
158 515 : metadata_bytes.len() == METADATA_MAX_SIZE,
159 1 : "metadata bytes size is wrong"
160 : );
161 514 : let hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
162 :
163 514 : let metadata_size = hdr.size as usize;
164 514 : ensure!(
165 514 : metadata_size <= METADATA_MAX_SIZE,
166 0 : "corrupted metadata file"
167 : );
168 514 : let calculated_checksum = crc32c::crc32c(&metadata_bytes[METADATA_HDR_SIZE..metadata_size]);
169 514 : ensure!(
170 514 : hdr.checksum == calculated_checksum,
171 1 : "metadata checksum mismatch"
172 : );
173 :
174 513 : if hdr.format_version != METADATA_FORMAT_VERSION {
175 : // If metadata has the old format,
176 : // upgrade it and return the result
177 1 : TimelineMetadata::upgrade_timeline_metadata(metadata_bytes)
178 : } else {
179 512 : let body =
180 512 : TimelineMetadataBodyV2::des(&metadata_bytes[METADATA_HDR_SIZE..metadata_size])?;
181 512 : ensure!(
182 512 : body.disk_consistent_lsn.is_aligned(),
183 0 : "disk_consistent_lsn is not aligned"
184 : );
185 512 : Ok(TimelineMetadata { hdr, body })
186 : }
187 515 : }
188 :
189 12852 : pub fn to_bytes(&self) -> Result<Vec<u8>, SerializeError> {
190 12852 : let body_bytes = self.body.ser()?;
191 12852 : let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
192 12852 : let hdr = TimelineMetadataHeader {
193 12852 : size: metadata_size as u16,
194 12852 : format_version: METADATA_FORMAT_VERSION,
195 12852 : checksum: crc32c::crc32c(&body_bytes),
196 12852 : };
197 12852 : let hdr_bytes = hdr.ser()?;
198 12852 : let mut metadata_bytes = vec![0u8; METADATA_MAX_SIZE];
199 12852 : metadata_bytes[0..METADATA_HDR_SIZE].copy_from_slice(&hdr_bytes);
200 12852 : metadata_bytes[METADATA_HDR_SIZE..metadata_size].copy_from_slice(&body_bytes);
201 12852 : Ok(metadata_bytes)
202 12852 : }
203 :
204 : /// [`Lsn`] that corresponds to the corresponding timeline directory
205 : /// contents, stored locally in the pageserver workdir.
206 6896 : pub fn disk_consistent_lsn(&self) -> Lsn {
207 6896 : self.body.disk_consistent_lsn
208 6896 : }
209 :
210 1394 : pub fn prev_record_lsn(&self) -> Option<Lsn> {
211 1394 : self.body.prev_record_lsn
212 1394 : }
213 :
214 2020 : pub fn ancestor_timeline(&self) -> Option<TimelineId> {
215 2020 : self.body.ancestor_timeline
216 2020 : }
217 :
218 1394 : pub fn ancestor_lsn(&self) -> Lsn {
219 1394 : self.body.ancestor_lsn
220 1394 : }
221 :
222 1652 : pub fn latest_gc_cutoff_lsn(&self) -> Lsn {
223 1652 : self.body.latest_gc_cutoff_lsn
224 1652 : }
225 :
226 1394 : pub fn initdb_lsn(&self) -> Lsn {
227 1394 : self.body.initdb_lsn
228 1394 : }
229 :
230 1394 : pub fn pg_version(&self) -> u32 {
231 1394 : self.body.pg_version
232 1394 : }
233 : }
234 :
235 : impl<'de> Deserialize<'de> for TimelineMetadata {
236 206 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
237 206 : where
238 206 : D: serde::Deserializer<'de>,
239 206 : {
240 206 : let bytes = Vec::<u8>::deserialize(deserializer)?;
241 206 : Self::from_bytes(bytes.as_slice()).map_err(|e| D::Error::custom(format!("{e}")))
242 206 : }
243 : }
244 :
245 : impl Serialize for TimelineMetadata {
246 5239 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
247 5239 : where
248 5239 : S: Serializer,
249 5239 : {
250 5239 : let bytes = self
251 5239 : .to_bytes()
252 5239 : .map_err(|e| serde::ser::Error::custom(format!("{e}")))?;
253 5239 : bytes.serialize(serializer)
254 5239 : }
255 : }
256 :
257 : /// Save timeline metadata to file
258 30444 : #[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
259 : pub async fn save_metadata(
260 : conf: &'static PageServerConf,
261 : tenant_id: &TenantId,
262 : timeline_id: &TimelineId,
263 : data: &TimelineMetadata,
264 : ) -> anyhow::Result<()> {
265 : let path = conf.metadata_path(tenant_id, timeline_id);
266 : let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
267 : let metadata_bytes = data.to_bytes().context("serialize metadata")?;
268 : VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
269 : .await
270 : .context("write metadata")?;
271 : Ok(())
272 : }
273 :
/// Reasons why loading a timeline's metadata file can fail.
///
/// Both variants are transparent: they display as the wrapped error.
#[derive(Error, Debug)]
pub enum LoadMetadataError {
    /// The metadata file could not be read.
    #[error(transparent)]
    Read(#[from] io::Error),

    /// The file was read but its contents could not be decoded.
    #[error(transparent)]
    Decode(#[from] anyhow::Error),
}
282 :
283 311 : pub fn load_metadata(
284 311 : conf: &'static PageServerConf,
285 311 : tenant_id: &TenantId,
286 311 : timeline_id: &TimelineId,
287 311 : ) -> Result<TimelineMetadata, LoadMetadataError> {
288 311 : let metadata_path = conf.metadata_path(tenant_id, timeline_id);
289 311 : let metadata_bytes = std::fs::read(metadata_path)?;
290 :
291 301 : Ok(TimelineMetadata::from_bytes(&metadata_bytes)?)
292 311 : }
293 :
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tenant::harness::TIMELINE_ID;

    /// Metadata in the current format must survive a serialize/deserialize
    /// round trip with its body unchanged.
    #[test]
    fn metadata_serializes_correctly() {
        let before = TimelineMetadata::new(
            Lsn(0x200),
            Some(Lsn(0x100)),
            Some(TIMELINE_ID),
            Lsn(0),
            Lsn(0),
            Lsn(0),
            // Any version will do here, so use the default
            crate::DEFAULT_PG_VERSION,
        );

        let encoded = before
            .to_bytes()
            .expect("Should serialize correct metadata to bytes");
        let after =
            TimelineMetadata::from_bytes(&encoded).expect("Should deserialize its own bytes");

        assert_eq!(
            after.body, before.body,
            "Metadata that was serialized to bytes and deserialized back should not change"
        );
    }

    // Generate old version metadata and read it with current code.
    // Ensure that it is upgraded correctly
    #[test]
    fn test_metadata_upgrade() {
        // Stand-in for the metadata type as it looked in the old format.
        #[derive(Debug, Clone, PartialEq, Eq)]
        struct TimelineMetadataV1 {
            hdr: TimelineMetadataHeader,
            body: TimelineMetadataBodyV1,
        }

        impl TimelineMetadataV1 {
            // Same layout as the current `to_bytes`, but stamped with the old
            // format version and carrying the old body.
            pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
                let payload = self.body.ser()?;
                let total_len = METADATA_HDR_SIZE + payload.len();
                let header = TimelineMetadataHeader {
                    size: total_len as u16,
                    format_version: METADATA_OLD_FORMAT_VERSION,
                    checksum: crc32c::crc32c(&payload),
                };
                let header_bytes = header.ser()?;

                let mut buf = vec![0u8; METADATA_MAX_SIZE];
                buf[0..METADATA_HDR_SIZE].copy_from_slice(&header_bytes);
                buf[METADATA_HDR_SIZE..total_len].copy_from_slice(&payload);
                Ok(buf)
            }
        }

        let old_metadata = TimelineMetadataV1 {
            hdr: TimelineMetadataHeader {
                checksum: 0,
                size: 0,
                format_version: METADATA_OLD_FORMAT_VERSION,
            },
            body: TimelineMetadataBodyV1 {
                disk_consistent_lsn: Lsn(0x200),
                prev_record_lsn: Some(Lsn(0x100)),
                ancestor_timeline: Some(TIMELINE_ID),
                ancestor_lsn: Lsn(0),
                latest_gc_cutoff_lsn: Lsn(0),
                initdb_lsn: Lsn(0),
            },
        };

        let old_bytes = old_metadata
            .to_bytes()
            .expect("Should serialize correct metadata to bytes");

        // This should deserialize to the latest version format
        let upgraded =
            TimelineMetadata::from_bytes(&old_bytes).expect("Should deserialize its own bytes");

        let expected = TimelineMetadata::new(
            Lsn(0x200),
            Some(Lsn(0x100)),
            Some(TIMELINE_ID),
            Lsn(0),
            Lsn(0),
            Lsn(0),
            14, // All timelines created before this version had pg_version 14
        );

        assert_eq!(
            upgraded.body, expected.body,
            "Metadata of the old version {} should be upgraded to the latest version {}",
            METADATA_OLD_FORMAT_VERSION, METADATA_FORMAT_VERSION
        );
    }
}
|