Line data Source code
1 : use crate::{
2 : is_temporary,
3 : tenant::{
4 : ephemeral_file::is_ephemeral_file,
5 : remote_timeline_client::{
6 : self,
7 : index::{IndexPart, LayerFileMetadata},
8 : },
9 : storage_layer::LayerName,
10 : Generation,
11 : },
12 : METADATA_FILE_NAME,
13 : };
14 : use anyhow::Context;
15 : use camino::{Utf8Path, Utf8PathBuf};
16 : use pageserver_api::shard::ShardIndex;
17 : use std::{collections::HashMap, str::FromStr};
18 : use utils::lsn::Lsn;
19 :
20 : /// Identified files in the timeline directory.
21 : pub(super) enum Discovered {
22 : /// The only one we care about
23 : Layer(LayerName, Utf8PathBuf, u64),
24 : /// Old ephmeral files from previous launches, should be removed
25 : Ephemeral(String),
26 : /// Old temporary timeline files, unsure what these really are, should be removed
27 : Temporary(String),
28 : /// Temporary on-demand download files, should be removed
29 : TemporaryDownload(String),
30 : /// "metadata" file we persist locally and include in `index_part.json`
31 : Metadata,
32 : /// Backup file from previously future layers
33 : IgnoredBackup,
34 : /// Unrecognized, warn about these
35 : Unknown(String),
36 : }
37 :
38 : /// Scans the timeline directory for interesting files.
39 6 : pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovered>> {
40 6 : let mut ret = Vec::new();
41 :
42 16 : for direntry in path.read_dir_utf8()? {
43 16 : let direntry = direntry?;
44 16 : let file_name = direntry.file_name().to_string();
45 :
46 16 : let discovered = match LayerName::from_str(&file_name) {
47 16 : Ok(file_name) => {
48 16 : let file_size = direntry.metadata()?.len();
49 16 : Discovered::Layer(file_name, direntry.path().to_owned(), file_size)
50 : }
51 : Err(_) => {
52 0 : if file_name == METADATA_FILE_NAME {
53 0 : Discovered::Metadata
54 0 : } else if file_name.ends_with(".old") {
55 : // ignore these
56 0 : Discovered::IgnoredBackup
57 0 : } else if remote_timeline_client::is_temp_download_file(direntry.path()) {
58 0 : Discovered::TemporaryDownload(file_name)
59 0 : } else if is_ephemeral_file(&file_name) {
60 0 : Discovered::Ephemeral(file_name)
61 0 : } else if is_temporary(direntry.path()) {
62 0 : Discovered::Temporary(file_name)
63 : } else {
64 0 : Discovered::Unknown(file_name)
65 : }
66 : }
67 : };
68 :
69 16 : ret.push(discovered);
70 : }
71 :
72 6 : Ok(ret)
73 6 : }
74 :
75 : /// Whereas `LayerFileMetadata` describes the metadata we would store in remote storage,
76 : /// this structure extends it with metadata describing the layer's presence in local storage.
77 : #[derive(Clone, Debug)]
78 : pub(super) struct LocalLayerFileMetadata {
79 : pub(super) metadata: LayerFileMetadata,
80 : pub(super) local_path: Utf8PathBuf,
81 : }
82 :
83 : impl LocalLayerFileMetadata {
84 16 : pub fn new(
85 16 : local_path: Utf8PathBuf,
86 16 : file_size: u64,
87 16 : generation: Generation,
88 16 : shard: ShardIndex,
89 16 : ) -> Self {
90 16 : Self {
91 16 : local_path,
92 16 : metadata: LayerFileMetadata::new(file_size, generation, shard),
93 16 : }
94 16 : }
95 : }
96 :
97 : /// Decision on what to do with a layer file after considering its local and remote metadata.
98 : #[derive(Clone, Debug)]
99 : pub(super) enum Decision {
100 : /// The layer is not present locally.
101 : Evicted(LayerFileMetadata),
102 : /// The layer is present locally, but local metadata does not match remote; we must
103 : /// delete it and treat it as evicted.
104 : UseRemote {
105 : local: LocalLayerFileMetadata,
106 : remote: LayerFileMetadata,
107 : },
108 : /// The layer is present locally, and metadata matches.
109 : UseLocal(LocalLayerFileMetadata),
110 : }
111 :
112 : /// A layer needs to be left out of the layer map.
113 : #[derive(Debug)]
114 : pub(super) enum DismissedLayer {
115 : /// The related layer is is in future compared to disk_consistent_lsn, it must not be loaded.
116 : Future {
117 : /// `None` if the layer is only known through [`IndexPart`].
118 : local: Option<LocalLayerFileMetadata>,
119 : },
120 : /// The layer only exists locally.
121 : ///
122 : /// In order to make crash safe updates to layer map, we must dismiss layers which are only
123 : /// found locally or not yet included in the remote `index_part.json`.
124 : LocalOnly(LocalLayerFileMetadata),
125 : }
126 :
127 : /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
128 6 : pub(super) fn reconcile(
129 6 : discovered: Vec<(LayerName, Utf8PathBuf, u64)>,
130 6 : index_part: Option<&IndexPart>,
131 6 : disk_consistent_lsn: Lsn,
132 6 : generation: Generation,
133 6 : shard: ShardIndex,
134 6 : ) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {
135 6 : use Decision::*;
136 6 :
137 6 : // name => (local_metadata, remote_metadata)
138 6 : type Collected =
139 6 : HashMap<LayerName, (Option<LocalLayerFileMetadata>, Option<LayerFileMetadata>)>;
140 6 :
141 6 : let mut discovered = discovered
142 6 : .into_iter()
143 16 : .map(|(layer_name, local_path, file_size)| {
144 16 : (
145 16 : layer_name,
146 16 : // The generation and shard here will be corrected to match IndexPart in the merge below, unless
147 16 : // it is not in IndexPart, in which case using our current generation makes sense
148 16 : // because it will be uploaded in this generation.
149 16 : (
150 16 : Some(LocalLayerFileMetadata::new(
151 16 : local_path, file_size, generation, shard,
152 16 : )),
153 16 : None,
154 16 : ),
155 16 : )
156 16 : })
157 6 : .collect::<Collected>();
158 6 :
159 6 : // merge any index_part information, when available
160 6 : index_part
161 6 : .as_ref()
162 6 : .map(|ip| ip.layer_metadata.iter())
163 6 : .into_iter()
164 6 : .flatten()
165 16 : .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
166 16 : .for_each(|(name, metadata)| {
167 16 : if let Some(existing) = discovered.get_mut(name) {
168 16 : existing.1 = Some(metadata);
169 16 : } else {
170 0 : discovered.insert(name.to_owned(), (None, Some(metadata)));
171 0 : }
172 16 : });
173 6 :
174 6 : discovered
175 6 : .into_iter()
176 16 : .map(|(name, (local, remote))| {
177 16 : let decision = if name.is_in_future(disk_consistent_lsn) {
178 0 : Err(DismissedLayer::Future { local })
179 : } else {
180 16 : match (local, remote) {
181 16 : (Some(local), Some(remote)) if local.metadata != remote => {
182 0 : Ok(UseRemote { local, remote })
183 : }
184 16 : (Some(x), Some(_)) => Ok(UseLocal(x)),
185 0 : (None, Some(x)) => Ok(Evicted(x)),
186 0 : (Some(x), None) => Err(DismissedLayer::LocalOnly(x)),
187 : (None, None) => {
188 0 : unreachable!("there must not be any non-local non-remote files")
189 : }
190 : }
191 : };
192 :
193 16 : (name, decision)
194 16 : })
195 6 : .collect::<Vec<_>>()
196 6 : }
197 :
198 0 : pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {
199 0 : let file_name = path.file_name().expect("must be file path");
200 0 : tracing::debug!(kind, ?file_name, "cleaning up");
201 0 : std::fs::remove_file(path).with_context(|| format!("failed to remove {kind} at {path}"))
202 0 : }
203 :
204 0 : pub(super) fn cleanup_local_file_for_remote(
205 0 : local: &LocalLayerFileMetadata,
206 0 : remote: &LayerFileMetadata,
207 0 : ) -> anyhow::Result<()> {
208 0 : let local_size = local.metadata.file_size();
209 0 : let remote_size = remote.file_size();
210 0 : let path = &local.local_path;
211 0 :
212 0 : let file_name = path.file_name().expect("must be file path");
213 0 : tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
214 0 : if let Err(err) = crate::tenant::timeline::rename_to_backup(path) {
215 0 : assert!(
216 0 : path.exists(),
217 0 : "we would leave the local_layer without a file if this does not hold: {path}",
218 : );
219 0 : Err(err)
220 : } else {
221 0 : Ok(())
222 : }
223 0 : }
224 :
225 0 : pub(super) fn cleanup_future_layer(
226 0 : path: &Utf8Path,
227 0 : name: &LayerName,
228 0 : disk_consistent_lsn: Lsn,
229 0 : ) -> anyhow::Result<()> {
230 0 : // future image layers are allowed to be produced always for not yet flushed to disk
231 0 : // lsns stored in InMemoryLayer.
232 0 : let kind = name.kind();
233 0 : tracing::info!("found future {kind} layer {name} disk_consistent_lsn is {disk_consistent_lsn}");
234 0 : std::fs::remove_file(path)?;
235 0 : Ok(())
236 0 : }
237 :
238 0 : pub(super) fn cleanup_local_only_file(
239 0 : name: &LayerName,
240 0 : local: &LocalLayerFileMetadata,
241 0 : ) -> anyhow::Result<()> {
242 0 : let kind = name.kind();
243 0 : tracing::info!(
244 0 : "found local-only {kind} layer {name}, metadata {:?}",
245 : local.metadata
246 : );
247 0 : std::fs::remove_file(&local.local_path)?;
248 0 : Ok(())
249 0 : }
|