Line data Source code
1 : use crate::{
2 : is_temporary,
3 : tenant::{
4 : ephemeral_file::is_ephemeral_file,
5 : remote_timeline_client::{
6 : self,
7 : index::{IndexPart, LayerFileMetadata},
8 : },
9 : storage_layer::LayerFileName,
10 : Generation,
11 : },
12 : METADATA_FILE_NAME,
13 : };
14 : use anyhow::Context;
15 : use camino::Utf8Path;
16 : use pageserver_api::shard::ShardIndex;
17 : use std::{collections::HashMap, str::FromStr};
18 : use utils::lsn::Lsn;
19 :
20 : /// Identified files in the timeline directory.
21 : pub(super) enum Discovered {
22 : /// The only one we care about
23 : Layer(LayerFileName, u64),
24 : /// Old ephmeral files from previous launches, should be removed
25 : Ephemeral(String),
26 : /// Old temporary timeline files, unsure what these really are, should be removed
27 : Temporary(String),
28 : /// Temporary on-demand download files, should be removed
29 : TemporaryDownload(String),
30 : /// "metadata" file we persist locally and include in `index_part.json`
31 : Metadata,
32 : /// Backup file from previously future layers
33 : IgnoredBackup,
34 : /// Unrecognized, warn about these
35 : Unknown(String),
36 : }
37 :
38 : /// Scans the timeline directory for interesting files.
39 6 : pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovered>> {
40 6 : let mut ret = Vec::new();
41 :
42 16 : for direntry in path.read_dir_utf8()? {
43 16 : let direntry = direntry?;
44 16 : let file_name = direntry.file_name().to_string();
45 :
46 16 : let discovered = match LayerFileName::from_str(&file_name) {
47 16 : Ok(file_name) => {
48 16 : let file_size = direntry.metadata()?.len();
49 16 : Discovered::Layer(file_name, file_size)
50 : }
51 : Err(_) => {
52 0 : if file_name == METADATA_FILE_NAME {
53 0 : Discovered::Metadata
54 0 : } else if file_name.ends_with(".old") {
55 : // ignore these
56 0 : Discovered::IgnoredBackup
57 0 : } else if remote_timeline_client::is_temp_download_file(direntry.path()) {
58 0 : Discovered::TemporaryDownload(file_name)
59 0 : } else if is_ephemeral_file(&file_name) {
60 0 : Discovered::Ephemeral(file_name)
61 0 : } else if is_temporary(direntry.path()) {
62 0 : Discovered::Temporary(file_name)
63 : } else {
64 0 : Discovered::Unknown(file_name)
65 : }
66 : }
67 : };
68 :
69 16 : ret.push(discovered);
70 : }
71 :
72 6 : Ok(ret)
73 6 : }
74 :
75 : /// Decision on what to do with a layer file after considering its local and remote metadata.
76 0 : #[derive(Clone, Debug)]
77 : pub(super) enum Decision {
78 : /// The layer is not present locally.
79 : Evicted(LayerFileMetadata),
80 : /// The layer is present locally, but local metadata does not match remote; we must
81 : /// delete it and treat it as evicted.
82 : UseRemote {
83 : local: LayerFileMetadata,
84 : remote: LayerFileMetadata,
85 : },
86 : /// The layer is present locally, and metadata matches.
87 : UseLocal(LayerFileMetadata),
88 : }
89 :
90 : /// A layer needs to be left out of the layer map.
91 0 : #[derive(Debug)]
92 : pub(super) enum DismissedLayer {
93 : /// The related layer is is in future compared to disk_consistent_lsn, it must not be loaded.
94 : Future {
95 : /// The local metadata. `None` if the layer is only known through [`IndexPart`].
96 : local: Option<LayerFileMetadata>,
97 : },
98 : /// The layer only exists locally.
99 : ///
100 : /// In order to make crash safe updates to layer map, we must dismiss layers which are only
101 : /// found locally or not yet included in the remote `index_part.json`.
102 : LocalOnly(LayerFileMetadata),
103 : }
104 :
105 : /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
106 6 : pub(super) fn reconcile(
107 6 : discovered: Vec<(LayerFileName, u64)>,
108 6 : index_part: Option<&IndexPart>,
109 6 : disk_consistent_lsn: Lsn,
110 6 : generation: Generation,
111 6 : shard: ShardIndex,
112 6 : ) -> Vec<(LayerFileName, Result<Decision, DismissedLayer>)> {
113 6 : use Decision::*;
114 6 :
115 6 : // name => (local, remote)
116 6 : type Collected = HashMap<LayerFileName, (Option<LayerFileMetadata>, Option<LayerFileMetadata>)>;
117 6 :
118 6 : let mut discovered = discovered
119 6 : .into_iter()
120 16 : .map(|(name, file_size)| {
121 16 : (
122 16 : name,
123 16 : // The generation and shard here will be corrected to match IndexPart in the merge below, unless
124 16 : // it is not in IndexPart, in which case using our current generation makes sense
125 16 : // because it will be uploaded in this generation.
126 16 : (
127 16 : Some(LayerFileMetadata::new(file_size, generation, shard)),
128 16 : None,
129 16 : ),
130 16 : )
131 16 : })
132 6 : .collect::<Collected>();
133 6 :
134 6 : // merge any index_part information, when available
135 6 : index_part
136 6 : .as_ref()
137 6 : .map(|ip| ip.layer_metadata.iter())
138 6 : .into_iter()
139 6 : .flatten()
140 16 : .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
141 16 : .for_each(|(name, metadata)| {
142 16 : if let Some(existing) = discovered.get_mut(name) {
143 16 : existing.1 = Some(metadata);
144 16 : } else {
145 0 : discovered.insert(name.to_owned(), (None, Some(metadata)));
146 0 : }
147 16 : });
148 6 :
149 6 : discovered
150 6 : .into_iter()
151 16 : .map(|(name, (local, remote))| {
152 16 : let decision = if name.is_in_future(disk_consistent_lsn) {
153 0 : Err(DismissedLayer::Future { local })
154 : } else {
155 16 : match (local, remote) {
156 16 : (Some(local), Some(remote)) if local != remote => {
157 0 : Ok(UseRemote { local, remote })
158 : }
159 16 : (Some(x), Some(_)) => Ok(UseLocal(x)),
160 0 : (None, Some(x)) => Ok(Evicted(x)),
161 0 : (Some(x), None) => Err(DismissedLayer::LocalOnly(x)),
162 : (None, None) => {
163 0 : unreachable!("there must not be any non-local non-remote files")
164 : }
165 : }
166 : };
167 :
168 16 : (name, decision)
169 16 : })
170 6 : .collect::<Vec<_>>()
171 6 : }
172 :
173 0 : pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {
174 0 : let file_name = path.file_name().expect("must be file path");
175 0 : tracing::debug!(kind, ?file_name, "cleaning up");
176 0 : std::fs::remove_file(path).with_context(|| format!("failed to remove {kind} at {path}"))
177 0 : }
178 :
179 0 : pub(super) fn cleanup_local_file_for_remote(
180 0 : path: &Utf8Path,
181 0 : local: &LayerFileMetadata,
182 0 : remote: &LayerFileMetadata,
183 0 : ) -> anyhow::Result<()> {
184 0 : let local_size = local.file_size();
185 0 : let remote_size = remote.file_size();
186 0 :
187 0 : let file_name = path.file_name().expect("must be file path");
188 0 : tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
189 0 : if let Err(err) = crate::tenant::timeline::rename_to_backup(path) {
190 0 : assert!(
191 0 : path.exists(),
192 0 : "we would leave the local_layer without a file if this does not hold: {path}",
193 : );
194 0 : Err(err)
195 : } else {
196 0 : Ok(())
197 : }
198 0 : }
199 :
200 0 : pub(super) fn cleanup_future_layer(
201 0 : path: &Utf8Path,
202 0 : name: &LayerFileName,
203 0 : disk_consistent_lsn: Lsn,
204 0 : ) -> anyhow::Result<()> {
205 0 : // future image layers are allowed to be produced always for not yet flushed to disk
206 0 : // lsns stored in InMemoryLayer.
207 0 : let kind = name.kind();
208 0 : tracing::info!("found future {kind} layer {name} disk_consistent_lsn is {disk_consistent_lsn}");
209 0 : std::fs::remove_file(path)?;
210 0 : Ok(())
211 0 : }
212 :
213 0 : pub(super) fn cleanup_local_only_file(
214 0 : path: &Utf8Path,
215 0 : name: &LayerFileName,
216 0 : local: &LayerFileMetadata,
217 0 : ) -> anyhow::Result<()> {
218 0 : let kind = name.kind();
219 0 : tracing::info!("found local-only {kind} layer {name}, metadata {local:?}");
220 0 : std::fs::remove_file(path)?;
221 0 : Ok(())
222 0 : }
|