Line data Source code
1 : use crate::{
2 : is_temporary,
3 : tenant::{
4 : ephemeral_file::is_ephemeral_file,
5 : remote_timeline_client::{
6 : self,
7 : index::{IndexPart, LayerFileMetadata},
8 : },
9 : storage_layer::LayerName,
10 : Generation,
11 : },
12 : };
13 : use anyhow::Context;
14 : use camino::{Utf8Path, Utf8PathBuf};
15 : use pageserver_api::shard::ShardIndex;
16 : use std::{collections::HashMap, str::FromStr};
17 : use utils::lsn::Lsn;
18 :
19 : /// Classification of a single entry found while scanning the timeline directory.
20 : pub(super) enum Discovered {
21 : /// The only one we care about: a file whose name parses as a `LayerName`. Carries the parsed name, the path of the entry and the file size.
22 : Layer(LayerName, Utf8PathBuf, u64),
23 : /// Old ephemeral files from previous launches, should be removed. Carries the file name.
24 : Ephemeral(String),
25 : /// Old temporary timeline files, unsure what these really are, should be removed. Carries the file name.
26 : Temporary(String),
27 : /// Temporary on-demand download files, should be removed. Carries the file name.
28 : TemporaryDownload(String),
29 : /// Backup file (name ending in `.old`) from previously future layers; these are ignored.
30 : IgnoredBackup,
31 : /// Unrecognized, warn about these. Carries the file name.
32 : Unknown(String),
33 : }
34 :
35 : /// Scans the timeline directory for interesting files.
36 6 : pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovered>> {
37 6 : let mut ret = Vec::new();
38 :
39 16 : for direntry in path.read_dir_utf8()? {
40 16 : let direntry = direntry?;
41 16 : let file_name = direntry.file_name().to_string();
42 :
43 16 : let discovered = match LayerName::from_str(&file_name) {
44 16 : Ok(file_name) => {
45 16 : let file_size = direntry.metadata()?.len();
46 16 : Discovered::Layer(file_name, direntry.path().to_owned(), file_size)
47 : }
48 : Err(_) => {
49 0 : if file_name.ends_with(".old") {
50 : // ignore these
51 0 : Discovered::IgnoredBackup
52 0 : } else if remote_timeline_client::is_temp_download_file(direntry.path()) {
53 0 : Discovered::TemporaryDownload(file_name)
54 0 : } else if is_ephemeral_file(&file_name) {
55 0 : Discovered::Ephemeral(file_name)
56 0 : } else if is_temporary(direntry.path()) {
57 0 : Discovered::Temporary(file_name)
58 : } else {
59 0 : Discovered::Unknown(file_name)
60 : }
61 : }
62 : };
63 :
64 16 : ret.push(discovered);
65 : }
66 :
67 6 : Ok(ret)
68 6 : }
69 :
70 : /// Whereas `LayerFileMetadata` describes the metadata we would store in remote storage,
71 : /// this structure extends it with metadata describing the layer's presence in local storage.
72 : #[derive(Clone, Debug)]
73 : pub(super) struct LocalLayerFileMetadata {
74 : pub(super) metadata: LayerFileMetadata, // built from file size, generation and shard in `new`
75 : pub(super) local_path: Utf8PathBuf, // path of the layer file in local storage
76 : }
77 :
78 : impl LocalLayerFileMetadata {
79 16 : pub fn new(
80 16 : local_path: Utf8PathBuf,
81 16 : file_size: u64,
82 16 : generation: Generation,
83 16 : shard: ShardIndex,
84 16 : ) -> Self {
85 16 : Self {
86 16 : local_path,
87 16 : metadata: LayerFileMetadata::new(file_size, generation, shard),
88 16 : }
89 16 : }
90 : }
91 :
92 : /// Decision on what to do with a layer file after considering its local and remote metadata.
93 : #[derive(Clone, Debug)]
94 : pub(super) enum Decision {
95 : /// The layer is not present locally; carries the metadata known from the remote index.
96 : Evicted(LayerFileMetadata),
97 : /// The layer is present locally, but local metadata does not match remote; we must
98 : /// delete it and treat it as evicted.
99 : UseRemote {
100 : local: LocalLayerFileMetadata, // what was found in local storage
101 : remote: LayerFileMetadata, // what the remote index says about the layer
102 : },
103 : /// The layer is present locally, and metadata matches.
104 : UseLocal(LocalLayerFileMetadata),
105 : }
106 :
107 : /// Reason why a layer needs to be left out of the layer map.
108 : #[derive(Debug)]
109 : pub(super) enum DismissedLayer {
110 : /// The related layer is in the future compared to disk_consistent_lsn, it must not be loaded.
111 : Future {
112 : /// `None` if the layer is only known through [`IndexPart`].
113 : local: Option<LocalLayerFileMetadata>,
114 : },
115 : /// The layer only exists locally.
116 : ///
117 : /// In order to make crash safe updates to layer map, we must dismiss layers which are only
118 : /// found locally or not yet included in the remote `index_part.json`.
119 : LocalOnly(LocalLayerFileMetadata),
120 : }
121 :
122 : /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
123 6 : pub(super) fn reconcile(
124 6 : discovered: Vec<(LayerName, Utf8PathBuf, u64)>,
125 6 : index_part: Option<&IndexPart>,
126 6 : disk_consistent_lsn: Lsn,
127 6 : generation: Generation,
128 6 : shard: ShardIndex,
129 6 : ) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {
130 6 : use Decision::*;
131 6 :
132 6 : // name => (local_metadata, remote_metadata)
133 6 : type Collected =
134 6 : HashMap<LayerName, (Option<LocalLayerFileMetadata>, Option<LayerFileMetadata>)>;
135 6 :
136 6 : let mut discovered = discovered
137 6 : .into_iter()
138 16 : .map(|(layer_name, local_path, file_size)| {
139 16 : (
140 16 : layer_name,
141 16 : // The generation and shard here will be corrected to match IndexPart in the merge below, unless
142 16 : // it is not in IndexPart, in which case using our current generation makes sense
143 16 : // because it will be uploaded in this generation.
144 16 : (
145 16 : Some(LocalLayerFileMetadata::new(
146 16 : local_path, file_size, generation, shard,
147 16 : )),
148 16 : None,
149 16 : ),
150 16 : )
151 16 : })
152 6 : .collect::<Collected>();
153 6 :
154 6 : // merge any index_part information, when available
155 6 : index_part
156 6 : .as_ref()
157 6 : .map(|ip| ip.layer_metadata.iter())
158 6 : .into_iter()
159 6 : .flatten()
160 16 : .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
161 16 : .for_each(|(name, metadata)| {
162 16 : if let Some(existing) = discovered.get_mut(name) {
163 16 : existing.1 = Some(metadata);
164 16 : } else {
165 0 : discovered.insert(name.to_owned(), (None, Some(metadata)));
166 0 : }
167 16 : });
168 6 :
169 6 : discovered
170 6 : .into_iter()
171 16 : .map(|(name, (local, remote))| {
172 16 : let decision = if name.is_in_future(disk_consistent_lsn) {
173 0 : Err(DismissedLayer::Future { local })
174 : } else {
175 16 : match (local, remote) {
176 16 : (Some(local), Some(remote)) if local.metadata != remote => {
177 0 : Ok(UseRemote { local, remote })
178 : }
179 16 : (Some(x), Some(_)) => Ok(UseLocal(x)),
180 0 : (None, Some(x)) => Ok(Evicted(x)),
181 0 : (Some(x), None) => Err(DismissedLayer::LocalOnly(x)),
182 : (None, None) => {
183 0 : unreachable!("there must not be any non-local non-remote files")
184 : }
185 : }
186 : };
187 :
188 16 : (name, decision)
189 16 : })
190 6 : .collect::<Vec<_>>()
191 6 : }
192 :
193 0 : pub(super) fn cleanup(path: &Utf8Path, kind: &str) -> anyhow::Result<()> {
194 0 : let file_name = path.file_name().expect("must be file path");
195 0 : tracing::debug!(kind, ?file_name, "cleaning up");
196 0 : std::fs::remove_file(path).with_context(|| format!("failed to remove {kind} at {path}"))
197 0 : }
198 :
199 0 : pub(super) fn cleanup_local_file_for_remote(
200 0 : local: &LocalLayerFileMetadata,
201 0 : remote: &LayerFileMetadata,
202 0 : ) -> anyhow::Result<()> {
203 0 : let local_size = local.metadata.file_size();
204 0 : let remote_size = remote.file_size();
205 0 : let path = &local.local_path;
206 0 :
207 0 : let file_name = path.file_name().expect("must be file path");
208 0 : tracing::warn!("removing local file {file_name:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
209 0 : if let Err(err) = crate::tenant::timeline::rename_to_backup(path) {
210 0 : assert!(
211 0 : path.exists(),
212 0 : "we would leave the local_layer without a file if this does not hold: {path}",
213 : );
214 0 : Err(err)
215 : } else {
216 0 : Ok(())
217 : }
218 0 : }
219 :
220 0 : pub(super) fn cleanup_future_layer(
221 0 : path: &Utf8Path,
222 0 : name: &LayerName,
223 0 : disk_consistent_lsn: Lsn,
224 0 : ) -> anyhow::Result<()> {
225 0 : // future image layers are allowed to be produced always for not yet flushed to disk
226 0 : // lsns stored in InMemoryLayer.
227 0 : let kind = name.kind();
228 0 : tracing::info!("found future {kind} layer {name} disk_consistent_lsn is {disk_consistent_lsn}");
229 0 : std::fs::remove_file(path)?;
230 0 : Ok(())
231 0 : }
232 :
233 0 : pub(super) fn cleanup_local_only_file(
234 0 : name: &LayerName,
235 0 : local: &LocalLayerFileMetadata,
236 0 : ) -> anyhow::Result<()> {
237 0 : let kind = name.kind();
238 0 : tracing::info!(
239 0 : "found local-only {kind} layer {name}, metadata {:?}",
240 : local.metadata
241 : );
242 0 : std::fs::remove_file(&local.local_path)?;
243 0 : Ok(())
244 0 : }
|