TLA Line data Source code
1 : use anyhow::Context;
2 : use camino::{Utf8Path, Utf8PathBuf};
3 : use pageserver_api::models::{
4 : HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
5 : };
6 : use pageserver_api::shard::ShardIndex;
7 : use std::ops::Range;
8 : use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
9 : use std::sync::{Arc, Weak};
10 : use std::time::SystemTime;
11 : use tracing::Instrument;
12 : use utils::lsn::Lsn;
13 : use utils::sync::heavier_once_cell;
14 :
15 : use crate::config::PageServerConf;
16 : use crate::context::RequestContext;
17 : use crate::repository::Key;
18 : use crate::tenant::{remote_timeline_client::LayerFileMetadata, RemoteTimelineClient, Timeline};
19 :
20 : use super::delta_layer::{self, DeltaEntry};
21 : use super::image_layer;
22 : use super::{
23 : AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc,
24 : ValueReconstructResult, ValueReconstructState,
25 : };
26 :
27 : use utils::generation::Generation;
28 :
29 : /// A Layer contains all data in a "rectangle" consisting of a range of keys and
30 : /// range of LSNs.
31 : ///
32 : /// There are two kinds of layers, in-memory and on-disk layers. In-memory
33 : /// layers are used to ingest incoming WAL, and provide fast access to the
34 : /// recent page versions. On-disk layers are stored as files on disk, and are
35 : /// immutable. This type represents the on-disk kind while in-memory kind are represented by
36 : /// [`InMemoryLayer`].
37 : ///
38 : /// Furthermore, there are two kinds of on-disk layers: delta and image layers.
39 : /// A delta layer contains all modifications within a range of LSNs and keys.
40 : /// An image layer is a snapshot of all the data in a key-range, at a single
41 : /// LSN.
42 : ///
43 : /// This type models the on-disk layers, which can be evicted and on-demand downloaded.
44 : ///
45 : /// [`InMemoryLayer`]: super::inmemory_layer::InMemoryLayer
46 CBC 31058985 : #[derive(Clone)]
47 : pub(crate) struct Layer(Arc<LayerInner>);
48 :
49 : impl std::fmt::Display for Layer {
50 31291 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 31291 : if matches!(self.0.generation, Generation::Broken) {
52 UBC 0 : write!(f, "{}-broken", self.layer_desc().short_id())
53 : } else {
54 CBC 31291 : write!(
55 31291 : f,
56 31291 : "{}{}",
57 31291 : self.layer_desc().short_id(),
58 31291 : self.0.generation.get_suffix()
59 31291 : )
60 : }
61 31291 : }
62 : }
63 :
64 : impl std::fmt::Debug for Layer {
65 UBC 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
66 0 : write!(f, "{}", self)
67 0 : }
68 : }
69 :
70 : impl AsLayerDesc for Layer {
71 CBC 47123277 : fn layer_desc(&self) -> &PersistentLayerDesc {
72 47123277 : self.0.layer_desc()
73 47123277 : }
74 : }
75 :
76 : impl Layer {
77 : /// Creates a layer value for a file we know to not be resident.
78 43720 : pub(crate) fn for_evicted(
79 43720 : conf: &'static PageServerConf,
80 43720 : timeline: &Arc<Timeline>,
81 43720 : file_name: LayerFileName,
82 43720 : metadata: LayerFileMetadata,
83 43720 : ) -> Self {
84 43720 : let desc = PersistentLayerDesc::from_filename(
85 43720 : timeline.tenant_shard_id,
86 43720 : timeline.timeline_id,
87 43720 : file_name,
88 43720 : metadata.file_size(),
89 43720 : );
90 43720 :
91 43720 : let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted);
92 43720 :
93 43720 : let owner = Layer(Arc::new(LayerInner::new(
94 43720 : conf,
95 43720 : timeline,
96 43720 : access_stats,
97 43720 : desc,
98 43720 : None,
99 43720 : metadata.generation,
100 43720 : metadata.shard,
101 43720 : )));
102 :
103 43720 : debug_assert!(owner.0.needs_download_blocking().unwrap().is_some());
104 :
105 43720 : owner
106 43720 : }
107 :
108 : /// Creates a Layer value for a file we know to be resident in timeline directory.
109 14085 : pub(crate) fn for_resident(
110 14085 : conf: &'static PageServerConf,
111 14085 : timeline: &Arc<Timeline>,
112 14085 : file_name: LayerFileName,
113 14085 : metadata: LayerFileMetadata,
114 14085 : ) -> ResidentLayer {
115 14085 : let desc = PersistentLayerDesc::from_filename(
116 14085 : timeline.tenant_shard_id,
117 14085 : timeline.timeline_id,
118 14085 : file_name,
119 14085 : metadata.file_size(),
120 14085 : );
121 14085 :
122 14085 : let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident);
123 14085 :
124 14085 : let mut resident = None;
125 14085 :
126 14085 : let owner = Layer(Arc::new_cyclic(|owner| {
127 14085 : let inner = Arc::new(DownloadedLayer {
128 14085 : owner: owner.clone(),
129 14085 : kind: tokio::sync::OnceCell::default(),
130 14085 : version: 0,
131 14085 : });
132 14085 : resident = Some(inner.clone());
133 14085 :
134 14085 : LayerInner::new(
135 14085 : conf,
136 14085 : timeline,
137 14085 : access_stats,
138 14085 : desc,
139 14085 : Some(inner),
140 14085 : metadata.generation,
141 14085 : metadata.shard,
142 14085 : )
143 14085 : }));
144 14085 :
145 14085 : let downloaded = resident.expect("just initialized");
146 :
147 14085 : debug_assert!(owner.0.needs_download_blocking().unwrap().is_none());
148 :
149 14085 : timeline
150 14085 : .metrics
151 14085 : .resident_physical_size_add(metadata.file_size());
152 14085 :
153 14085 : ResidentLayer { downloaded, owner }
154 14085 : }
155 :
156 : /// Creates a Layer value for freshly written out new layer file by renaming it from a
157 : /// temporary path.
158 20448 : pub(crate) fn finish_creating(
159 20448 : conf: &'static PageServerConf,
160 20448 : timeline: &Arc<Timeline>,
161 20448 : desc: PersistentLayerDesc,
162 20448 : temp_path: &Utf8Path,
163 20448 : ) -> anyhow::Result<ResidentLayer> {
164 20448 : let mut resident = None;
165 20448 :
166 20448 : let owner = Layer(Arc::new_cyclic(|owner| {
167 20448 : let inner = Arc::new(DownloadedLayer {
168 20448 : owner: owner.clone(),
169 20448 : kind: tokio::sync::OnceCell::default(),
170 20448 : version: 0,
171 20448 : });
172 20448 : resident = Some(inner.clone());
173 20448 : let access_stats = LayerAccessStats::empty_will_record_residence_event_later();
174 20448 : access_stats.record_residence_event(
175 20448 : LayerResidenceStatus::Resident,
176 20448 : LayerResidenceEventReason::LayerCreate,
177 20448 : );
178 20448 : LayerInner::new(
179 20448 : conf,
180 20448 : timeline,
181 20448 : access_stats,
182 20448 : desc,
183 20448 : Some(inner),
184 20448 : timeline.generation,
185 20448 : timeline.get_shard_index(),
186 20448 : )
187 20448 : }));
188 20448 :
189 20448 : let downloaded = resident.expect("just initialized");
190 20448 :
191 20448 : // if the rename works, the path is as expected
192 20448 : std::fs::rename(temp_path, owner.local_path())
193 20448 : .with_context(|| format!("rename temporary file as correct path for {owner}"))?;
194 :
195 20448 : Ok(ResidentLayer { downloaded, owner })
196 20448 : }
197 :
198 : /// Requests the layer to be evicted and waits for this to be done.
199 : ///
200 : /// If the file is not resident, an [`EvictionError::NotFound`] is returned.
201 : ///
202 : /// If for a bad luck or blocking of the executor, we miss the actual eviction and the layer is
203 : /// re-downloaded, [`EvictionError::Downloaded`] is returned.
204 : ///
205 : /// Technically cancellation safe, but cancelling might shift the viewpoint of what generation
206 : /// of download-evict cycle on retry.
207 2539 : pub(crate) async fn evict_and_wait(
208 2539 : &self,
209 2539 : rtc: &RemoteTimelineClient,
210 2539 : ) -> Result<(), EvictionError> {
211 2539 : self.0.evict_and_wait(rtc).await
212 2539 : }
213 :
214 : /// Delete the layer file when the `self` gets dropped, also try to schedule a remote index upload
215 : /// then.
216 : ///
217 : /// On drop, this will cause a call to [`RemoteTimelineClient::schedule_deletion_of_unlinked`].
218 : /// This means that the unlinking by [gc] or [compaction] must have happened strictly before
219 : /// the value this is called on gets dropped.
220 : ///
221 : /// This is ensured by both of those methods accepting references to Layer.
222 : ///
223 : /// [gc]: [`RemoteTimelineClient::schedule_gc_update`]
224 : /// [compaction]: [`RemoteTimelineClient::schedule_compaction_update`]
225 5005 : pub(crate) fn delete_on_drop(&self) {
226 5005 : self.0.delete_on_drop();
227 5005 : }
228 :
229 : /// Return data needed to reconstruct given page at LSN.
230 : ///
231 : /// It is up to the caller to collect more data from the previous layer and
232 : /// perform WAL redo, if necessary.
233 : ///
234 : /// # Cancellation-Safety
235 : ///
236 : /// This method is cancellation-safe.
237 15454855 : pub(crate) async fn get_value_reconstruct_data(
238 15454855 : &self,
239 15454855 : key: Key,
240 15454855 : lsn_range: Range<Lsn>,
241 15454855 : reconstruct_data: &mut ValueReconstructState,
242 15454855 : ctx: &RequestContext,
243 15454875 : ) -> anyhow::Result<ValueReconstructResult> {
244 : use anyhow::ensure;
245 :
246 15454875 : let layer = self.0.get_or_maybe_download(true, Some(ctx)).await?;
247 15454865 : self.0
248 15454865 : .access_stats
249 15454865 : .record_access(LayerAccessKind::GetValueReconstructData, ctx);
250 15454865 :
251 15454865 : if self.layer_desc().is_delta {
252 15002765 : ensure!(lsn_range.start >= self.layer_desc().lsn_range.start);
253 15002765 : ensure!(self.layer_desc().key_range.contains(&key));
254 : } else {
255 452100 : ensure!(self.layer_desc().key_range.contains(&key));
256 452100 : ensure!(lsn_range.start >= self.layer_desc().image_layer_lsn());
257 452100 : ensure!(lsn_range.end >= self.layer_desc().image_layer_lsn());
258 : }
259 :
260 15454865 : layer
261 15454865 : .get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
262 15454865 : .instrument(tracing::debug_span!("get_value_reconstruct_data", layer=%self))
263 862444 : .await
264 15454863 : .with_context(|| format!("get_value_reconstruct_data for layer {self}"))
265 15454864 : }
266 :
267 : /// Download the layer if evicted.
268 : ///
269 : /// Will not error when the layer is already downloaded.
270 12 : pub(crate) async fn download(&self) -> anyhow::Result<()> {
271 32 : self.0.get_or_maybe_download(true, None).await?;
272 7 : Ok(())
273 12 : }
274 :
275 : /// Assuming the layer is already downloaded, returns a guard which will prohibit eviction
276 : /// while the guard exists.
277 : ///
278 : /// Returns None if the layer is currently evicted.
279 4795 : pub(crate) async fn keep_resident(&self) -> anyhow::Result<Option<ResidentLayer>> {
280 4795 : let downloaded = match self.0.get_or_maybe_download(false, None).await {
281 4171 : Ok(d) => d,
282 : // technically there are a lot of possible errors, but in practice it should only be
283 : // DownloadRequired which is tripped up. could work to improve this situation
284 : // statically later.
285 624 : Err(DownloadError::DownloadRequired) => return Ok(None),
286 UBC 0 : Err(e) => return Err(e.into()),
287 : };
288 :
289 CBC 4171 : Ok(Some(ResidentLayer {
290 4171 : downloaded,
291 4171 : owner: self.clone(),
292 4171 : }))
293 4795 : }
294 :
295 : /// Downloads if necessary and creates a guard, which will keep this layer from being evicted.
296 3710 : pub(crate) async fn download_and_keep_resident(&self) -> anyhow::Result<ResidentLayer> {
297 3710 : let downloaded = self.0.get_or_maybe_download(true, None).await?;
298 :
299 3710 : Ok(ResidentLayer {
300 3710 : downloaded,
301 3710 : owner: self.clone(),
302 3710 : })
303 3710 : }
304 :
305 2974 : pub(crate) fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
306 2974 : self.0.info(reset)
307 2974 : }
308 :
309 4170 : pub(crate) fn access_stats(&self) -> &LayerAccessStats {
310 4170 : &self.0.access_stats
311 4170 : }
312 :
313 21343 : pub(crate) fn local_path(&self) -> &Utf8Path {
314 21343 : &self.0.path
315 21343 : }
316 :
317 20463 : pub(crate) fn metadata(&self) -> LayerFileMetadata {
318 20463 : self.0.metadata()
319 20463 : }
320 :
321 : /// Traditional debug dumping facility
322 : #[allow(unused)]
323 2 : pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> {
324 2 : self.0.desc.dump();
325 2 :
326 2 : if verbose {
327 : // for now, unconditionally download everything, even if that might not be wanted.
328 2 : let l = self.0.get_or_maybe_download(true, Some(ctx)).await?;
329 2 : l.dump(&self.0, ctx).await?
330 UBC 0 : }
331 :
332 CBC 2 : Ok(())
333 2 : }
334 :
335 : /// Waits until this layer has been dropped (and if needed, local file deletion and remote
336 : /// deletion scheduling has completed).
337 : ///
338 : /// Does not start local deletion, use [`Self::delete_on_drop`] for that
339 : /// separatedly.
340 : #[cfg(feature = "testing")]
341 702 : pub(crate) fn wait_drop(&self) -> impl std::future::Future<Output = ()> + 'static {
342 702 : let mut rx = self.0.status.subscribe();
343 :
344 702 : async move {
345 : loop {
346 702 : if let Err(tokio::sync::broadcast::error::RecvError::Closed) = rx.recv().await {
347 702 : break;
348 UBC 0 : }
349 : }
350 CBC 702 : }
351 702 : }
352 : }
353 :
354 : /// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted.
355 : ///
356 : /// However when we want something evicted, we cannot evict it right away as there might be current
357 : /// reads happening on it. For example: it has been searched from [`LayerMap::search`] but not yet
358 : /// read with [`Layer::get_value_reconstruct_data`].
359 : ///
360 : /// [`LayerMap::search`]: crate::tenant::layer_map::LayerMap::search
361 UBC 0 : #[derive(Debug)]
362 : enum ResidentOrWantedEvicted {
363 : Resident(Arc<DownloadedLayer>),
364 : WantedEvicted(Weak<DownloadedLayer>, usize),
365 : }
366 :
367 : impl ResidentOrWantedEvicted {
368 CBC 15462735 : fn get_and_upgrade(&mut self) -> Option<(Arc<DownloadedLayer>, bool)> {
369 15462735 : match self {
370 15462735 : ResidentOrWantedEvicted::Resident(strong) => Some((strong.clone(), false)),
371 UBC 0 : ResidentOrWantedEvicted::WantedEvicted(weak, _) => match weak.upgrade() {
372 0 : Some(strong) => {
373 0 : LAYER_IMPL_METRICS.inc_raced_wanted_evicted_accesses();
374 0 :
375 0 : *self = ResidentOrWantedEvicted::Resident(strong.clone());
376 0 :
377 0 : Some((strong, true))
378 : }
379 0 : None => None,
380 : },
381 : }
382 CBC 15462735 : }
383 :
384 : /// When eviction is first requested, drop down to holding a [`Weak`].
385 : ///
386 : /// Returns `Some` if this was the first time eviction was requested. Care should be taken to
387 : /// drop the possibly last strong reference outside of the mutex of
388 : /// heavier_once_cell::OnceCell.
389 2538 : fn downgrade(&mut self) -> Option<Arc<DownloadedLayer>> {
390 2538 : match self {
391 2538 : ResidentOrWantedEvicted::Resident(strong) => {
392 2538 : let weak = Arc::downgrade(strong);
393 2538 : let mut temp = ResidentOrWantedEvicted::WantedEvicted(weak, strong.version);
394 2538 : std::mem::swap(self, &mut temp);
395 2538 : match temp {
396 2538 : ResidentOrWantedEvicted::Resident(strong) => Some(strong),
397 UBC 0 : ResidentOrWantedEvicted::WantedEvicted(..) => unreachable!("just swapped"),
398 : }
399 : }
400 0 : ResidentOrWantedEvicted::WantedEvicted(..) => None,
401 : }
402 CBC 2538 : }
403 : }
404 :
405 : struct LayerInner {
406 : /// Only needed to check ondemand_download_behavior_treat_error_as_warn and creation of
407 : /// [`Self::path`].
408 : conf: &'static PageServerConf,
409 :
410 : /// Full path to the file; unclear if this should exist anymore.
411 : path: Utf8PathBuf,
412 :
413 : desc: PersistentLayerDesc,
414 :
415 : /// Timeline access is needed for remote timeline client and metrics.
416 : timeline: Weak<Timeline>,
417 :
418 : /// Cached knowledge of [`Timeline::remote_client`] being `Some`.
419 : have_remote_client: bool,
420 :
421 : access_stats: LayerAccessStats,
422 :
423 : /// This custom OnceCell is backed by std mutex, but only held for short time periods.
424 : /// Initialization and deinitialization are done while holding a permit.
425 : inner: heavier_once_cell::OnceCell<ResidentOrWantedEvicted>,
426 :
427 : /// Do we want to delete locally and remotely this when `LayerInner` is dropped
428 : wanted_deleted: AtomicBool,
429 :
430 : /// Do we want to evict this layer as soon as possible? After being set to `true`, all accesses
431 : /// will try to downgrade [`ResidentOrWantedEvicted`], which will eventually trigger
432 : /// [`LayerInner::on_downloaded_layer_drop`].
433 : wanted_evicted: AtomicBool,
434 :
435 : /// Version is to make sure we will only evict a specific download of a file.
436 : ///
437 : /// Incremented for each download, stored in `DownloadedLayer::version` or
438 : /// `ResidentOrWantedEvicted::WantedEvicted`.
439 : version: AtomicUsize,
440 :
441 : /// Allow subscribing to when the layer actually gets evicted.
442 : status: tokio::sync::broadcast::Sender<Status>,
443 :
444 : /// Counter for exponential backoff with the download
445 : consecutive_failures: AtomicUsize,
446 :
447 : /// The generation of this Layer.
448 : ///
449 : /// For loaded layers (resident or evicted) this comes from [`LayerFileMetadata::generation`],
450 : /// for created layers from [`Timeline::generation`].
451 : generation: Generation,
452 :
453 : /// The shard of this Layer.
454 : ///
455 : /// For layers created in this process, this will always be the [`ShardIndex`] of the
456 : /// current `ShardIdentity`` (TODO: add link once it's introduced).
457 : ///
458 : /// For loaded layers, this may be some other value if the tenant has undergone
459 : /// a shard split since the layer was originally written.
460 : shard: ShardIndex,
461 :
462 : last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,
463 : }
464 :
465 : impl std::fmt::Display for LayerInner {
466 21984 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
467 21984 : write!(f, "{}", self.layer_desc().short_id())
468 21984 : }
469 : }
470 :
471 : impl AsLayerDesc for LayerInner {
472 47166786 : fn layer_desc(&self) -> &PersistentLayerDesc {
473 47166786 : &self.desc
474 47166786 : }
475 : }
476 :
/// Message broadcast through `LayerInner::status` to the waiters of an eviction.
#[derive(Debug, Clone, Copy)]
enum Status {
    /// Makes a waiting `evict_and_wait` return `Ok(())`.
    Evicted,
    /// Makes a waiting `evict_and_wait` return `EvictionError::Downloaded`.
    Downloaded,
}
482 :
483 : impl Drop for LayerInner {
484 46995 : fn drop(&mut self) {
485 46995 : if !*self.wanted_deleted.get_mut() {
486 : // should we try to evict if the last wish was for eviction?
487 : // feels like there's some hazard of overcrowding near shutdown near by, but we don't
488 : // run drops during shutdown (yet)
489 42690 : return;
490 4305 : }
491 :
492 4305 : let span = tracing::info_span!(parent: None, "layer_delete", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);
493 :
494 4305 : let path = std::mem::take(&mut self.path);
495 4305 : let file_name = self.layer_desc().filename();
496 4305 : let file_size = self.layer_desc().file_size;
497 4305 : let timeline = self.timeline.clone();
498 4305 : let meta = self.metadata();
499 4305 : let status = self.status.clone();
500 4305 :
501 4305 : crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || {
502 4305 : let _g = span.entered();
503 4305 :
504 4305 : // carry this until we are finished for [`Layer::wait_drop`] support
505 4305 : let _status = status;
506 :
507 4305 : let removed = match std::fs::remove_file(path) {
508 4303 : Ok(()) => true,
509 2 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
510 2 : // until we no longer do detaches by removing all local files before removing the
511 2 : // tenant from the global map, we will always get these errors even if we knew what
512 2 : // is the latest state.
513 2 : //
514 2 : // we currently do not track the latest state, so we'll also end up here on evicted
515 2 : // layers.
516 2 : false
517 : }
518 UBC 0 : Err(e) => {
519 0 : tracing::error!("failed to remove wanted deleted layer: {e}");
520 0 : LAYER_IMPL_METRICS.inc_delete_removes_failed();
521 0 : false
522 : }
523 : };
524 :
525 CBC 4305 : if let Some(timeline) = timeline.upgrade() {
526 4305 : if removed {
527 4303 : timeline.metrics.resident_physical_size_sub(file_size);
528 4303 : }
529 4305 : if let Some(remote_client) = timeline.remote_client.as_ref() {
530 4305 : let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]);
531 :
532 4305 : if let Err(e) = res {
533 : // test_timeline_deletion_with_files_stuck_in_upload_queue is good at
534 : // demonstrating this deadlock (without spawn_blocking): stop will drop
535 : // queued items, which will have ResidentLayer's, and those drops would try
536 : // to re-entrantly lock the RemoteTimelineClient inner state.
537 7 : if !timeline.is_active() {
538 7 : tracing::info!("scheduling deletion on drop failed: {e:#}");
539 : } else {
540 UBC 0 : tracing::warn!("scheduling deletion on drop failed: {e:#}");
541 : }
542 CBC 7 : LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
543 4298 : } else {
544 4298 : LAYER_IMPL_METRICS.inc_completed_deletes();
545 4298 : }
546 UBC 0 : }
547 0 : } else {
548 0 : // no need to nag that timeline is gone: under normal situation on
549 0 : // task_mgr::remove_tenant_from_memory the timeline is gone before we get dropped.
550 0 : LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);
551 0 : }
552 CBC 4305 : });
553 46995 : }
554 : }
555 :
556 : impl LayerInner {
557 78253 : fn new(
558 78253 : conf: &'static PageServerConf,
559 78253 : timeline: &Arc<Timeline>,
560 78253 : access_stats: LayerAccessStats,
561 78253 : desc: PersistentLayerDesc,
562 78253 : downloaded: Option<Arc<DownloadedLayer>>,
563 78253 : generation: Generation,
564 78253 : shard: ShardIndex,
565 78253 : ) -> Self {
566 78253 : let path = conf
567 78253 : .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id)
568 78253 : .join(desc.filename().to_string());
569 :
570 78253 : let (inner, version) = if let Some(inner) = downloaded {
571 34533 : let version = inner.version;
572 34533 : let resident = ResidentOrWantedEvicted::Resident(inner);
573 34533 : (heavier_once_cell::OnceCell::new(resident), version)
574 : } else {
575 43720 : (heavier_once_cell::OnceCell::default(), 0)
576 : };
577 :
578 78253 : LayerInner {
579 78253 : conf,
580 78253 : path,
581 78253 : desc,
582 78253 : timeline: Arc::downgrade(timeline),
583 78253 : have_remote_client: timeline.remote_client.is_some(),
584 78253 : access_stats,
585 78253 : wanted_deleted: AtomicBool::new(false),
586 78253 : wanted_evicted: AtomicBool::new(false),
587 78253 : inner,
588 78253 : version: AtomicUsize::new(version),
589 78253 : status: tokio::sync::broadcast::channel(1).0,
590 78253 : consecutive_failures: AtomicUsize::new(0),
591 78253 : generation,
592 78253 : shard,
593 78253 : last_evicted_at: std::sync::Mutex::default(),
594 78253 : }
595 78253 : }
596 :
597 5005 : fn delete_on_drop(&self) {
598 5005 : let res =
599 5005 : self.wanted_deleted
600 5005 : .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed);
601 5005 :
602 5005 : if res.is_ok() {
603 5005 : LAYER_IMPL_METRICS.inc_started_deletes();
604 5005 : }
605 5005 : }
606 :
607 : /// Cancellation safe, however dropping the future and calling this method again might result
608 : /// in a new attempt to evict OR join the previously started attempt.
609 2539 : pub(crate) async fn evict_and_wait(
610 2539 : &self,
611 2539 : _: &RemoteTimelineClient,
612 2539 : ) -> Result<(), EvictionError> {
613 2539 : use tokio::sync::broadcast::error::RecvError;
614 2539 :
615 2539 : assert!(self.have_remote_client);
616 :
617 2539 : let mut rx = self.status.subscribe();
618 :
619 2538 : let strong = {
620 2539 : match self.inner.get() {
621 2538 : Some(mut either) => {
622 2538 : self.wanted_evicted.store(true, Ordering::Relaxed);
623 2538 : either.downgrade()
624 : }
625 1 : None => return Err(EvictionError::NotFound),
626 : }
627 : };
628 :
629 2538 : if strong.is_some() {
630 2538 : // drop the DownloadedLayer outside of the holding the guard
631 2538 : drop(strong);
632 2538 : LAYER_IMPL_METRICS.inc_started_evictions();
633 2538 : }
634 :
635 2538 : match rx.recv().await {
636 2538 : Ok(Status::Evicted) => Ok(()),
637 UBC 0 : Ok(Status::Downloaded) => Err(EvictionError::Downloaded),
638 : Err(RecvError::Closed) => {
639 0 : unreachable!("sender cannot be dropped while we are in &self method")
640 : }
641 : Err(RecvError::Lagged(_)) => {
642 : // this is quite unlikely, but we are blocking a lot in the async context, so
643 : // we might be missing this because we are stuck on a LIFO slot on a thread
644 : // which is busy blocking for a 1TB database create_image_layers.
645 : //
646 : // use however late (compared to the initial expressing of wanted) as the
647 : // "outcome" now
648 0 : LAYER_IMPL_METRICS.inc_broadcast_lagged();
649 0 : match self.inner.get() {
650 0 : Some(_) => Err(EvictionError::Downloaded),
651 0 : None => Ok(()),
652 : }
653 : }
654 : }
655 CBC 2539 : }
656 :
657 : /// Cancellation safe.
658 15463374 : async fn get_or_maybe_download(
659 15463374 : self: &Arc<Self>,
660 15463374 : allow_download: bool,
661 15463374 : ctx: Option<&RequestContext>,
662 15463394 : ) -> Result<Arc<DownloadedLayer>, DownloadError> {
663 15463394 : let mut init_permit = None;
664 :
665 : loop {
666 15463394 : let download = move |permit| {
667 10035 : async move {
668 10035 : // disable any scheduled but not yet running eviction deletions for this
669 10035 : let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);
670 10035 :
671 10035 : // count cancellations, which currently remain largely unexpected
672 10035 : let init_cancelled =
673 10035 : scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
674 10035 :
675 10035 : // no need to make the evict_and_wait wait for the actual download to complete
676 10035 : drop(self.status.send(Status::Downloaded));
677 :
678 10035 : let timeline = self
679 10035 : .timeline
680 10035 : .upgrade()
681 10035 : .ok_or_else(|| DownloadError::TimelineShutdown)?;
682 :
683 : // FIXME: grab a gate
684 :
685 10035 : let can_ever_evict = timeline.remote_client.as_ref().is_some();
686 :
687 : // check if we really need to be downloaded; could have been already downloaded by a
688 : // cancelled previous attempt.
689 10035 : let needs_download = self
690 10035 : .needs_download()
691 9589 : .await
692 10035 : .map_err(DownloadError::PreStatFailed)?;
693 :
694 10035 : let permit = if let Some(reason) = needs_download {
695 10035 : if let NeedsDownload::NotFile(ft) = reason {
696 UBC 0 : return Err(DownloadError::NotFile(ft));
697 CBC 10035 : }
698 10035 :
699 10035 : // only reset this after we've decided we really need to download. otherwise it'd
700 10035 : // be impossible to mark cancelled downloads for eviction, like one could imagine
701 10035 : // we would like to do for prefetching which was not needed.
702 10035 : self.wanted_evicted.store(false, Ordering::Release);
703 10035 :
704 10035 : if !can_ever_evict {
705 UBC 0 : return Err(DownloadError::NoRemoteStorage);
706 CBC 10035 : }
707 :
708 10035 : if let Some(ctx) = ctx {
709 9396 : self.check_expected_download(ctx)?;
710 639 : }
711 :
712 10035 : if !allow_download {
713 : // this does look weird, but for LayerInner the "downloading" means also changing
714 : // internal once related state ...
715 624 : return Err(DownloadError::DownloadRequired);
716 9411 : }
717 9411 :
718 9411 : tracing::info!(%reason, "downloading on-demand");
719 :
720 19664 : self.spawn_download_and_wait(timeline, permit).await?
721 : } else {
722 : // the file is present locally, probably by a previous but cancelled call to
723 : // get_or_maybe_download. alternatively we might be running without remote storage.
724 UBC 0 : LAYER_IMPL_METRICS.inc_init_needed_no_download();
725 0 :
726 0 : permit
727 : };
728 :
729 CBC 9396 : let since_last_eviction =
730 9396 : self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
731 9396 : if let Some(since_last_eviction) = since_last_eviction {
732 105 : // FIXME: this will not always be recorded correctly until #6028 (the no
733 105 : // download needed branch above)
734 105 : LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
735 9291 : }
736 :
737 9396 : let res = Arc::new(DownloadedLayer {
738 9396 : owner: Arc::downgrade(self),
739 9396 : kind: tokio::sync::OnceCell::default(),
740 9396 : version: next_version,
741 9396 : });
742 9396 :
743 9396 : self.access_stats.record_residence_event(
744 9396 : LayerResidenceStatus::Resident,
745 9396 : LayerResidenceEventReason::ResidenceChange,
746 9396 : );
747 9396 :
748 9396 : let waiters = self.inner.initializer_count();
749 9396 : if waiters > 0 {
750 318 : tracing::info!(
751 318 : waiters,
752 318 : "completing the on-demand download for other tasks"
753 318 : );
754 9078 : }
755 :
756 9396 : scopeguard::ScopeGuard::into_inner(init_cancelled);
757 9396 :
758 9396 : Ok((ResidentOrWantedEvicted::Resident(res), permit))
759 10026 : }
760 10035 : .instrument(tracing::info_span!("get_or_maybe_download", layer=%self))
761 10035 : };
762 :
763 15463394 : if let Some(init_permit) = init_permit.take() {
764 : // use the already held initialization permit because it is impossible to hit the
765 : // below paths anymore essentially limiting the max loop iterations to 2.
766 UBC 0 : let (value, init_permit) = download(init_permit).await?;
767 0 : let mut guard = self.inner.set(value, init_permit);
768 0 : let (strong, _upgraded) = guard
769 0 : .get_and_upgrade()
770 0 : .expect("init creates strong reference, we held the init permit");
771 0 : return Ok(strong);
772 CBC 15463394 : }
773 :
774 UBC 0 : let (weak, permit) = {
775 CBC 15463394 : let mut locked = self.inner.get_or_init(download).await?;
776 :
777 15462755 : if let Some((strong, upgraded)) = locked.get_and_upgrade() {
778 15462755 : if upgraded {
779 UBC 0 : // when upgraded back, the Arc<DownloadedLayer> is still available, but
780 0 : // previously a `evict_and_wait` was received.
781 0 : self.wanted_evicted.store(false, Ordering::Relaxed);
782 0 :
783 0 : // error out any `evict_and_wait`
784 0 : drop(self.status.send(Status::Downloaded));
785 0 : LAYER_IMPL_METRICS
786 0 : .inc_eviction_cancelled(EvictionCancelled::UpgradedBackOnAccess);
787 CBC 15462755 : }
788 :
789 15462755 : return Ok(strong);
790 : } else {
791 : // path to here: the evict_blocking is stuck on spawn_blocking queue.
792 : //
793 : // reset the contents, deactivating the eviction and causing a
794 : // EvictionCancelled::LostToDownload or EvictionCancelled::VersionCheckFailed.
795 UBC 0 : locked.take_and_deinit()
796 0 : }
797 0 : };
798 0 :
799 0 : // unlock first, then drop the weak, but because upgrade failed, we
800 0 : // know it cannot be a problem.
801 0 :
802 0 : assert!(
803 0 : matches!(weak, ResidentOrWantedEvicted::WantedEvicted(..)),
804 0 : "unexpected {weak:?}, ResidentOrWantedEvicted::get_and_upgrade has a bug"
805 : );
806 :
807 0 : init_permit = Some(permit);
808 0 :
809 0 : LAYER_IMPL_METRICS.inc_retried_get_or_maybe_download();
810 : }
811 CBC 15463385 : }
812 :
813 : /// Nag or fail per RequestContext policy.
///
/// `Download` is accepted silently. `Warn` and `Error` log an info message and bump
/// `UNEXPECTED_ONDEMAND_DOWNLOADS`; the call only fails with
/// `DownloadError::ContextAndConfigReallyDeniesDownloads` when the behavior is `Error` and
/// the config flag `ondemand_download_behavior_treat_error_as_warn` is not set.
814 9396 : fn check_expected_download(&self, ctx: &RequestContext) -> Result<(), DownloadError> {
815 9396 : use crate::context::DownloadBehavior::*;
816 9396 : let b = ctx.download_behavior();
817 9396 : match b {
818 9396 : Download => Ok(()),
819 : Warn | Error => {
820 UBC 0 : tracing::info!(
821 0 : "unexpectedly on-demand downloading for task kind {:?}",
822 0 : ctx.task_kind()
823 0 : );
824 0 : crate::metrics::UNEXPECTED_ONDEMAND_DOWNLOADS.inc();
825 :
826 0 : let really_error =
827 0 : matches!(b, Error) && !self.conf.ondemand_download_behavior_treat_error_as_warn;
828 :
829 0 : if really_error {
830 : // this check is only probabilistic, seems like a flakiness footgun
831 0 : Err(DownloadError::ContextAndConfigReallyDeniesDownloads)
832 : } else {
833 0 : Ok(())
834 : }
835 : }
836 : }
837 CBC 9396 : }
838 :
839 : /// Actual download, at most one is executed at a time.
///
/// Spawns a `RemoteDownloadTask` through `task_mgr` which calls
/// `RemoteTimelineClient::download_layer_file` and sends the outcome, together with the
/// `permit`, back over a oneshot channel. This function then awaits that channel.
///
/// On success the file is re-checked with `needs_download`, the failure counter is reset and
/// the permit is returned. A failed download maps to `DownloadError::DownloadFailed`, a
/// dropped sender to `DownloadError::DownloadCancelled`, and a failing post-download stat to
/// `DownloadError::PostStatFailed`.
///
/// Panics if the file still needs downloading after a download which reported success, or if
/// the timeline has no remote client.
840 9411 : async fn spawn_download_and_wait(
841 9411 : self: &Arc<Self>,
842 9411 : timeline: Arc<Timeline>,
843 9411 : permit: heavier_once_cell::InitPermit,
844 9411 : ) -> Result<heavier_once_cell::InitPermit, DownloadError> {
845 9411 : let task_name = format!("download layer {}", self);
846 9411 :
847 9411 : let (tx, rx) = tokio::sync::oneshot::channel();
848 9411 :
849 9411 : // this is sadly needed because of task_mgr::shutdown_tasks, otherwise we cannot
850 9411 : // block tenant::mgr::remove_tenant_from_memory.
851 9411 :
852 9411 : let this: Arc<Self> = self.clone();
853 9411 :
854 9411 : crate::task_mgr::spawn(
855 9411 : &tokio::runtime::Handle::current(),
856 9411 : crate::task_mgr::TaskKind::RemoteDownloadTask,
857 9411 : Some(self.desc.tenant_shard_id),
858 9411 : Some(self.desc.timeline_id),
859 9411 : &task_name,
860 9411 : false,
861 9411 : async move {
862 9411 :
863 9411 : let client = timeline
864 9411 : .remote_client
865 9411 : .as_ref()
866 9411 : .expect("checked above with have_remote_client");
867 :
868 9411 : let result = client.download_layer_file(
869 9411 : &this.desc.filename(),
870 9411 : &this.metadata(),
871 9411 : &crate::task_mgr::shutdown_token()
872 9411 : )
873 376826 : .await;
874 :
875 9408 : let result = match result {
876 9397 : Ok(size) => {
// account for the newly resident bytes
877 9397 : timeline.metrics.resident_physical_size_add(size);
878 9397 : Ok(())
879 : }
880 11 : Err(e) => {
// `fetch_add` returns the previous value, so the first failure backs off with 0
881 11 : let consecutive_failures =
882 11 : this.consecutive_failures.fetch_add(1, Ordering::Relaxed);
883 11 :
// NOTE(review): 1.5 and 60.0 are presumably the base and the cap in seconds -- confirm
// against utils::backoff
884 11 : let backoff = utils::backoff::exponential_backoff_duration_seconds(
885 11 : consecutive_failures.min(u32::MAX as usize) as u32,
886 11 : 1.5,
887 11 : 60.0,
888 11 : );
889 11 :
890 11 : let backoff = std::time::Duration::from_secs_f64(backoff);
891 11 :
// delay reporting the failure to the requester, unless shutdown or timeline cancellation
// comes first
892 17 : tokio::select! {
893 17 : _ = tokio::time::sleep(backoff) => {},
894 17 : _ = crate::task_mgr::shutdown_token().cancelled_owned() => {},
895 17 : _ = timeline.cancel.cancelled() => {},
896 17 : };
897 :
898 11 : Err(e)
899 : }
900 : };
901 :
// a failed send means the receiving side (our caller) is gone; the unsent value is handed
// back in `Err`
902 9408 : if let Err(res) = tx.send((result, permit)) {
903 6 : match res {
904 1 : (Ok(()), _) => {
905 1 : // our caller is cancellation safe so this is fine; if someone
906 1 : // else requests the layer, they'll find it already downloaded.
907 1 : //
908 1 : // See counter [`LayerImplMetrics::inc_init_needed_no_download`]
909 1 : //
910 1 : // FIXME(#6028): however, could be that we should consider marking the
911 1 : // layer for eviction? alas, cannot: because only DownloadedLayer will
912 1 : // handle that.
913 1 : },
914 5 : (Err(e), _) => {
915 5 : // our caller is cancellation safe, but we might be racing with
916 5 : // another attempt to initialize. before we have cancellation
917 5 : // token support: these attempts should converge regardless of
918 5 : // their completion order.
919 5 : tracing::error!("layer file download failed, and additionally failed to communicate this to caller: {e:?}");
920 5 : LAYER_IMPL_METRICS.inc_download_failed_without_requester();
921 : }
922 : }
923 9402 : }
924 :
925 9408 : Ok(())
926 9411 : }
927 9411 : .in_current_span(),
928 9411 : );
929 10713 : match rx.await {
930 9396 : Ok((Ok(()), permit)) => {
931 9396 : if let Some(reason) = self
932 9396 : .needs_download()
933 8951 : .await
934 9396 : .map_err(DownloadError::PostStatFailed)?
935 : {
936 : // this is really a bug in needs_download or remote timeline client
937 UBC 0 : panic!("post-condition failed: needs_download returned {reason:?}");
938 CBC 9396 : }
939 9396 :
940 9396 : self.consecutive_failures.store(0, Ordering::Relaxed);
941 9396 : tracing::info!("on-demand download successful");
942 :
943 9396 : Ok(permit)
944 : }
945 6 : Ok((Err(e), _permit)) => {
946 6 : // sleep already happened in the spawned task, if it was not cancelled
947 6 : let consecutive_failures = self.consecutive_failures.load(Ordering::Relaxed);
948 6 : tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
949 6 : Err(DownloadError::DownloadFailed)
950 : }
// the sender was dropped without sending anything
951 UBC 0 : Err(_gone) => Err(DownloadError::DownloadCancelled),
952 : }
953 CBC 9402 : }
954 :
/// Stats the layer file via `tokio::fs` and reports why it would need to be downloaded.
///
/// Returns `Ok(None)` when the file is present with the expected size,
/// `Ok(Some(reason))` when it is missing or fails the checks, and `Err` for any stat error
/// other than `NotFound`.
955 19431 : async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
956 19431 : match tokio::fs::metadata(&self.path).await {
957 9396 : Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),
958 10035 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),
959 UBC 0 : Err(e) => Err(e),
960 : }
961 CBC 19431 : }
962 :
/// Same check as `needs_download`, but stats the path with the synchronous
/// `Utf8Path::metadata` instead of `tokio::fs`.
963 57805 : fn needs_download_blocking(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
964 57805 : match self.path.metadata() {
965 14085 : Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),
966 43720 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),
967 UBC 0 : Err(e) => Err(e),
968 : }
969 CBC 57805 : }
970 :
/// Validates already fetched metadata of the layer path: it must be a regular file whose
/// length equals `desc.file_size`. The `Err` value names the reason a download is needed.
971 23481 : fn is_file_present_and_good_size(&self, m: &std::fs::Metadata) -> Result<(), NeedsDownload> {
972 23481 : // in future, this should include sha2-256 validation of the file.
973 23481 : if !m.is_file() {
974 UBC 0 : Err(NeedsDownload::NotFile(m.file_type()))
975 CBC 23481 : } else if m.len() != self.desc.file_size {
976 UBC 0 : Err(NeedsDownload::WrongSize {
977 0 : actual: m.len(),
978 0 : expected: self.desc.file_size,
979 0 : })
980 : } else {
981 CBC 23481 : Ok(())
982 : }
983 23481 : }
984 :
/// Builds the API model describing this layer: `HistoricLayerInfo::Delta` for delta layers
/// (with the LSN range) or `HistoricLayerInfo::Image` otherwise (with the image LSN).
///
/// `reset` is forwarded to `LayerAccessStats::as_api_model`.
985 2974 : fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
986 2974 : let layer_file_name = self.desc.filename().file_name();
987 2974 :
988 2974 : // this is not accurate: we could have the file locally but there was a cancellation
989 2974 : // and now we are not in sync, or we are currently downloading it.
990 2974 : let remote = self.inner.get().is_none();
991 2974 :
992 2974 : let access_stats = self.access_stats.as_api_model(reset);
993 2974 :
994 2974 : if self.desc.is_delta {
995 2393 : let lsn_range = &self.desc.lsn_range;
996 2393 :
997 2393 : HistoricLayerInfo::Delta {
998 2393 : layer_file_name,
999 2393 : layer_file_size: self.desc.file_size,
1000 2393 : lsn_start: lsn_range.start,
1001 2393 : lsn_end: lsn_range.end,
1002 2393 : remote,
1003 2393 : access_stats,
1004 2393 : }
1005 : } else {
1006 581 : let lsn = self.desc.image_layer_lsn();
1007 581 :
1008 581 : HistoricLayerInfo::Image {
1009 581 : layer_file_name,
1010 581 : layer_file_size: self.desc.file_size,
1011 581 : lsn_start: lsn,
1012 581 : remote,
1013 581 : access_stats,
1014 581 : }
1015 : }
1016 2974 : }
1017 :
1018 : /// `DownloadedLayer` is being dropped, so it calls this method.
///
/// Does nothing when the layer is wanted deleted. Otherwise, if eviction was requested and a
/// remote client exists, queues `evict_blocking(version)` on `BACKGROUND_RUNTIME`'s blocking
/// pool and records the outcome in `LAYER_IMPL_METRICS`. Only a `Weak` is moved into the
/// queued closure.
1019 2538 : fn on_downloaded_layer_drop(self: Arc<LayerInner>, version: usize) {
1020 2538 : let delete = self.wanted_deleted.load(Ordering::Acquire);
1021 2538 : let evict = self.wanted_evicted.load(Ordering::Acquire);
1022 2538 : let can_evict = self.have_remote_client;
1023 2538 :
1024 2538 : if delete {
1025 UBC 0 : // do nothing now, only in LayerInner::drop -- this was originally implemented because
1026 0 : // we could have already scheduled the deletion at the time.
1027 0 : //
1028 0 : // FIXME: this is not true anymore, we can safely evict wanted deleted files.
1029 CBC 2538 : } else if can_evict && evict {
1030 2538 : let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, %version);
1031 :
1032 : // downgrade for queueing, in case there's a tear down already ongoing we should not
1033 : // hold it alive.
1034 2538 : let this = Arc::downgrade(&self);
1035 2538 : drop(self);
1036 2538 :
1037 2538 : // NOTE: this scope *must* never call `self.inner.get` because evict_and_wait might
1038 2538 : // drop while the `self.inner` is being locked, leading to a deadlock.
1039 2538 :
1040 2538 : crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || {
1041 2538 : let _g = span.entered();
1042 :
1043 : // if LayerInner is already dropped here, do nothing because the delete on drop
1044 : // has already run while we were in queue
1045 2538 : let Some(this) = this.upgrade() else {
1046 UBC 0 : LAYER_IMPL_METRICS.inc_eviction_cancelled(EvictionCancelled::LayerGone);
1047 0 : return;
1048 : };
1049 CBC 2538 : match this.evict_blocking(version) {
1050 2538 : Ok(()) => LAYER_IMPL_METRICS.inc_completed_evictions(),
1051 UBC 0 : Err(reason) => LAYER_IMPL_METRICS.inc_eviction_cancelled(reason),
1052 : }
1053 CBC 2538 : });
1054 UBC 0 : }
1055 CBC 2538 : }
1056 :
/// Performs the eviction of the local layer file for the `DownloadedLayer` identified by
/// `only_version`. Uses blocking filesystem calls.
///
/// Bails out early, without touching the file, when the timeline is gone
/// (`TimelineGone`), when `inner` is no longer `WantedEvicted` (`AlreadyReinitialized`),
/// holds a different version (`VersionCheckFailed`) or is already deinitialized
/// (`LostToDownload`).
///
/// Past those checks `Status::Evicted` is sent and `last_evicted_at` is updated even if
/// removing the file failed (`FileNotFound` / `RemoveFailed`); the eviction metrics are only
/// updated when the removal succeeded.
1057 2538 : fn evict_blocking(&self, only_version: usize) -> Result<(), EvictionCancelled> {
1058 : // deleted or detached timeline, don't do anything.
1059 2538 : let Some(timeline) = self.timeline.upgrade() else {
1060 UBC 0 : return Err(EvictionCancelled::TimelineGone);
1061 : };
1062 :
1063 : // to avoid starting a new download while we evict, keep holding on to the
1064 : // permit.
1065 CBC 2538 : let _permit = {
1066 2538 : let maybe_downloaded = self.inner.get();
1067 :
1068 2538 : let (_weak, permit) = match maybe_downloaded {
1069 2538 : Some(mut guard) => {
1070 2538 : if let ResidentOrWantedEvicted::WantedEvicted(_weak, version) = &*guard {
1071 2538 : if *version == only_version {
1072 2538 : guard.take_and_deinit()
1073 : } else {
1074 : // this was not for us; maybe there's another eviction job
1075 : // TODO: does it make any sense to stall here? unique versions do not
1076 : // matter, we only want to make sure not to evict a resident, which we
1077 : // are not doing.
1078 UBC 0 : return Err(EvictionCancelled::VersionCheckFailed);
1079 : }
1080 : } else {
1081 0 : return Err(EvictionCancelled::AlreadyReinitialized);
1082 : }
1083 : }
1084 : None => {
1085 : // already deinitialized, perhaps get_or_maybe_download did this and is
1086 : // currently waiting to reinitialize it
1087 0 : return Err(EvictionCancelled::LostToDownload);
1088 : }
1089 : };
1090 :
1091 CBC 2538 : permit
1092 2538 : };
1093 2538 :
1094 2538 : // now accesses to inner.get_or_init wait on the semaphore or the `_permit`
1095 2538 :
1096 2538 : self.access_stats.record_residence_event(
1097 2538 : LayerResidenceStatus::Evicted,
1098 2538 : LayerResidenceEventReason::ResidenceChange,
1099 2538 : );
1100 :
1101 2538 : let res = match capture_mtime_and_remove(&self.path) {
1102 2538 : Ok(local_layer_mtime) => {
// `duration_since` fails if the file's mtime is later than the current system time
1103 2538 : let duration = SystemTime::now().duration_since(local_layer_mtime);
1104 2538 : match duration {
1105 2538 : Ok(elapsed) => {
1106 2538 : timeline
1107 2538 : .metrics
1108 2538 : .evictions_with_low_residence_duration
1109 2538 : .read()
1110 2538 : .unwrap()
1111 2538 : .observe(elapsed);
1112 2538 : tracing::info!(
1113 2538 : residence_millis = elapsed.as_millis(),
1114 2538 : "evicted layer after known residence period"
1115 2538 : );
1116 : }
1117 : Err(_) => {
1118 UBC 0 : tracing::info!("evicted layer after unknown residence period");
1119 : }
1120 : }
1121 CBC 2538 : timeline.metrics.evictions.inc();
1122 2538 : timeline
1123 2538 : .metrics
1124 2538 : .resident_physical_size_sub(self.desc.file_size);
1125 2538 :
1126 2538 : Ok(())
1127 : }
1128 UBC 0 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
1129 0 : tracing::error!(
1130 0 : layer_size = %self.desc.file_size,
1131 0 : "failed to evict layer from disk, it was already gone (metrics will be inaccurate)"
1132 0 : );
1133 0 : Err(EvictionCancelled::FileNotFound)
1134 : }
1135 0 : Err(e) => {
1136 0 : tracing::error!("failed to evict file from disk: {e:#}");
1137 0 : Err(EvictionCancelled::RemoveFailed)
1138 : }
1139 : };
1140 :
1141 : // we are still holding the permit, so no new spawn_download_and_wait can happen
// the send result is deliberately discarded
1142 CBC 2538 : drop(self.status.send(Status::Evicted));
1143 2538 :
1144 2538 : *self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now());
1145 2538 :
1146 2538 : res
1147 2538 : }
1148 :
/// Builds a `LayerFileMetadata` from the descriptor's file size and this layer's generation
/// and shard.
1149 34179 : fn metadata(&self) -> LayerFileMetadata {
1150 34179 : LayerFileMetadata::new(self.desc.file_size, self.generation, self.shard)
1151 34179 : }
1152 : }
1153 :
/// Reads the modification time of `path`, then removes the file.
///
/// Returns the captured mtime on success. Any error from the stat, the mtime lookup or the
/// removal is returned as-is; if the stat or mtime lookup fails the file is not removed.
1154 2538 : fn capture_mtime_and_remove(path: &Utf8Path) -> Result<SystemTime, std::io::Error> {
1155 2538 : let m = path.metadata()?;
1156 2538 : let local_layer_mtime = m.modified()?;
1157 2538 : std::fs::remove_file(path)?;
1158 2538 : Ok(local_layer_mtime)
1159 2538 : }
1160 :
/// Crate-visible reasons why an eviction did not take place.
// NOTE(review): presumably returned by `evict_and_wait`, which is outside this chunk -- confirm.
1161 UBC 0 : #[derive(Debug, thiserror::Error)]
1162 : pub(crate) enum EvictionError {
/// The layer was not resident to begin with.
1163 : #[error("layer was already evicted")]
1164 : NotFound,
1165 :
1166 : /// Evictions must always lose to downloads in races, and this time it happened.
1167 : #[error("layer was downloaded instead")]
1168 : Downloaded,
1169 : }
1170 :
1171 : /// Error internal to the [`LayerInner::get_or_maybe_download`]
1172 CBC 13 : #[derive(Debug, thiserror::Error)]
1173 : enum DownloadError {
1174 : #[error("timeline has already shutdown")]
1175 : TimelineShutdown,
1176 : #[error("no remote storage configured")]
1177 : NoRemoteStorage,
/// Returned by `check_expected_download` when the request context asks for `Error` and the
/// config does not downgrade it to a warning.
1178 : #[error("context denies downloading")]
1179 : ContextAndConfigReallyDeniesDownloads,
1180 : #[error("downloading is really required but not allowed by this method")]
1181 : DownloadRequired,
1182 : #[error("layer path exists, but it is not a file: {0:?}")]
1183 : NotFile(std::fs::FileType),
1184 : /// Why no error here? Because it will be reported by page_service. We should have also done
1185 : /// retries already.
1186 : #[error("downloading evicted layer file failed")]
1187 : DownloadFailed,
/// The download task went away without reporting a result.
1188 : #[error("downloading failed, possibly for shutdown")]
1189 : DownloadCancelled,
1190 : #[error("pre-condition: stat before download failed")]
1191 : PreStatFailed(#[source] std::io::Error),
/// The stat performed after a successful download failed.
1192 : #[error("post-condition: stat after download failed")]
1193 : PostStatFailed(#[source] std::io::Error),
1194 : }
1195 :
/// Reason why the local layer file cannot be used as-is and has to be downloaded.
1196 UBC 0 : #[derive(Debug, PartialEq)]
1197 : pub(crate) enum NeedsDownload {
/// Nothing exists at the layer path.
1198 : NotFound,
/// The path exists but is not a regular file.
1199 : NotFile(std::fs::FileType),
/// The on-disk length differs from the size recorded in the layer descriptor.
1200 : WrongSize { actual: u64, expected: u64 },
1201 : }
1202 :
/// Human-readable rendering of the download reason.
1203 : impl std::fmt::Display for NeedsDownload {
1204 CBC 9411 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1205 9411 : match self {
1206 9411 : NeedsDownload::NotFound => write!(f, "file was not found"),
1207 UBC 0 : NeedsDownload::NotFile(ft) => write!(f, "path is not a file; {ft:?}"),
1208 0 : NeedsDownload::WrongSize { actual, expected } => {
1209 0 : write!(f, "file size mismatch {actual} vs. {expected}")
1210 : }
1211 : }
1212 CBC 9411 : }
1213 : }
1214 :
1215 : /// Existence of `DownloadedLayer` means that we have the file locally, and can later evict it.
1216 : pub(crate) struct DownloadedLayer {
// back-reference to the owning layer; upgraded in `Drop` to notify it
1217 : owner: Weak<LayerInner>,
1218 : // Use tokio OnceCell as we do not need to deinitialize this, it'll just get dropped with the
1219 : // DownloadedLayer
1220 : kind: tokio::sync::OnceCell<anyhow::Result<LayerKind>>,
// handed to `LayerInner::on_downloaded_layer_drop`, where eviction compares it against the
// version stored in `inner`
1221 : version: usize,
1222 : }
1223 :
/// Manual `Debug` which prints `kind` and `version` but leaves out `owner`.
1224 : impl std::fmt::Debug for DownloadedLayer {
1225 UBC 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1226 0 : f.debug_struct("DownloadedLayer")
1227 0 : // owner omitted because it is always "Weak"
1228 0 : .field("kind", &self.kind)
1229 0 : .field("version", &self.version)
1230 0 : .finish()
1231 0 : }
1232 : }
1233 :
/// On drop, notifies the owning `LayerInner` (if it is still alive) so that it can act on a
/// pending eviction for this `version`.
1234 : impl Drop for DownloadedLayer {
1235 CBC 24124 : fn drop(&mut self) {
1236 24124 : if let Some(owner) = self.owner.upgrade() {
1237 2538 : owner.on_downloaded_layer_drop(self.version);
1238 21586 : } else {
1239 21586 : // no need to do anything, we are shutting down
1240 21586 : }
1241 24124 : }
1242 : }
1243 :
1244 : impl DownloadedLayer {
1245 : /// Initializes the `DeltaLayerInner` or `ImageLayerInner` within [`LayerKind`], or fails to
1246 : /// initialize it permanently.
1247 : ///
1248 : /// `owner` parameter is a strong reference at the same `LayerInner` as the
1249 : /// `DownloadedLayer::owner` would be when upgraded. Given how this method ends up called,
1250 : /// we will always have the LayerInner on the callstack, so we can just use it.
///
/// Transient load errors are returned without filling the cell; permanent ones are stored
/// in it and returned stringified.
///
/// Panics if `owner` is not the same `LayerInner` as `self.owner` points to.
1251 15458517 : async fn get<'a>(
1252 15458517 : &'a self,
1253 15458517 : owner: &Arc<LayerInner>,
1254 15458517 : ctx: &RequestContext,
1255 15458537 : ) -> anyhow::Result<&'a LayerKind> {
1256 15458537 : let init = || async {
1257 33286 : assert_eq!(
1258 33286 : Weak::as_ptr(&self.owner),
1259 33286 : Arc::as_ptr(owner),
1260 15458537 : "these are the same, just avoiding the upgrade"
1261 15458537 : );
1262 15458537 :
// `load` yields a nested result: the outer `Err` is treated as permanent and the inner `Err`
// as transient (see the `match res` below)
1263 15458537 : let res = if owner.desc.is_delta {
1264 15458537 : let summary = Some(delta_layer::Summary::expected(
1265 11055 : owner.desc.tenant_shard_id.tenant_id,
1266 11055 : owner.desc.timeline_id,
1267 11055 : owner.desc.key_range.clone(),
1268 11055 : owner.desc.lsn_range.clone(),
1269 11055 : ));
1270 11055 : delta_layer::DeltaLayerInner::load(&owner.path, summary, ctx)
1271 15458537 : .await
1272 15458537 : .map(|res| res.map(LayerKind::Delta))
1273 15458537 : } else {
1274 15458537 : let lsn = owner.desc.image_layer_lsn();
1275 22231 : let summary = Some(image_layer::Summary::expected(
1276 22231 : owner.desc.tenant_shard_id.tenant_id,
1277 22231 : owner.desc.timeline_id,
1278 22231 : owner.desc.key_range.clone(),
1279 22231 : lsn,
1280 22231 : ));
1281 22231 : image_layer::ImageLayerInner::load(&owner.path, lsn, summary, ctx)
1282 15458537 : .await
1283 15458537 : .map(|res| res.map(LayerKind::Image))
1284 15458537 : };
1285 15458537 :
// the closure's outer `Ok(..)` is what gets stored in the cell; its `Err(..)` is returned
// to the caller by `get_or_try_init`
1286 15458537 : match res {
1287 15458537 : Ok(Ok(layer)) => Ok(Ok(layer)),
1288 15458537 : Ok(Err(transient)) => Err(transient),
1289 15458537 : Err(permanent) => {
1290 1 : LAYER_IMPL_METRICS.inc_permanent_loading_failures();
1291 1 : // TODO(#5815): we are not logging all errors, so temporarily log them **once**
1292 1 : // here as well
1293 1 : let permanent = permanent.context("load layer");
1294 1 : tracing::error!("layer loading failed permanently: {permanent:#}");
1295 15458537 : Ok(Err(permanent))
1296 15458537 : }
1297 15458537 : }
1298 15458537 : };
1299 15458537 : self.kind
1300 15458537 : .get_or_try_init(init)
1301 : // return transient errors using `?`
1302 1094 : .await?
1303 15458537 : .as_ref()
1304 15458537 : .map_err(|e| {
1305 9 : // errors are not clonable, cannot but stringify
1306 9 : // test_broken_timeline matches this string
1307 9 : anyhow::anyhow!("layer loading failed: {e:#}")
1308 15458537 : })
1309 15458537 : }
1310 :
/// Loads the layer if needed and forwards the lookup to the delta or image implementation.
///
/// `lsn_range` is only passed on to delta layers; image layers are queried by `key` alone.
1311 15454845 : async fn get_value_reconstruct_data(
1312 15454845 : &self,
1313 15454845 : key: Key,
1314 15454845 : lsn_range: Range<Lsn>,
1315 15454845 : reconstruct_data: &mut ValueReconstructState,
1316 15454845 : owner: &Arc<LayerInner>,
1317 15454845 : ctx: &RequestContext,
1318 15454865 : ) -> anyhow::Result<ValueReconstructResult> {
1319 15454865 : use LayerKind::*;
1320 15454865 :
1321 15454865 : match self.get(owner, ctx).await? {
1322 15002756 : Delta(d) => {
1323 15002756 : d.get_value_reconstruct_data(key, lsn_range, reconstruct_data, ctx)
1324 850598 : .await
1325 : }
1326 452100 : Image(i) => {
1327 452100 : i.get_value_reconstruct_data(key, reconstruct_data, ctx)
1328 10753 : .await
1329 : }
1330 : }
1331 15454863 : }
1332 :
/// Loads the layer if needed and calls `dump` on the delta or image implementation.
1333 2 : async fn dump(&self, owner: &Arc<LayerInner>, ctx: &RequestContext) -> anyhow::Result<()> {
1334 2 : use LayerKind::*;
1335 2 : match self.get(owner, ctx).await? {
1336 2 : Delta(d) => d.dump(ctx).await?,
1337 UBC 0 : Image(i) => i.dump(ctx).await?,
1338 : }
1339 :
1340 CBC 2 : Ok(())
1341 2 : }
1342 : }
1343 :
1344 : /// Wrapper around an actual layer implementation.
///
/// Holds the loaded delta or image layer, as produced by `DownloadedLayer::get`.
1345 UBC 0 : #[derive(Debug)]
1346 : enum LayerKind {
1347 : Delta(delta_layer::DeltaLayerInner),
1348 : Image(image_layer::ImageLayerInner),
1349 : }
1350 :
1351 : /// Guard for forcing a layer be resident while it exists.
///
/// Convert it back into a plain [`Layer`] with `drop_eviction_guard` or `Layer::from`.
1352 CBC 25211 : #[derive(Clone)]
1353 : pub(crate) struct ResidentLayer {
// the layer this guard belongs to
1354 : owner: Layer,
// strong reference to the downloaded state
// NOTE(review): presumably holding this Arc is what prevents eviction -- confirm against
// `ResidentOrWantedEvicted`
1355 : downloaded: Arc<DownloadedLayer>,
1356 : }
1357 :
/// Displays exactly like the owning [`Layer`].
1358 : impl std::fmt::Display for ResidentLayer {
1359 31267 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1360 31267 : write!(f, "{}", self.owner)
1361 31267 : }
1362 : }
1363 :
/// `Debug` output is the `Display` output of the owning [`Layer`]; `downloaded` is not
/// printed.
1364 : impl std::fmt::Debug for ResidentLayer {
1365 UBC 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1366 0 : write!(f, "{}", self.owner)
1367 0 : }
1368 : }
1369 :
1370 : impl ResidentLayer {
1371 : /// Release the eviction guard, converting back into a plain [`Layer`].
1372 : ///
1373 : /// You can access the [`Layer`] also by using `as_ref`.
1374 CBC 21615 : pub(crate) fn drop_eviction_guard(self) -> Layer {
1375 21615 : self.into()
1376 21615 : }
1377 :
1378 : /// Loads all keys stored in the layer. Returns key, lsn and value size.
///
/// Only supported for delta layers; fails for image layers. A `KeyIter` access is recorded
/// in the layer's access stats.
1379 UBC 0 : #[tracing::instrument(skip_all, fields(layer=%self))]
1380 : pub(crate) async fn load_keys<'a>(
1381 : &'a self,
1382 : ctx: &RequestContext,
1383 : ) -> anyhow::Result<Vec<DeltaEntry<'a>>> {
1384 : use LayerKind::*;
1385 :
1386 : let owner = &self.owner.0;
1387 :
1388 : match self.downloaded.get(owner, ctx).await? {
1389 : Delta(ref d) => {
1390 : owner
1391 : .access_stats
1392 : .record_access(LayerAccessKind::KeyIter, ctx);
1393 :
1394 : // this is valid because the DownloadedLayer::kind is a OnceCell, not a
1395 : // Mutex<OnceCell>, so we cannot go and deinitialize the value with OnceCell::take
1396 : // while it's being held.
1397 : delta_layer::DeltaLayerInner::load_keys(d, ctx)
1398 : .await
1399 : .context("Layer index is corrupted")
1400 : }
1401 : Image(_) => anyhow::bail!("cannot load_keys on a image layer"),
1402 : }
1403 : }
1404 :
/// Path of the layer file on local disk.
1405 CBC 41919 : pub(crate) fn local_path(&self) -> &Utf8Path {
1406 41919 : &self.owner.0.path
1407 41919 : }
1408 :
/// Access statistics of the owning [`Layer`].
1409 2893 : pub(crate) fn access_stats(&self) -> &LayerAccessStats {
1410 2893 : self.owner.access_stats()
1411 2893 : }
1412 :
/// File metadata of the owning [`Layer`].
1413 20463 : pub(crate) fn metadata(&self) -> LayerFileMetadata {
1414 20463 : self.owner.metadata()
1415 20463 : }
1416 : }
1417 :
/// Delegates to the owning [`Layer`]'s descriptor.
1418 : impl AsLayerDesc for ResidentLayer {
1419 72650 : fn layer_desc(&self) -> &PersistentLayerDesc {
1420 72650 : self.owner.layer_desc()
1421 72650 : }
1422 : }
1423 :
/// Borrow the owning [`Layer`] without giving up the guard.
1424 : impl AsRef<Layer> for ResidentLayer {
1425 30844 : fn as_ref(&self) -> &Layer {
1426 30844 : &self.owner
1427 30844 : }
1428 : }
1429 :
1430 : /// Drop the eviction guard.
///
/// Keeps only the `owner`; the guard's `Arc<DownloadedLayer>` is dropped with `value`.
1431 : impl From<ResidentLayer> for Layer {
1432 21615 : fn from(value: ResidentLayer) -> Self {
1433 21615 : value.owner
1434 21615 : }
1435 : }
1436 :
1437 : use metrics::IntCounter;
1438 :
/// Counters and a histogram describing evictions, deletions and rare events inside the Layer
/// implementation. Registered in the `Default` impl.
1439 : pub(crate) struct LayerImplMetrics {
1440 : started_evictions: IntCounter,
1441 : completed_evictions: IntCounter,
// one counter per `EvictionCancelled` reason
1442 : cancelled_evictions: enum_map::EnumMap<EvictionCancelled, IntCounter>,
1443 :
1444 : started_deletes: IntCounter,
1445 : completed_deletes: IntCounter,
// one counter per `DeleteFailed` reason
1446 : failed_deletes: enum_map::EnumMap<DeleteFailed, IntCounter>,
1447 :
// one counter per `RareEvent`
1448 : rare_counters: enum_map::EnumMap<RareEvent, IntCounter>,
1449 : inits_cancelled: metrics::core::GenericCounter<metrics::core::AtomicU64>,
// observed in seconds, see `record_redownloaded_after`
1450 : redownload_after: metrics::Histogram,
1451 : }
1452 :
/// Registers every metric and pre-creates one labelled counter per enum variant.
///
/// Panics if any registration fails (each registration result is `unwrap`ped).
1453 : impl Default for LayerImplMetrics {
1454 560 : fn default() -> Self {
1455 560 : use enum_map::Enum;
1456 560 :
1457 560 : // reminder: these will be pageserver_layer_* with "_total" suffix
1458 560 :
1459 560 : let started_evictions = metrics::register_int_counter!(
1460 560 : "pageserver_layer_started_evictions",
1461 560 : "Evictions started in the Layer implementation"
1462 560 : )
1463 560 : .unwrap();
1464 560 : let completed_evictions = metrics::register_int_counter!(
1465 560 : "pageserver_layer_completed_evictions",
1466 560 : "Evictions completed in the Layer implementation"
1467 560 : )
1468 560 : .unwrap();
1469 560 :
1470 560 : let cancelled_evictions = metrics::register_int_counter_vec!(
1471 560 : "pageserver_layer_cancelled_evictions_count",
1472 560 : "Different reasons for evictions to have been cancelled or failed",
1473 560 : &["reason"]
1474 560 : )
1475 560 : .unwrap();
1476 560 :
// shadow the counter vec with a per-variant map, using `as_str` as the "reason" label
1477 4480 : let cancelled_evictions = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1478 4480 : let reason = EvictionCancelled::from_usize(i);
1479 4480 : let s = reason.as_str();
1480 4480 : cancelled_evictions.with_label_values(&[s])
1481 4480 : }));
1482 560 :
1483 560 : let started_deletes = metrics::register_int_counter!(
1484 560 : "pageserver_layer_started_deletes",
1485 560 : "Deletions on drop pending in the Layer implementation"
1486 560 : )
1487 560 : .unwrap();
1488 560 : let completed_deletes = metrics::register_int_counter!(
1489 560 : "pageserver_layer_completed_deletes",
1490 560 : "Deletions on drop completed in the Layer implementation"
1491 560 : )
1492 560 : .unwrap();
1493 560 :
1494 560 : let failed_deletes = metrics::register_int_counter_vec!(
1495 560 : "pageserver_layer_failed_deletes_count",
1496 560 : "Different reasons for deletions on drop to have failed",
1497 560 : &["reason"]
1498 560 : )
1499 560 : .unwrap();
1500 560 :
// same pattern: one counter per `DeleteFailed` variant
1501 1120 : let failed_deletes = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1502 1120 : let reason = DeleteFailed::from_usize(i);
1503 1120 : let s = reason.as_str();
1504 1120 : failed_deletes.with_label_values(&[s])
1505 1120 : }));
1506 560 :
1507 560 : let rare_counters = metrics::register_int_counter_vec!(
1508 560 : "pageserver_layer_assumed_rare_count",
1509 560 : "Times unexpected or assumed rare event happened",
1510 560 : &["event"]
1511 560 : )
1512 560 : .unwrap();
1513 560 :
// same pattern: one counter per `RareEvent` variant, labelled by "event"
1514 3920 : let rare_counters = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1515 3920 : let event = RareEvent::from_usize(i);
1516 3920 : let s = event.as_str();
1517 3920 : rare_counters.with_label_values(&[s])
1518 3920 : }));
1519 560 :
1520 560 : let inits_cancelled = metrics::register_int_counter!(
1521 560 : "pageserver_layer_inits_cancelled_count",
1522 560 : "Times Layer initialization was cancelled",
1523 560 : )
1524 560 : .unwrap();
1525 560 :
// bucket boundaries in seconds: 10s, 30s, 1min, 5min, 15min, 30min, 1h, 12h
1526 560 : let redownload_after = {
1527 560 : let minute = 60.0;
1528 560 : let hour = 60.0 * minute;
1529 560 : metrics::register_histogram!(
1530 560 : "pageserver_layer_redownloaded_after",
1531 560 : "Time between evicting and re-downloading.",
1532 560 : vec![
1533 560 : 10.0,
1534 560 : 30.0,
1535 560 : minute,
1536 560 : 5.0 * minute,
1537 560 : 15.0 * minute,
1538 560 : 30.0 * minute,
1539 560 : hour,
1540 560 : 12.0 * hour,
1541 560 : ]
1542 560 : )
1543 560 : .unwrap()
1544 560 : };
1545 560 :
1546 560 : Self {
1547 560 : started_evictions,
1548 560 : completed_evictions,
1549 560 : cancelled_evictions,
1550 560 :
1551 560 : started_deletes,
1552 560 : completed_deletes,
1553 560 : failed_deletes,
1554 560 :
1555 560 : rare_counters,
1556 560 : inits_cancelled,
1557 560 : redownload_after,
1558 560 : }
1559 560 : }
1560 : }
1561 :
/// Thin helpers, each bumping exactly one metric.
1562 : impl LayerImplMetrics {
/// Counts an eviction that was started.
1563 2538 : fn inc_started_evictions(&self) {
1564 2538 : self.started_evictions.inc();
1565 2538 : }
/// Counts an eviction that ran to completion.
1566 2538 : fn inc_completed_evictions(&self) {
1567 2538 : self.completed_evictions.inc();
1568 2538 : }
/// Counts an eviction that was cancelled or failed, by `reason`.
1569 UBC 0 : fn inc_eviction_cancelled(&self, reason: EvictionCancelled) {
1570 0 : self.cancelled_evictions[reason].inc()
1571 0 : }
1572 :
/// Counts a delete-on-drop that was started.
1573 CBC 5005 : fn inc_started_deletes(&self) {
1574 5005 : self.started_deletes.inc();
1575 5005 : }
/// Counts a delete-on-drop that completed.
1576 4298 : fn inc_completed_deletes(&self) {
1577 4298 : self.completed_deletes.inc();
1578 4298 : }
/// Counts a delete-on-drop that failed, by `reason`.
1579 7 : fn inc_deletes_failed(&self, reason: DeleteFailed) {
1580 7 : self.failed_deletes[reason].inc();
1581 7 : }
1582 :
1583 : /// Counted separately from failed layer deletes because we will complete the layer deletion
1584 : /// attempt regardless of failure to delete local file.
1585 UBC 0 : fn inc_delete_removes_failed(&self) {
1586 0 : self.rare_counters[RareEvent::RemoveOnDropFailed].inc();
1587 0 : }
1588 :
1589 : /// Expected rare because requires a race with `evict_blocking` and `get_or_maybe_download`.
1590 0 : fn inc_retried_get_or_maybe_download(&self) {
1591 0 : self.rare_counters[RareEvent::RetriedGetOrMaybeDownload].inc();
1592 0 : }
1593 :
1594 : /// Expected rare because cancellations are unexpected, and failures are unexpected
1595 CBC 5 : fn inc_download_failed_without_requester(&self) {
1596 5 : self.rare_counters[RareEvent::DownloadFailedWithoutRequester].inc();
1597 5 : }
1598 :
1599 : /// The Weak in ResidentOrWantedEvicted::WantedEvicted was successfully upgraded.
1600 : ///
1601 : /// If this counter is always zero, we should replace ResidentOrWantedEvicted type with an
1602 : /// Option.
1603 UBC 0 : fn inc_raced_wanted_evicted_accesses(&self) {
1604 0 : self.rare_counters[RareEvent::UpgradedWantedEvicted].inc();
1605 0 : }
1606 :
1607 : /// These are only expected for [`Self::inc_init_cancelled`] amount when
1608 : /// running with remote storage.
1609 0 : fn inc_init_needed_no_download(&self) {
1610 0 : self.rare_counters[RareEvent::InitWithoutDownload].inc();
1611 0 : }
1612 :
1613 : /// Expected rare because all layer files should be readable and good
1614 CBC 1 : fn inc_permanent_loading_failures(&self) {
1615 1 : self.rare_counters[RareEvent::PermanentLoadingFailure].inc();
1616 1 : }
1617 :
/// Counts the `EvictAndWaitLagged` rare event (label "broadcast_lagged").
1618 UBC 0 : fn inc_broadcast_lagged(&self) {
1619 0 : self.rare_counters[RareEvent::EvictAndWaitLagged].inc();
1620 0 : }
1621 :
/// Counts a cancelled layer initialization.
1622 CBC 636 : fn inc_init_cancelled(&self) {
1623 636 : self.inits_cancelled.inc()
1624 636 : }
1625 :
/// Records `duration`, converted to fractional seconds, in the redownload histogram.
1626 105 : fn record_redownloaded_after(&self, duration: std::time::Duration) {
1627 105 : self.redownload_after.observe(duration.as_secs_f64())
1628 105 : }
1629 : }
1630 :
/// Reasons why an eviction attempt did not complete; each variant is one value of the
/// "reason" label on `pageserver_layer_cancelled_evictions_count`.
1631 4480 : #[derive(enum_map::Enum)]
1632 : enum EvictionCancelled {
/// The `LayerInner` was dropped before the queued eviction ran.
1633 : LayerGone,
/// The timeline could not be upgraded from its weak reference.
1634 : TimelineGone,
/// The layer was wanted evicted, but for a different `DownloadedLayer` version.
1635 : VersionCheckFailed,
/// The local file was already gone when trying to remove it.
1636 : FileNotFound,
/// Stat or removal of the local file failed for a reason other than `NotFound`.
1637 : RemoveFailed,
/// The layer state was no longer `WantedEvicted` when the eviction ran.
1638 : AlreadyReinitialized,
1639 : /// Not evicted because of a pending reinitialization
1640 : LostToDownload,
1641 : /// After eviction, there was a new layer access which cancelled the eviction.
1642 : UpgradedBackOnAccess,
1643 : }
1644 :
1645 : impl EvictionCancelled {
/// Metric label value for this reason.
1646 4480 : fn as_str(&self) -> &'static str {
1647 4480 : match self {
1648 560 : EvictionCancelled::LayerGone => "layer_gone",
1649 560 : EvictionCancelled::TimelineGone => "timeline_gone",
1650 560 : EvictionCancelled::VersionCheckFailed => "version_check_fail",
1651 560 : EvictionCancelled::FileNotFound => "file_not_found",
1652 560 : EvictionCancelled::RemoveFailed => "remove_failed",
1653 560 : EvictionCancelled::AlreadyReinitialized => "already_reinitialized",
1654 560 : EvictionCancelled::LostToDownload => "lost_to_download",
1655 560 : EvictionCancelled::UpgradedBackOnAccess => "upgraded_back_on_access",
1656 : }
1657 4480 : }
1658 : }
1659 :
/// Reasons for a delete-on-drop to fail; each variant is one value of the "reason" label on
/// `pageserver_layer_failed_deletes_count`.
1660 1127 : #[derive(enum_map::Enum)]
1661 : enum DeleteFailed {
1662 : TimelineGone,
1663 : DeleteSchedulingFailed,
1664 : }
1665 :
1666 : impl DeleteFailed {
/// Metric label value for this reason.
1667 1120 : fn as_str(&self) -> &'static str {
1668 1120 : match self {
1669 560 : DeleteFailed::TimelineGone => "timeline_gone",
1670 560 : DeleteFailed::DeleteSchedulingFailed => "delete_scheduling_failed",
1671 : }
1672 1120 : }
1673 : }
1674 :
/// Events assumed to be rare or unexpected; each variant is one value of the "event" label on
/// `pageserver_layer_assumed_rare_count`. See the `LayerImplMetrics::inc_*` helpers for what
/// each one counts.
1675 3926 : #[derive(enum_map::Enum)]
1676 : enum RareEvent {
1677 : RemoveOnDropFailed,
1678 : RetriedGetOrMaybeDownload,
1679 : DownloadFailedWithoutRequester,
1680 : UpgradedWantedEvicted,
1681 : InitWithoutDownload,
1682 : PermanentLoadingFailure,
1683 : EvictAndWaitLagged,
1684 : }
1685 :
1686 : impl RareEvent {
/// Metric label value for this event.
1687 3920 : fn as_str(&self) -> &'static str {
1688 3920 : use RareEvent::*;
1689 3920 :
1690 3920 : match self {
1691 560 : RemoveOnDropFailed => "remove_on_drop_failed",
1692 560 : RetriedGetOrMaybeDownload => "retried_gomd",
1693 560 : DownloadFailedWithoutRequester => "download_failed_without",
1694 560 : UpgradedWantedEvicted => "raced_wanted_evicted",
1695 560 : InitWithoutDownload => "init_needed_no_download",
1696 560 : PermanentLoadingFailure => "permanent_loading_failure",
1697 560 : EvictAndWaitLagged => "broadcast_lagged",
1698 : }
1699 3920 : }
1700 : }
1701 :
/// Process-wide metrics for the Layer implementation, built with `LayerImplMetrics::default`
/// on first access.
1702 : pub(crate) static LAYER_IMPL_METRICS: once_cell::sync::Lazy<LayerImplMetrics> =
1703 : once_cell::sync::Lazy::new(LayerImplMetrics::default);
|