Line data Source code
1 : use anyhow::Context;
2 : use camino::{Utf8Path, Utf8PathBuf};
3 : use pageserver_api::keyspace::KeySpace;
4 : use pageserver_api::models::{
5 : HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
6 : };
7 : use pageserver_api::shard::ShardIndex;
8 : use std::ops::Range;
9 : use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
10 : use std::sync::{Arc, Weak};
11 : use std::time::SystemTime;
12 : use tracing::Instrument;
13 : use utils::lsn::Lsn;
14 : use utils::sync::heavier_once_cell;
15 :
16 : use crate::config::PageServerConf;
17 : use crate::context::RequestContext;
18 : use crate::repository::Key;
19 : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
20 : use crate::tenant::timeline::GetVectoredError;
21 : use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};
22 :
23 : use super::delta_layer::{self, DeltaEntry};
24 : use super::image_layer;
25 : use super::{
26 : AsLayerDesc, LayerAccessStats, LayerAccessStatsReset, LayerFileName, PersistentLayerDesc,
27 : ValueReconstructResult, ValueReconstructState, ValuesReconstructState,
28 : };
29 :
30 : use utils::generation::Generation;
31 :
32 : #[cfg(test)]
33 : mod tests;
34 :
35 : /// A Layer contains all data in a "rectangle" consisting of a range of keys and
36 : /// range of LSNs.
37 : ///
38 : /// There are two kinds of layers, in-memory and on-disk layers. In-memory
39 : /// layers are used to ingest incoming WAL, and provide fast access to the
40 : /// recent page versions. On-disk layers are stored as files on disk, and are
41 : /// immutable. This type represents the on-disk kind while in-memory kind are represented by
42 : /// [`InMemoryLayer`].
43 : ///
44 : /// Furthermore, there are two kinds of on-disk layers: delta and image layers.
45 : /// A delta layer contains all modifications within a range of LSNs and keys.
46 : /// An image layer is a snapshot of all the data in a key-range, at a single
47 : /// LSN.
48 : ///
49 : /// This type models the on-disk layers, which can be evicted and on-demand downloaded.
50 : ///
51 : /// [`InMemoryLayer`]: super::inmemory_layer::InMemoryLayer
52 252100 : #[derive(Clone)]
53 : pub(crate) struct Layer(Arc<LayerInner>);
54 :
55 : impl std::fmt::Display for Layer {
56 1162 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57 1162 : if matches!(self.0.generation, Generation::Broken) {
58 0 : write!(f, "{}-broken", self.layer_desc().short_id())
59 : } else {
60 1162 : write!(
61 1162 : f,
62 1162 : "{}{}",
63 1162 : self.layer_desc().short_id(),
64 1162 : self.0.generation.get_suffix()
65 1162 : )
66 : }
67 1162 : }
68 : }
69 :
70 : impl std::fmt::Debug for Layer {
71 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72 0 : write!(f, "{}", self)
73 0 : }
74 : }
75 :
76 : impl AsLayerDesc for Layer {
77 378256 : fn layer_desc(&self) -> &PersistentLayerDesc {
78 378256 : self.0.layer_desc()
79 378256 : }
80 : }
81 :
82 : impl Layer {
83 : /// Creates a layer value for a file we know to not be resident.
84 0 : pub(crate) fn for_evicted(
85 0 : conf: &'static PageServerConf,
86 0 : timeline: &Arc<Timeline>,
87 0 : file_name: LayerFileName,
88 0 : metadata: LayerFileMetadata,
89 0 : ) -> Self {
90 0 : let desc = PersistentLayerDesc::from_filename(
91 0 : timeline.tenant_shard_id,
92 0 : timeline.timeline_id,
93 0 : file_name,
94 0 : metadata.file_size(),
95 0 : );
96 0 :
97 0 : let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted);
98 0 :
99 0 : let owner = Layer(Arc::new(LayerInner::new(
100 0 : conf,
101 0 : timeline,
102 0 : access_stats,
103 0 : desc,
104 0 : None,
105 0 : metadata.generation,
106 0 : metadata.shard,
107 0 : )));
108 :
109 0 : debug_assert!(owner.0.needs_download_blocking().unwrap().is_some());
110 :
111 0 : owner
112 0 : }
113 :
114 : /// Creates a Layer value for a file we know to be resident in timeline directory.
115 24 : pub(crate) fn for_resident(
116 24 : conf: &'static PageServerConf,
117 24 : timeline: &Arc<Timeline>,
118 24 : file_name: LayerFileName,
119 24 : metadata: LayerFileMetadata,
120 24 : ) -> ResidentLayer {
121 24 : let desc = PersistentLayerDesc::from_filename(
122 24 : timeline.tenant_shard_id,
123 24 : timeline.timeline_id,
124 24 : file_name,
125 24 : metadata.file_size(),
126 24 : );
127 24 :
128 24 : let access_stats = LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident);
129 24 :
130 24 : let mut resident = None;
131 24 :
132 24 : let owner = Layer(Arc::new_cyclic(|owner| {
133 24 : let inner = Arc::new(DownloadedLayer {
134 24 : owner: owner.clone(),
135 24 : kind: tokio::sync::OnceCell::default(),
136 24 : version: 0,
137 24 : });
138 24 : resident = Some(inner.clone());
139 24 :
140 24 : LayerInner::new(
141 24 : conf,
142 24 : timeline,
143 24 : access_stats,
144 24 : desc,
145 24 : Some(inner),
146 24 : metadata.generation,
147 24 : metadata.shard,
148 24 : )
149 24 : }));
150 24 :
151 24 : let downloaded = resident.expect("just initialized");
152 :
153 24 : debug_assert!(owner.0.needs_download_blocking().unwrap().is_none());
154 :
155 24 : timeline
156 24 : .metrics
157 24 : .resident_physical_size_add(metadata.file_size());
158 24 :
159 24 : ResidentLayer { downloaded, owner }
160 24 : }
161 :
162 : /// Creates a Layer value for freshly written out new layer file by renaming it from a
163 : /// temporary path.
164 554 : pub(crate) fn finish_creating(
165 554 : conf: &'static PageServerConf,
166 554 : timeline: &Arc<Timeline>,
167 554 : desc: PersistentLayerDesc,
168 554 : temp_path: &Utf8Path,
169 554 : ) -> anyhow::Result<ResidentLayer> {
170 554 : let mut resident = None;
171 554 :
172 554 : let owner = Layer(Arc::new_cyclic(|owner| {
173 554 : let inner = Arc::new(DownloadedLayer {
174 554 : owner: owner.clone(),
175 554 : kind: tokio::sync::OnceCell::default(),
176 554 : version: 0,
177 554 : });
178 554 : resident = Some(inner.clone());
179 554 : let access_stats = LayerAccessStats::empty_will_record_residence_event_later();
180 554 : access_stats.record_residence_event(
181 554 : LayerResidenceStatus::Resident,
182 554 : LayerResidenceEventReason::LayerCreate,
183 554 : );
184 554 : LayerInner::new(
185 554 : conf,
186 554 : timeline,
187 554 : access_stats,
188 554 : desc,
189 554 : Some(inner),
190 554 : timeline.generation,
191 554 : timeline.get_shard_index(),
192 554 : )
193 554 : }));
194 554 :
195 554 : let downloaded = resident.expect("just initialized");
196 554 :
197 554 : // if the rename works, the path is as expected
198 554 : std::fs::rename(temp_path, owner.local_path())
199 554 : .with_context(|| format!("rename temporary file as correct path for {owner}"))?;
200 :
201 554 : Ok(ResidentLayer { downloaded, owner })
202 554 : }
203 :
204 : /// Requests the layer to be evicted and waits for this to be done.
205 : ///
206 : /// If the file is not resident, an [`EvictionError::NotFound`] is returned.
207 : ///
208 : /// If for a bad luck or blocking of the executor, we miss the actual eviction and the layer is
209 : /// re-downloaded, [`EvictionError::Downloaded`] is returned.
210 : ///
211 : /// Technically cancellation safe, but cancelling might shift the viewpoint of what generation
212 : /// of download-evict cycle on retry.
213 10 : pub(crate) async fn evict_and_wait(&self) -> Result<(), EvictionError> {
214 14 : self.0.evict_and_wait().await
215 10 : }
216 :
217 : /// Delete the layer file when the `self` gets dropped, also try to schedule a remote index upload
218 : /// then.
219 : ///
220 : /// On drop, this will cause a call to [`crate::tenant::remote_timeline_client::RemoteTimelineClient::schedule_deletion_of_unlinked`].
221 : /// This means that the unlinking by [gc] or [compaction] must have happened strictly before
222 : /// the value this is called on gets dropped.
223 : ///
224 : /// This is ensured by both of those methods accepting references to Layer.
225 : ///
226 : /// [gc]: [`RemoteTimelineClient::schedule_gc_update`]
227 : /// [compaction]: [`RemoteTimelineClient::schedule_compaction_update`]
228 304 : pub(crate) fn delete_on_drop(&self) {
229 304 : self.0.delete_on_drop();
230 304 : }
231 :
232 : /// Return data needed to reconstruct given page at LSN.
233 : ///
234 : /// It is up to the caller to collect more data from the previous layer and
235 : /// perform WAL redo, if necessary.
236 : ///
237 : /// # Cancellation-Safety
238 : ///
239 : /// This method is cancellation-safe.
240 124028 : pub(crate) async fn get_value_reconstruct_data(
241 124028 : &self,
242 124028 : key: Key,
243 124028 : lsn_range: Range<Lsn>,
244 124028 : reconstruct_data: &mut ValueReconstructState,
245 124028 : ctx: &RequestContext,
246 124028 : ) -> anyhow::Result<ValueReconstructResult> {
247 : use anyhow::ensure;
248 :
249 124028 : let layer = self.0.get_or_maybe_download(true, Some(ctx)).await?;
250 124028 : self.0
251 124028 : .access_stats
252 124028 : .record_access(LayerAccessKind::GetValueReconstructData, ctx);
253 124028 :
254 124028 : if self.layer_desc().is_delta {
255 123516 : ensure!(lsn_range.start >= self.layer_desc().lsn_range.start);
256 123516 : ensure!(self.layer_desc().key_range.contains(&key));
257 : } else {
258 512 : ensure!(self.layer_desc().key_range.contains(&key));
259 512 : ensure!(lsn_range.start >= self.layer_desc().image_layer_lsn());
260 512 : ensure!(lsn_range.end >= self.layer_desc().image_layer_lsn());
261 : }
262 :
263 124028 : layer
264 124028 : .get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
265 124028 : .instrument(tracing::debug_span!("get_value_reconstruct_data", layer=%self))
266 23310 : .await
267 124028 : .with_context(|| format!("get_value_reconstruct_data for layer {self}"))
268 124028 : }
269 :
270 10 : pub(crate) async fn get_values_reconstruct_data(
271 10 : &self,
272 10 : keyspace: KeySpace,
273 10 : lsn_range: Range<Lsn>,
274 10 : reconstruct_data: &mut ValuesReconstructState,
275 10 : ctx: &RequestContext,
276 10 : ) -> Result<(), GetVectoredError> {
277 10 : let layer = self
278 10 : .0
279 10 : .get_or_maybe_download(true, Some(ctx))
280 0 : .await
281 10 : .map_err(|err| GetVectoredError::Other(anyhow::anyhow!(err)))?;
282 :
283 10 : self.0
284 10 : .access_stats
285 10 : .record_access(LayerAccessKind::GetValueReconstructData, ctx);
286 10 :
287 10 : layer
288 10 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, &self.0, ctx)
289 10 : .instrument(tracing::debug_span!("get_values_reconstruct_data", layer=%self))
290 25 : .await
291 10 : }
292 :
293 : /// Download the layer if evicted.
294 : ///
295 : /// Will not error when the layer is already downloaded.
296 0 : pub(crate) async fn download(&self) -> anyhow::Result<()> {
297 0 : self.0.get_or_maybe_download(true, None).await?;
298 0 : Ok(())
299 0 : }
300 :
301 : /// Assuming the layer is already downloaded, returns a guard which will prohibit eviction
302 : /// while the guard exists.
303 : ///
304 : /// Returns None if the layer is currently evicted.
305 16 : pub(crate) async fn keep_resident(&self) -> anyhow::Result<Option<ResidentLayer>> {
306 16 : let downloaded = match self.0.get_or_maybe_download(false, None).await {
307 12 : Ok(d) => d,
308 : // technically there are a lot of possible errors, but in practice it should only be
309 : // DownloadRequired which is tripped up. could work to improve this situation
310 : // statically later.
311 4 : Err(DownloadError::DownloadRequired) => return Ok(None),
312 0 : Err(e) => return Err(e.into()),
313 : };
314 :
315 12 : Ok(Some(ResidentLayer {
316 12 : downloaded,
317 12 : owner: self.clone(),
318 12 : }))
319 16 : }
320 :
321 : /// Downloads if necessary and creates a guard, which will keep this layer from being evicted.
322 300 : pub(crate) async fn download_and_keep_resident(&self) -> anyhow::Result<ResidentLayer> {
323 300 : let downloaded = self.0.get_or_maybe_download(true, None).await?;
324 :
325 300 : Ok(ResidentLayer {
326 300 : downloaded,
327 300 : owner: self.clone(),
328 300 : })
329 300 : }
330 :
331 0 : pub(crate) fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
332 0 : self.0.info(reset)
333 0 : }
334 :
335 0 : pub(crate) fn access_stats(&self) -> &LayerAccessStats {
336 0 : &self.0.access_stats
337 0 : }
338 :
339 658 : pub(crate) fn local_path(&self) -> &Utf8Path {
340 658 : &self.0.path
341 658 : }
342 :
343 562 : pub(crate) fn metadata(&self) -> LayerFileMetadata {
344 562 : self.0.metadata()
345 562 : }
346 :
347 : /// Traditional debug dumping facility
348 : #[allow(unused)]
349 4 : pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> {
350 4 : self.0.desc.dump();
351 4 :
352 4 : if verbose {
353 : // for now, unconditionally download everything, even if that might not be wanted.
354 4 : let l = self.0.get_or_maybe_download(true, Some(ctx)).await?;
355 8 : l.dump(&self.0, ctx).await?
356 0 : }
357 :
358 4 : Ok(())
359 4 : }
360 :
/// Returns a future which completes once this layer has been dropped (and if needed, local
/// file deletion and remote deletion scheduling has completed).
///
/// This does not start local deletion; use [`Self::delete_on_drop`] for that separately.
#[cfg(feature = "testing")]
pub(crate) fn wait_drop(&self) -> impl std::future::Future<Output = ()> + 'static {
    use tokio::sync::broadcast::error::RecvError;

    let mut rx = self.0.status.subscribe();

    async move {
        // received statuses and lag notifications are skipped; only the closing of the
        // channel ends the wait
        while !matches!(rx.recv().await, Err(RecvError::Closed)) {}
    }
}
378 : }
379 :
380 : /// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted.
381 : ///
382 : /// However when we want something evicted, we cannot evict it right away as there might be current
383 : /// reads happening on it. For example: it has been searched from [`LayerMap::search`] but not yet
384 : /// read with [`Layer::get_value_reconstruct_data`].
385 : ///
386 : /// [`LayerMap::search`]: crate::tenant::layer_map::LayerMap::search
387 0 : #[derive(Debug)]
388 : enum ResidentOrWantedEvicted {
389 : Resident(Arc<DownloadedLayer>),
390 : WantedEvicted(Weak<DownloadedLayer>, usize),
391 : }
392 :
393 : impl ResidentOrWantedEvicted {
394 124356 : fn get_and_upgrade(&mut self) -> Option<(Arc<DownloadedLayer>, bool)> {
395 124356 : match self {
396 124354 : ResidentOrWantedEvicted::Resident(strong) => Some((strong.clone(), false)),
397 2 : ResidentOrWantedEvicted::WantedEvicted(weak, _) => match weak.upgrade() {
398 0 : Some(strong) => {
399 0 : LAYER_IMPL_METRICS.inc_raced_wanted_evicted_accesses();
400 0 :
401 0 : *self = ResidentOrWantedEvicted::Resident(strong.clone());
402 0 :
403 0 : Some((strong, true))
404 : }
405 2 : None => None,
406 : },
407 : }
408 124356 : }
409 :
410 : /// When eviction is first requested, drop down to holding a [`Weak`].
411 : ///
412 : /// Returns `Some` if this was the first time eviction was requested. Care should be taken to
413 : /// drop the possibly last strong reference outside of the mutex of
414 : /// heavier_once_cell::OnceCell.
415 8 : fn downgrade(&mut self) -> Option<Arc<DownloadedLayer>> {
416 8 : match self {
417 8 : ResidentOrWantedEvicted::Resident(strong) => {
418 8 : let weak = Arc::downgrade(strong);
419 8 : let mut temp = ResidentOrWantedEvicted::WantedEvicted(weak, strong.version);
420 8 : std::mem::swap(self, &mut temp);
421 8 : match temp {
422 8 : ResidentOrWantedEvicted::Resident(strong) => Some(strong),
423 0 : ResidentOrWantedEvicted::WantedEvicted(..) => unreachable!("just swapped"),
424 : }
425 : }
426 0 : ResidentOrWantedEvicted::WantedEvicted(..) => None,
427 : }
428 8 : }
429 : }
430 :
431 : struct LayerInner {
432 : /// Only needed to check ondemand_download_behavior_treat_error_as_warn and creation of
433 : /// [`Self::path`].
434 : conf: &'static PageServerConf,
435 :
436 : /// Full path to the file; unclear if this should exist anymore.
437 : path: Utf8PathBuf,
438 :
439 : desc: PersistentLayerDesc,
440 :
441 : /// Timeline access is needed for remote timeline client and metrics.
442 : timeline: Weak<Timeline>,
443 :
444 : /// Cached knowledge of [`Timeline::remote_client`] being `Some`.
445 : have_remote_client: bool,
446 :
447 : access_stats: LayerAccessStats,
448 :
449 : /// This custom OnceCell is backed by std mutex, but only held for short time periods.
450 : /// Initialization and deinitialization are done while holding a permit.
451 : inner: heavier_once_cell::OnceCell<ResidentOrWantedEvicted>,
452 :
453 : /// Do we want to delete locally and remotely this when `LayerInner` is dropped
454 : wanted_deleted: AtomicBool,
455 :
456 : /// Do we want to evict this layer as soon as possible? After being set to `true`, all accesses
457 : /// will try to downgrade [`ResidentOrWantedEvicted`], which will eventually trigger
458 : /// [`LayerInner::on_downloaded_layer_drop`].
459 : wanted_evicted: AtomicBool,
460 :
461 : /// Version is to make sure we will only evict a specific download of a file.
462 : ///
463 : /// Incremented for each download, stored in `DownloadedLayer::version` or
464 : /// `ResidentOrWantedEvicted::WantedEvicted`.
465 : version: AtomicUsize,
466 :
467 : /// Allow subscribing to when the layer actually gets evicted.
468 : status: tokio::sync::broadcast::Sender<Status>,
469 :
470 : /// Counter for exponential backoff with the download
471 : consecutive_failures: AtomicUsize,
472 :
473 : /// The generation of this Layer.
474 : ///
475 : /// For loaded layers (resident or evicted) this comes from [`LayerFileMetadata::generation`],
476 : /// for created layers from [`Timeline::generation`].
477 : generation: Generation,
478 :
479 : /// The shard of this Layer.
480 : ///
481 : /// For layers created in this process, this will always be the [`ShardIndex`] of the
482 : /// current `ShardIdentity`` (TODO: add link once it's introduced).
483 : ///
484 : /// For loaded layers, this may be some other value if the tenant has undergone
485 : /// a shard split since the layer was originally written.
486 : shard: ShardIndex,
487 :
488 : last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,
489 : }
490 :
491 : impl std::fmt::Display for LayerInner {
492 14 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
493 14 : write!(f, "{}", self.layer_desc().short_id())
494 14 : }
495 : }
496 :
497 : impl AsLayerDesc for LayerInner {
498 379780 : fn layer_desc(&self) -> &PersistentLayerDesc {
499 379780 : &self.desc
500 379780 : }
501 : }
502 :
/// Residency change notification, broadcast to the subscribers of `LayerInner::status`.
#[derive(Clone, Copy, Debug)]
enum Status {
    /// Waiters in `evict_and_wait` treat this as a successful eviction.
    Evicted,
    /// Sent when a download is initiated or an access upgrades the layer back to resident;
    /// waiters in `evict_and_wait` fail with `EvictionError::Downloaded`.
    Downloaded,
}
508 :
509 : impl Drop for LayerInner {
510 326 : fn drop(&mut self) {
511 326 : if !*self.wanted_deleted.get_mut() {
512 : // should we try to evict if the last wish was for eviction?
513 : // feels like there's some hazard of overcrowding near shutdown near by, but we don't
514 : // run drops during shutdown (yet)
515 24 : return;
516 302 : }
517 :
518 302 : let span = tracing::info_span!(parent: None, "layer_delete", tenant_id = %self.layer_desc().tenant_shard_id.tenant_id, shard_id=%self.layer_desc().tenant_shard_id.shard_slug(), timeline_id = %self.layer_desc().timeline_id);
519 :
520 302 : let path = std::mem::take(&mut self.path);
521 302 : let file_name = self.layer_desc().filename();
522 302 : let file_size = self.layer_desc().file_size;
523 302 : let timeline = self.timeline.clone();
524 302 : let meta = self.metadata();
525 302 : let status = self.status.clone();
526 302 :
527 302 : crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || {
528 302 : let _g = span.entered();
529 302 :
530 302 : // carry this until we are finished for [`Layer::wait_drop`] support
531 302 : let _status = status;
532 :
533 302 : let removed = match std::fs::remove_file(path) {
534 300 : Ok(()) => true,
535 2 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
536 2 : // until we no longer do detaches by removing all local files before removing the
537 2 : // tenant from the global map, we will always get these errors even if we knew what
538 2 : // is the latest state.
539 2 : //
540 2 : // we currently do not track the latest state, so we'll also end up here on evicted
541 2 : // layers.
542 2 : false
543 : }
544 0 : Err(e) => {
545 0 : tracing::error!("failed to remove wanted deleted layer: {e}");
546 0 : LAYER_IMPL_METRICS.inc_delete_removes_failed();
547 0 : false
548 : }
549 : };
550 :
551 302 : if let Some(timeline) = timeline.upgrade() {
552 302 : if removed {
553 300 : timeline.metrics.resident_physical_size_sub(file_size);
554 300 : }
555 302 : if let Some(remote_client) = timeline.remote_client.as_ref() {
556 302 : let res = remote_client.schedule_deletion_of_unlinked(vec![(file_name, meta)]);
557 :
558 302 : if let Err(e) = res {
559 : // test_timeline_deletion_with_files_stuck_in_upload_queue is good at
560 : // demonstrating this deadlock (without spawn_blocking): stop will drop
561 : // queued items, which will have ResidentLayer's, and those drops would try
562 : // to re-entrantly lock the RemoteTimelineClient inner state.
563 0 : if !timeline.is_active() {
564 0 : tracing::info!("scheduling deletion on drop failed: {e:#}");
565 : } else {
566 0 : tracing::warn!("scheduling deletion on drop failed: {e:#}");
567 : }
568 0 : LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::DeleteSchedulingFailed);
569 302 : } else {
570 302 : LAYER_IMPL_METRICS.inc_completed_deletes();
571 302 : }
572 0 : }
573 0 : } else {
574 0 : // no need to nag that timeline is gone: under normal situation on
575 0 : // task_mgr::remove_tenant_from_memory the timeline is gone before we get dropped.
576 0 : LAYER_IMPL_METRICS.inc_deletes_failed(DeleteFailed::TimelineGone);
577 0 : }
578 302 : });
579 326 : }
580 : }
581 :
582 : impl LayerInner {
583 578 : fn new(
584 578 : conf: &'static PageServerConf,
585 578 : timeline: &Arc<Timeline>,
586 578 : access_stats: LayerAccessStats,
587 578 : desc: PersistentLayerDesc,
588 578 : downloaded: Option<Arc<DownloadedLayer>>,
589 578 : generation: Generation,
590 578 : shard: ShardIndex,
591 578 : ) -> Self {
592 578 : let path = conf
593 578 : .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id)
594 578 : .join(desc.filename().to_string());
595 :
596 578 : let (inner, version) = if let Some(inner) = downloaded {
597 578 : let version = inner.version;
598 578 : let resident = ResidentOrWantedEvicted::Resident(inner);
599 578 : (heavier_once_cell::OnceCell::new(resident), version)
600 : } else {
601 0 : (heavier_once_cell::OnceCell::default(), 0)
602 : };
603 :
604 578 : LayerInner {
605 578 : conf,
606 578 : path,
607 578 : desc,
608 578 : timeline: Arc::downgrade(timeline),
609 578 : have_remote_client: timeline.remote_client.is_some(),
610 578 : access_stats,
611 578 : wanted_deleted: AtomicBool::new(false),
612 578 : wanted_evicted: AtomicBool::new(false),
613 578 : inner,
614 578 : version: AtomicUsize::new(version),
615 578 : status: tokio::sync::broadcast::channel(1).0,
616 578 : consecutive_failures: AtomicUsize::new(0),
617 578 : generation,
618 578 : shard,
619 578 : last_evicted_at: std::sync::Mutex::default(),
620 578 : }
621 578 : }
622 :
623 304 : fn delete_on_drop(&self) {
624 304 : let res =
625 304 : self.wanted_deleted
626 304 : .compare_exchange(false, true, Ordering::Release, Ordering::Relaxed);
627 304 :
628 304 : if res.is_ok() {
629 302 : LAYER_IMPL_METRICS.inc_started_deletes();
630 302 : }
631 304 : }
632 :
633 : /// Cancellation safe, however dropping the future and calling this method again might result
634 : /// in a new attempt to evict OR join the previously started attempt.
635 10 : pub(crate) async fn evict_and_wait(&self) -> Result<(), EvictionError> {
636 10 : use tokio::sync::broadcast::error::RecvError;
637 10 :
638 10 : assert!(self.have_remote_client);
639 :
640 10 : let mut rx = self.status.subscribe();
641 :
642 8 : let strong = {
643 10 : match self.inner.get() {
644 8 : Some(mut either) => {
645 8 : self.wanted_evicted.store(true, Ordering::Relaxed);
646 8 : either.downgrade()
647 : }
648 2 : None => return Err(EvictionError::NotFound),
649 : }
650 : };
651 :
652 8 : if strong.is_some() {
653 8 : // drop the DownloadedLayer outside of the holding the guard
654 8 : drop(strong);
655 8 : LAYER_IMPL_METRICS.inc_started_evictions();
656 8 : }
657 :
658 14 : match rx.recv().await {
659 4 : Ok(Status::Evicted) => Ok(()),
660 2 : Ok(Status::Downloaded) => Err(EvictionError::Downloaded),
661 : Err(RecvError::Closed) => {
662 0 : unreachable!("sender cannot be dropped while we are in &self method")
663 : }
664 : Err(RecvError::Lagged(_)) => {
665 : // this is quite unlikely, but we are blocking a lot in the async context, so
666 : // we might be missing this because we are stuck on a LIFO slot on a thread
667 : // which is busy blocking for a 1TB database create_image_layers.
668 : //
669 : // use however late (compared to the initial expressing of wanted) as the
670 : // "outcome" now
671 2 : LAYER_IMPL_METRICS.inc_broadcast_lagged();
672 2 : match self.inner.get() {
673 0 : Some(_) => Err(EvictionError::Downloaded),
674 2 : None => Ok(()),
675 : }
676 : }
677 : }
678 10 : }
679 :
680 : /// Cancellation safe.
// Returns a strong reference to the current `DownloadedLayer`.
//
// When `self.inner` is not initialized, the `download` closure below runs as the initializer.
// It starts an actual download only if `needs_download()` reports a reason and
// `allow_download` is true; with `allow_download == false` it fails with
// `DownloadError::DownloadRequired` instead. When `ctx` is given, it is checked with
// `check_expected_download` before downloading.
681 124358 : async fn get_or_maybe_download(
682 124358 : self: &Arc<Self>,
683 124358 : allow_download: bool,
684 124358 : ctx: Option<&RequestContext>,
685 124358 : ) -> Result<Arc<DownloadedLayer>, DownloadError> {
// set at the bottom of the loop when a dead `WantedEvicted` value had to be taken out of
// `self.inner`; consumed at the start of the following iteration
686 124358 : let mut init_permit = None;
687 :
688 : loop {
// initializer: takes an init permit and resolves to the new
// `ResidentOrWantedEvicted::Resident` value together with a permit, or to a `DownloadError`
689 124360 : let download = move |permit| {
690 6 : async move {
691 6 : // disable any scheduled but not yet running eviction deletions for this
692 6 : let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);
693 6 :
694 6 : // count cancellations, which currently remain largely unexpected
695 6 : let init_cancelled =
696 6 : scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
697 6 :
698 6 : // no need to make the evict_and_wait wait for the actual download to complete
699 6 : drop(self.status.send(Status::Downloaded));
700 :
701 6 : let timeline = self
702 6 : .timeline
703 6 : .upgrade()
704 6 : .ok_or_else(|| DownloadError::TimelineShutdown)?;
705 :
706 : // FIXME: grab a gate
707 :
708 6 : let can_ever_evict = timeline.remote_client.as_ref().is_some();
709 :
710 : // check if we really need to be downloaded; could have been already downloaded by a
711 : // cancelled previous attempt.
712 6 : let needs_download = self
713 6 : .needs_download()
714 8 : .await
715 6 : .map_err(DownloadError::PreStatFailed)?;
716 :
717 6 : let permit = if let Some(reason) = needs_download {
718 4 : if let NeedsDownload::NotFile(ft) = reason {
719 0 : return Err(DownloadError::NotFile(ft));
720 4 : }
721 4 :
722 4 : // only reset this after we've decided we really need to download. otherwise it'd
723 4 : // be impossible to mark cancelled downloads for eviction, like one could imagine
724 4 : // we would like to do for prefetching which was not needed.
725 4 : self.wanted_evicted.store(false, Ordering::Release);
726 4 :
727 4 : if !can_ever_evict {
728 0 : return Err(DownloadError::NoRemoteStorage);
729 4 : }
730 :
731 4 : if let Some(ctx) = ctx {
732 0 : self.check_expected_download(ctx)?;
733 4 : }
734 :
735 4 : if !allow_download {
736 : // this does look weird, but for LayerInner the "downloading" means also changing
737 : // internal once related state ...
738 4 : return Err(DownloadError::DownloadRequired);
739 0 : }
740 0 :
741 0 : tracing::info!(%reason, "downloading on-demand");
742 :
743 0 : self.spawn_download_and_wait(timeline, permit).await?
744 : } else {
745 : // the file is present locally, probably by a previous but cancelled call to
746 : // get_or_maybe_download. alternatively we might be running without remote storage.
747 2 : LAYER_IMPL_METRICS.inc_init_needed_no_download();
748 2 :
749 2 : permit
750 : };
751 :
752 2 : let since_last_eviction =
753 2 : self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
754 2 : if let Some(since_last_eviction) = since_last_eviction {
755 0 : // FIXME: this will not always be recorded correctly until #6028 (the no
756 0 : // download needed branch above)
757 0 : LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
758 2 : }
759 :
760 2 : let res = Arc::new(DownloadedLayer {
761 2 : owner: Arc::downgrade(self),
762 2 : kind: tokio::sync::OnceCell::default(),
763 2 : version: next_version,
764 2 : });
765 2 :
766 2 : self.access_stats.record_residence_event(
767 2 : LayerResidenceStatus::Resident,
768 2 : LayerResidenceEventReason::ResidenceChange,
769 2 : );
770 2 :
771 2 : let waiters = self.inner.initializer_count();
772 2 : if waiters > 0 {
773 0 : tracing::info!(
774 0 : waiters,
775 0 : "completing the on-demand download for other tasks"
776 0 : );
777 2 : }
778 :
// reached the end without being cancelled: defuse the guard so that it does not count
779 2 : scopeguard::ScopeGuard::into_inner(init_cancelled);
780 2 :
781 2 : Ok((ResidentOrWantedEvicted::Resident(res), permit))
782 6 : }
783 6 : .instrument(tracing::info_span!("get_or_maybe_download", layer=%self))
784 6 : };
785 :
786 124360 : if let Some(init_permit) = init_permit.take() {
787 : // use the already held initialization permit because it is impossible to hit the
788 : // below paths anymore essentially limiting the max loop iterations to 2.
789 4 : let (value, init_permit) = download(init_permit).await?;
790 2 : let mut guard = self.inner.set(value, init_permit);
791 2 : let (strong, _upgraded) = guard
792 2 : .get_and_upgrade()
793 2 : .expect("init creates strong reference, we held the init permit");
794 2 : return Ok(strong);
795 124358 : }
796 :
// no permit held: use the already initialized value, or run `download` as the initializer
797 2 : let (weak, permit) = {
798 124358 : let mut locked = self.inner.get_or_init(download).await?;
799 :
800 124354 : if let Some((strong, upgraded)) = locked.get_and_upgrade() {
801 124352 : if upgraded {
802 0 : // when upgraded back, the Arc<DownloadedLayer> is still available, but
803 0 : // previously a `evict_and_wait` was received.
804 0 : self.wanted_evicted.store(false, Ordering::Relaxed);
805 0 :
806 0 : // error out any `evict_and_wait`
807 0 : drop(self.status.send(Status::Downloaded));
808 0 : LAYER_IMPL_METRICS
809 0 : .inc_eviction_cancelled(EvictionCancelled::UpgradedBackOnAccess);
810 124352 : }
811 :
812 124352 : return Ok(strong);
813 : } else {
814 : // path to here: the evict_blocking is stuck on spawn_blocking queue.
815 : //
816 : // reset the contents, deactivating the eviction and causing a
817 : // EvictionCancelled::LostToDownload or EvictionCancelled::VersionCheckFailed.
818 2 : locked.take_and_deinit()
819 2 : }
820 2 : };
821 2 :
822 2 : // unlock first, then drop the weak, but because upgrade failed, we
823 2 : // know it cannot be a problem.
824 2 :
825 2 : assert!(
826 2 : matches!(weak, ResidentOrWantedEvicted::WantedEvicted(..)),
827 0 : "unexpected {weak:?}, ResidentOrWantedEvicted::get_and_upgrade has a bug"
828 : );
829 :
// keep the permit for the next iteration, which initializes `self.inner` with it
830 2 : init_permit = Some(permit);
831 2 :
832 2 : LAYER_IMPL_METRICS.inc_retried_get_or_maybe_download();
833 : }
834 124358 : }
835 :
836 : /// Nag or fail per RequestContext policy
837 0 : fn check_expected_download(&self, ctx: &RequestContext) -> Result<(), DownloadError> {
838 0 : use crate::context::DownloadBehavior::*;
839 0 : let b = ctx.download_behavior();
840 0 : match b {
841 0 : Download => Ok(()),
842 : Warn | Error => {
843 0 : tracing::info!(
844 0 : "unexpectedly on-demand downloading for task kind {:?}",
845 0 : ctx.task_kind()
846 0 : );
847 0 : crate::metrics::UNEXPECTED_ONDEMAND_DOWNLOADS.inc();
848 :
849 0 : let really_error =
850 0 : matches!(b, Error) && !self.conf.ondemand_download_behavior_treat_error_as_warn;
851 :
852 0 : if really_error {
853 : // this check is only probablistic, seems like flakyness footgun
854 0 : Err(DownloadError::ContextAndConfigReallyDeniesDownloads)
855 : } else {
856 0 : Ok(())
857 : }
858 : }
859 : }
860 0 : }
861 :
/// Actual download, at most one is executed at the time.
///
/// Spawns a `RemoteDownloadTask` through `task_mgr` which performs the download, then waits
/// for the outcome over a oneshot channel. The `permit` is moved into the spawned task and
/// travels back to this function together with the download result.
///
/// Returns the `permit` when the download succeeded and the follow-up `needs_download` check
/// found nothing wrong with the local file.
///
/// # Errors
///
/// - [`DownloadError::PostStatFailed`] if the post-download `needs_download` stat fails.
/// - [`DownloadError::DownloadCancelled`] if the download error downcasts to
///   `remote_storage::DownloadError::Cancelled`, or if the spawned task went away without
///   sending a result.
/// - [`DownloadError::DownloadFailed`] for any other download error.
///
/// # Panics
///
/// Panics if `timeline.remote_client` is `None`, or if `needs_download` still reports a reason
/// after a successful download.
async fn spawn_download_and_wait(
    self: &Arc<Self>,
    timeline: Arc<Timeline>,
    permit: heavier_once_cell::InitPermit,
) -> Result<heavier_once_cell::InitPermit, DownloadError> {
    debug_assert_current_span_has_tenant_and_timeline_id();

    let task_name = format!("download layer {}", self);

    // the spawned task reports `(result, permit)` back through this channel
    let (tx, rx) = tokio::sync::oneshot::channel();

    // this is sadly needed because of task_mgr::shutdown_tasks, otherwise we cannot
    // block tenant::mgr::remove_tenant_from_memory.

    let this: Arc<Self> = self.clone();

    crate::task_mgr::spawn(
        &tokio::runtime::Handle::current(),
        crate::task_mgr::TaskKind::RemoteDownloadTask,
        Some(self.desc.tenant_shard_id),
        Some(self.desc.timeline_id),
        &task_name,
        false,
        async move {

            let client = timeline
                .remote_client
                .as_ref()
                .expect("checked above with have_remote_client");

            let result = client.download_layer_file(
                &this.desc.filename(),
                &this.metadata(),
                &crate::task_mgr::shutdown_token()
            )
            .await;

            let result = match result {
                Ok(size) => {
                    timeline.metrics.resident_physical_size_add(size);
                    Ok(())
                }
                Err(e) => {
                    // `fetch_add` returns the value before the increment, so the first
                    // failure computes its backoff from 0
                    let consecutive_failures =
                        this.consecutive_failures.fetch_add(1, Ordering::Relaxed);

                    // NOTE(review): 1.5 and 60.0 are presumably the exponent base and the
                    // maximum in seconds -- confirm against `utils::backoff`
                    let backoff = utils::backoff::exponential_backoff_duration_seconds(
                        consecutive_failures.min(u32::MAX as usize) as u32,
                        1.5,
                        60.0,
                    );

                    let backoff = std::time::Duration::from_secs_f64(backoff);

                    // sleep here, before reporting the failure; cut the sleep short on
                    // task manager shutdown or timeline cancellation
                    tokio::select! {
                        _ = tokio::time::sleep(backoff) => {},
                        _ = crate::task_mgr::shutdown_token().cancelled_owned() => {},
                        _ = timeline.cancel.cancelled() => {},
                    };

                    Err(e)
                }
            };

            // `send` fails only when the receiver is gone, i.e. the waiting caller was dropped
            if let Err(res) = tx.send((result, permit)) {
                match res {
                    (Ok(()), _) => {
                        // our caller is cancellation safe so this is fine; if someone
                        // else requests the layer, they'll find it already downloaded.
                        //
                        // See counter [`LayerImplMetrics::inc_init_needed_no_download`]
                        //
                        // FIXME(#6028): however, could be that we should consider marking the
                        // layer for eviction? alas, cannot: because only DownloadedLayer will
                        // handle that.
                    },
                    (Err(e), _) => {
                        // our caller is cancellation safe, but we might be racing with
                        // another attempt to initialize. before we have cancellation
                        // token support: these attempts should converge regardless of
                        // their completion order.
                        tracing::error!("layer file download failed, and additionally failed to communicate this to caller: {e:?}");
                        LAYER_IMPL_METRICS.inc_download_failed_without_requester();
                    }
                }
            }

            Ok(())
        }
        .in_current_span(),
    );
    match rx.await {
        Ok((Ok(()), permit)) => {
            // verify the post-condition: the file must now be present with the expected size
            if let Some(reason) = self
                .needs_download()
                .await
                .map_err(DownloadError::PostStatFailed)?
            {
                // this is really a bug in needs_download or remote timeline client
                panic!("post-condition failed: needs_download returned {reason:?}");
            }

            self.consecutive_failures.store(0, Ordering::Relaxed);
            tracing::info!("on-demand download successful");

            Ok(permit)
        }
        // the permit is dropped at the end of this arm
        Ok((Err(e), _permit)) => {
            // sleep already happened in the spawned task, if it was not cancelled
            let consecutive_failures = self.consecutive_failures.load(Ordering::Relaxed);

            match e.downcast_ref::<remote_storage::DownloadError>() {
                // If the download failed due to its cancellation token,
                // propagate the cancellation error upstream.
                Some(remote_storage::DownloadError::Cancelled) => {
                    Err(DownloadError::DownloadCancelled)
                }
                _ => {
                    tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
                    Err(DownloadError::DownloadFailed)
                }
            }
        }
        // the sender was dropped without sending: the spawned task never reported a result
        Err(_gone) => Err(DownloadError::DownloadCancelled),
    }
}
989 :
990 6 : async fn needs_download(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
991 8 : match tokio::fs::metadata(&self.path).await {
992 2 : Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),
993 4 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),
994 0 : Err(e) => Err(e),
995 : }
996 6 : }
997 :
998 24 : fn needs_download_blocking(&self) -> Result<Option<NeedsDownload>, std::io::Error> {
999 24 : match self.path.metadata() {
1000 24 : Ok(m) => Ok(self.is_file_present_and_good_size(&m).err()),
1001 0 : Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(Some(NeedsDownload::NotFound)),
1002 0 : Err(e) => Err(e),
1003 : }
1004 24 : }
1005 :
1006 26 : fn is_file_present_and_good_size(&self, m: &std::fs::Metadata) -> Result<(), NeedsDownload> {
1007 26 : // in future, this should include sha2-256 validation of the file.
1008 26 : if !m.is_file() {
1009 0 : Err(NeedsDownload::NotFile(m.file_type()))
1010 26 : } else if m.len() != self.desc.file_size {
1011 0 : Err(NeedsDownload::WrongSize {
1012 0 : actual: m.len(),
1013 0 : expected: self.desc.file_size,
1014 0 : })
1015 : } else {
1016 26 : Ok(())
1017 : }
1018 26 : }
1019 :
1020 0 : fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
1021 0 : let layer_file_name = self.desc.filename().file_name();
1022 0 :
1023 0 : // this is not accurate: we could have the file locally but there was a cancellation
1024 0 : // and now we are not in sync, or we are currently downloading it.
1025 0 : let remote = self.inner.get().is_none();
1026 0 :
1027 0 : let access_stats = self.access_stats.as_api_model(reset);
1028 0 :
1029 0 : if self.desc.is_delta {
1030 0 : let lsn_range = &self.desc.lsn_range;
1031 0 :
1032 0 : HistoricLayerInfo::Delta {
1033 0 : layer_file_name,
1034 0 : layer_file_size: self.desc.file_size,
1035 0 : lsn_start: lsn_range.start,
1036 0 : lsn_end: lsn_range.end,
1037 0 : remote,
1038 0 : access_stats,
1039 0 : }
1040 : } else {
1041 0 : let lsn = self.desc.image_layer_lsn();
1042 0 :
1043 0 : HistoricLayerInfo::Image {
1044 0 : layer_file_name,
1045 0 : layer_file_size: self.desc.file_size,
1046 0 : lsn_start: lsn,
1047 0 : remote,
1048 0 : access_stats,
1049 0 : }
1050 : }
1051 0 : }
1052 :
1053 : /// `DownloadedLayer` is being dropped, so it calls this method.
1054 8 : fn on_downloaded_layer_drop(self: Arc<LayerInner>, version: usize) {
1055 8 : let evict = self.wanted_evicted.load(Ordering::Acquire);
1056 8 : let can_evict = self.have_remote_client;
1057 8 :
1058 8 : if can_evict && evict {
1059 8 : let span = tracing::info_span!(parent: None, "layer_evict", tenant_id = %self.desc.tenant_shard_id.tenant_id, shard_id = %self.desc.tenant_shard_id.shard_slug(), timeline_id = %self.desc.timeline_id, layer=%self, %version);
1060 :
1061 : // downgrade for queueing, in case there's a tear down already ongoing we should not
1062 : // hold it alive.
1063 8 : let this = Arc::downgrade(&self);
1064 8 : drop(self);
1065 8 :
1066 8 : // NOTE: this scope *must* never call `self.inner.get` because evict_and_wait might
1067 8 : // drop while the `self.inner` is being locked, leading to a deadlock.
1068 8 :
1069 8 : crate::task_mgr::BACKGROUND_RUNTIME.spawn_blocking(move || {
1070 8 : let _g = span.entered();
1071 :
1072 : // if LayerInner is already dropped here, do nothing because the delete on drop
1073 : // has already ran while we were in queue
1074 8 : let Some(this) = this.upgrade() else {
1075 0 : LAYER_IMPL_METRICS.inc_eviction_cancelled(EvictionCancelled::LayerGone);
1076 0 : return;
1077 : };
1078 8 : match this.evict_blocking(version) {
1079 6 : Ok(()) => LAYER_IMPL_METRICS.inc_completed_evictions(),
1080 2 : Err(reason) => LAYER_IMPL_METRICS.inc_eviction_cancelled(reason),
1081 : }
1082 8 : });
1083 0 : }
1084 8 : }
1085 :
/// Removes the layer file from local disk, provided the layer is still in the
/// `ResidentOrWantedEvicted::WantedEvicted` state with a version equal to `only_version`.
///
/// Performs blocking filesystem calls; `on_downloaded_layer_drop` invokes it from a
/// `spawn_blocking` task.
///
/// Once the inner state has been taken and deinitialized, the residence event is recorded,
/// `Status::Evicted` is sent and `last_evicted_at` is updated, regardless of whether removing
/// the file succeeded.
///
/// # Errors
///
/// - `EvictionCancelled::TimelineGone`: the weak timeline reference could not be upgraded.
/// - `EvictionCancelled::VersionCheckFailed`: the stored version differs from `only_version`.
/// - `EvictionCancelled::AlreadyReinitialized`: the inner state is not `WantedEvicted`.
/// - `EvictionCancelled::LostToDownload`: the inner state is not initialized at all.
/// - `EvictionCancelled::FileNotFound`: the file was already missing.
/// - `EvictionCancelled::RemoveFailed`: any other I/O error while stat'ing or removing the file.
fn evict_blocking(&self, only_version: usize) -> Result<(), EvictionCancelled> {
    // deleted or detached timeline, don't do anything.
    let Some(timeline) = self.timeline.upgrade() else {
        return Err(EvictionCancelled::TimelineGone);
    };

    // to avoid starting a new download while we evict, keep holding on to the
    // permit.
    let _permit = {
        let maybe_downloaded = self.inner.get();

        let (_weak, permit) = match maybe_downloaded {
            Some(mut guard) => {
                if let ResidentOrWantedEvicted::WantedEvicted(_weak, version) = &*guard {
                    if *version == only_version {
                        // deinitialize the cell; this hands us the previous value and the
                        // init permit
                        guard.take_and_deinit()
                    } else {
                        // this was not for us; maybe there's another eviction job
                        // TODO: does it make any sense to stall here? unique versions do not
                        // matter, we only want to make sure not to evict a resident, which we
                        // are not doing.
                        return Err(EvictionCancelled::VersionCheckFailed);
                    }
                } else {
                    return Err(EvictionCancelled::AlreadyReinitialized);
                }
            }
            None => {
                // already deinitialized, perhaps get_or_maybe_download did this and is
                // currently waiting to reinitialize it
                return Err(EvictionCancelled::LostToDownload);
            }
        };

        permit
    };

    // now accesses to inner.get_or_init wait on the semaphore or the `_permit`

    self.access_stats.record_residence_event(
        LayerResidenceStatus::Evicted,
        LayerResidenceEventReason::ResidenceChange,
    );

    let res = match capture_mtime_and_remove(&self.path) {
        Ok(local_layer_mtime) => {
            // residence duration is measured from the file's modification time; it is
            // unknown when that time lies after the current system time
            let duration = SystemTime::now().duration_since(local_layer_mtime);
            match duration {
                Ok(elapsed) => {
                    timeline
                        .metrics
                        .evictions_with_low_residence_duration
                        .read()
                        .unwrap()
                        .observe(elapsed);
                    tracing::info!(
                        residence_millis = elapsed.as_millis(),
                        "evicted layer after known residence period"
                    );
                }
                Err(_) => {
                    tracing::info!("evicted layer after unknown residence period");
                }
            }
            timeline.metrics.evictions.inc();
            timeline
                .metrics
                .resident_physical_size_sub(self.desc.file_size);

            Ok(())
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            // resident_physical_size is not adjusted on this path
            tracing::error!(
                layer_size = %self.desc.file_size,
                "failed to evict layer from disk, it was already gone (metrics will be inaccurate)"
            );
            Err(EvictionCancelled::FileNotFound)
        }
        Err(e) => {
            tracing::error!("failed to evict file from disk: {e:#}");
            Err(EvictionCancelled::RemoveFailed)
        }
    };

    // we are still holding the permit, so no new spawn_download_and_wait can happen
    drop(self.status.send(Status::Evicted));

    *self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now());

    res
}
1177 :
1178 864 : fn metadata(&self) -> LayerFileMetadata {
1179 864 : LayerFileMetadata::new(self.desc.file_size, self.generation, self.shard)
1180 864 : }
1181 : }
1182 :
1183 6 : fn capture_mtime_and_remove(path: &Utf8Path) -> Result<SystemTime, std::io::Error> {
1184 6 : let m = path.metadata()?;
1185 6 : let local_layer_mtime = m.modified()?;
1186 6 : std::fs::remove_file(path)?;
1187 6 : Ok(local_layer_mtime)
1188 6 : }
1189 :
/// Reasons why an eviction request did not evict the layer.
// NOTE(review): the producer of these values is outside this part of the file -- presumably
// `evict_and_wait`; confirm.
#[derive(Debug, thiserror::Error)]
pub(crate) enum EvictionError {
    /// There was nothing to evict: the layer was already evicted.
    #[error("layer was already evicted")]
    NotFound,

    /// Evictions must always lose to downloads in races, and this time it happened.
    #[error("layer was downloaded instead")]
    Downloaded,
}
1199 :
/// Error internal to the [`LayerInner::get_or_maybe_download`]
#[derive(Debug, thiserror::Error)]
pub(crate) enum DownloadError {
    /// The timeline has already been shut down.
    #[error("timeline has already shutdown")]
    TimelineShutdown,
    /// No remote storage is configured, so there is nothing to download from.
    #[error("no remote storage configured")]
    NoRemoteStorage,
    /// Returned by `check_expected_download` when the request context asks for
    /// `DownloadBehavior::Error` and the configuration does not downgrade that to a warning.
    #[error("context denies downloading")]
    ContextAndConfigReallyDeniesDownloads,
    /// A download would be needed, but the calling method does not allow one.
    #[error("downloading is really required but not allowed by this method")]
    DownloadRequired,
    /// The layer path exists but is something other than a regular file.
    #[error("layer path exists, but it is not a file: {0:?}")]
    NotFile(std::fs::FileType),
    /// Why no error here? Because it will be reported by page_service. We should have also done
    /// retries already.
    #[error("downloading evicted layer file failed")]
    DownloadFailed,
    /// Returned by `spawn_download_and_wait` when the download reported cancellation, or when
    /// the download task went away without reporting a result.
    #[error("downloading failed, possibly for shutdown")]
    DownloadCancelled,
    /// The stat done before attempting the download failed with the wrapped I/O error.
    #[error("pre-condition: stat before download failed")]
    PreStatFailed(#[source] std::io::Error),
    /// The stat done after a successful download failed with the wrapped I/O error.
    #[error("post-condition: stat after download failed")]
    PostStatFailed(#[source] std::io::Error),
}
1224 :
/// Why the local layer file cannot be used as is, as found by `needs_download`,
/// `needs_download_blocking` and `is_file_present_and_good_size`.
#[derive(Debug, PartialEq)]
pub(crate) enum NeedsDownload {
    /// Nothing exists at the layer path.
    NotFound,
    /// The path exists but is not a regular file; carries the file type that was found.
    NotFile(std::fs::FileType),
    /// The file length (`actual`) differs from the size in the layer description (`expected`).
    WrongSize { actual: u64, expected: u64 },
}
1231 :
1232 : impl std::fmt::Display for NeedsDownload {
1233 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1234 0 : match self {
1235 0 : NeedsDownload::NotFound => write!(f, "file was not found"),
1236 0 : NeedsDownload::NotFile(ft) => write!(f, "path is not a file; {ft:?}"),
1237 0 : NeedsDownload::WrongSize { actual, expected } => {
1238 0 : write!(f, "file size mismatch {actual} vs. {expected}")
1239 : }
1240 : }
1241 0 : }
1242 : }
1243 :
/// Existence of `DownloadedLayer` means that we have the file locally, and can later evict it.
pub(crate) struct DownloadedLayer {
    /// Back-reference to the owning `LayerInner`; upgraded on drop to notify the owner through
    /// `on_downloaded_layer_drop`.
    owner: Weak<LayerInner>,
    // Use tokio OnceCell as we do not need to deinitialize this, it'll just get dropped with the
    // DownloadedLayer
    /// Lazily loaded layer contents; a permanent loading failure is stored here as `Err`.
    kind: tokio::sync::OnceCell<anyhow::Result<LayerKind>>,
    /// Passed to the owner on drop; `evict_blocking` only evicts when this equals the version
    /// recorded in `ResidentOrWantedEvicted::WantedEvicted`.
    version: usize,
}
1252 :
1253 : impl std::fmt::Debug for DownloadedLayer {
1254 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1255 0 : f.debug_struct("DownloadedLayer")
1256 0 : // owner omitted because it is always "Weak"
1257 0 : .field("kind", &self.kind)
1258 0 : .field("version", &self.version)
1259 0 : .finish()
1260 0 : }
1261 : }
1262 :
1263 : impl Drop for DownloadedLayer {
1264 332 : fn drop(&mut self) {
1265 332 : if let Some(owner) = self.owner.upgrade() {
1266 8 : owner.on_downloaded_layer_drop(self.version);
1267 324 : } else {
1268 324 : // no need to do anything, we are shutting down
1269 324 : }
1270 332 : }
1271 : }
1272 :
impl DownloadedLayer {
    /// Initializes the `DeltaLayerInner` or `ImageLayerInner` within [`LayerKind`], or fails to
    /// initialize it permanently.
    ///
    /// `owner` parameter is a strong reference at the same `LayerInner` as the
    /// `DownloadedLayer::owner` would be when upgraded. Given how this method ends up called,
    /// we will always have the LayerInner on the callstack, so we can just use it.
    ///
    /// A transient loading error is returned to the caller without being stored, so a later
    /// call runs the initialization again. A permanent loading error is stored in `self.kind`;
    /// every later call then returns a freshly formatted "layer loading failed" error.
    ///
    /// # Panics
    ///
    /// The initialization panics if `owner` is not the same allocation as `self.owner`.
    async fn get<'a>(
        &'a self,
        owner: &Arc<LayerInner>,
        ctx: &RequestContext,
    ) -> anyhow::Result<&'a LayerKind> {
        let init = || async {
            assert_eq!(
                Weak::as_ptr(&self.owner),
                Arc::as_ptr(owner),
                "these are the same, just avoiding the upgrade"
            );

            // `res` is a nested result: the outer `Err` is a permanent failure, the inner
            // `Err` a transient one (see the match below)
            let res = if owner.desc.is_delta {
                let summary = Some(delta_layer::Summary::expected(
                    owner.desc.tenant_shard_id.tenant_id,
                    owner.desc.timeline_id,
                    owner.desc.key_range.clone(),
                    owner.desc.lsn_range.clone(),
                ));
                delta_layer::DeltaLayerInner::load(
                    &owner.path,
                    summary,
                    Some(owner.conf.max_vectored_read_bytes),
                    ctx,
                )
                .await
                .map(|res| res.map(LayerKind::Delta))
            } else {
                let lsn = owner.desc.image_layer_lsn();
                let summary = Some(image_layer::Summary::expected(
                    owner.desc.tenant_shard_id.tenant_id,
                    owner.desc.timeline_id,
                    owner.desc.key_range.clone(),
                    lsn,
                ));
                image_layer::ImageLayerInner::load(
                    &owner.path,
                    lsn,
                    summary,
                    Some(owner.conf.max_vectored_read_bytes),
                    ctx,
                )
                .await
                .map(|res| res.map(LayerKind::Image))
            };

            // re-shape for `get_or_try_init`: its `Err` leaves the cell uninitialized, while
            // `Ok(Err(_))` gets stored
            match res {
                Ok(Ok(layer)) => Ok(Ok(layer)),
                Ok(Err(transient)) => Err(transient),
                Err(permanent) => {
                    LAYER_IMPL_METRICS.inc_permanent_loading_failures();
                    // TODO(#5815): we are not logging all errors, so temporarily log them **once**
                    // here as well
                    let permanent = permanent.context("load layer");
                    tracing::error!("layer loading failed permanently: {permanent:#}");
                    Ok(Err(permanent))
                }
            }
        };
        self.kind
            .get_or_try_init(init)
            // return transient errors using `?`
            .await?
            .as_ref()
            .map_err(|e| {
                // errors are not cloneable, cannot but stringify
                // test_broken_timeline matches this string
                anyhow::anyhow!("layer loading failed: {e:#}")
            })
    }

    /// Loads the layer contents if needed, then forwards the single-key reconstruct request to
    /// the delta or image implementation.
    ///
    /// `lsn_range` is only passed on to delta layers.
    async fn get_value_reconstruct_data(
        &self,
        key: Key,
        lsn_range: Range<Lsn>,
        reconstruct_data: &mut ValueReconstructState,
        owner: &Arc<LayerInner>,
        ctx: &RequestContext,
    ) -> anyhow::Result<ValueReconstructResult> {
        use LayerKind::*;

        match self.get(owner, ctx).await? {
            Delta(d) => {
                d.get_value_reconstruct_data(key, lsn_range, reconstruct_data, ctx)
                    .await
            }
            Image(i) => {
                i.get_value_reconstruct_data(key, reconstruct_data, ctx)
                    .await
            }
        }
    }

    /// Loads the layer contents if needed, then forwards the vectored reconstruct request to
    /// the delta or image implementation.
    ///
    /// A failure to load the layer is converted into `GetVectoredError`. `lsn_range` is only
    /// passed on to delta layers.
    async fn get_values_reconstruct_data(
        &self,
        keyspace: KeySpace,
        lsn_range: Range<Lsn>,
        reconstruct_data: &mut ValuesReconstructState,
        owner: &Arc<LayerInner>,
        ctx: &RequestContext,
    ) -> Result<(), GetVectoredError> {
        use LayerKind::*;

        match self.get(owner, ctx).await.map_err(GetVectoredError::from)? {
            Delta(d) => {
                d.get_values_reconstruct_data(keyspace, lsn_range, reconstruct_data, ctx)
                    .await
            }
            Image(i) => {
                i.get_values_reconstruct_data(keyspace, reconstruct_data, ctx)
                    .await
            }
        }
    }

    /// Loads the layer contents if needed, then calls `dump` on the delta or image
    /// implementation.
    async fn dump(&self, owner: &Arc<LayerInner>, ctx: &RequestContext) -> anyhow::Result<()> {
        use LayerKind::*;
        match self.get(owner, ctx).await? {
            Delta(d) => d.dump(ctx).await?,
            Image(i) => i.dump(ctx).await?,
        }

        Ok(())
    }
}
1405 :
/// Wrapper around an actual layer implementation.
#[derive(Debug)]
enum LayerKind {
    /// Loaded contents of a delta layer.
    Delta(delta_layer::DeltaLayerInner),
    /// Loaded contents of an image layer.
    Image(image_layer::ImageLayerInner),
}
1412 :
/// Guard for forcing a layer be resident while it exists.
#[derive(Clone)]
pub(crate) struct ResidentLayer {
    /// The layer this guard belongs to.
    owner: Layer,
    /// Strong reference to the downloaded state; `DownloadedLayer`'s `Drop` is what notifies
    /// the owner, so that cannot happen while this guard (or a clone of it) is alive.
    downloaded: Arc<DownloadedLayer>,
}
1419 :
1420 : impl std::fmt::Display for ResidentLayer {
1421 1162 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1422 1162 : write!(f, "{}", self.owner)
1423 1162 : }
1424 : }
1425 :
1426 : impl std::fmt::Debug for ResidentLayer {
1427 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1428 0 : write!(f, "{}", self.owner)
1429 0 : }
1430 : }
1431 :
1432 : impl ResidentLayer {
1433 : /// Release the eviction guard, converting back into a plain [`Layer`].
1434 : ///
1435 : /// You can access the [`Layer`] also by using `as_ref`.
1436 322 : pub(crate) fn drop_eviction_guard(self) -> Layer {
1437 322 : self.into()
1438 322 : }
1439 :
1440 : /// Loads all keys stored in the layer. Returns key, lsn and value size.
1441 600 : #[tracing::instrument(skip_all, fields(layer=%self))]
1442 : pub(crate) async fn load_keys<'a>(
1443 : &'a self,
1444 : ctx: &RequestContext,
1445 : ) -> anyhow::Result<Vec<DeltaEntry<'a>>> {
1446 : use LayerKind::*;
1447 :
1448 : let owner = &self.owner.0;
1449 :
1450 : match self.downloaded.get(owner, ctx).await? {
1451 : Delta(ref d) => {
1452 : owner
1453 : .access_stats
1454 : .record_access(LayerAccessKind::KeyIter, ctx);
1455 :
1456 : // this is valid because the DownloadedLayer::kind is a OnceCell, not a
1457 : // Mutex<OnceCell>, so we cannot go and deinitialize the value with OnceCell::take
1458 : // while it's being held.
1459 : delta_layer::DeltaLayerInner::load_keys(d, ctx)
1460 : .await
1461 : .context("Layer index is corrupted")
1462 : }
1463 : Image(_) => anyhow::bail!("cannot load_keys on a image layer"),
1464 : }
1465 : }
1466 :
1467 1109 : pub(crate) fn local_path(&self) -> &Utf8Path {
1468 1109 : &self.owner.0.path
1469 1109 : }
1470 :
1471 562 : pub(crate) fn metadata(&self) -> LayerFileMetadata {
1472 562 : self.owner.metadata()
1473 562 : }
1474 : }
1475 :
1476 : impl AsLayerDesc for ResidentLayer {
1477 1280 : fn layer_desc(&self) -> &PersistentLayerDesc {
1478 1280 : self.owner.layer_desc()
1479 1280 : }
1480 : }
1481 :
1482 : impl AsRef<Layer> for ResidentLayer {
1483 584 : fn as_ref(&self) -> &Layer {
1484 584 : &self.owner
1485 584 : }
1486 : }
1487 :
1488 : /// Drop the eviction guard.
1489 : impl From<ResidentLayer> for Layer {
1490 322 : fn from(value: ResidentLayer) -> Self {
1491 322 : value.owner
1492 322 : }
1493 : }
1494 :
1495 : use metrics::IntCounter;
1496 :
/// Counters and a histogram describing what the `Layer` implementation has been doing.
pub(crate) struct LayerImplMetrics {
    /// Evictions started in the Layer implementation.
    started_evictions: IntCounter,
    /// Evictions completed in the Layer implementation.
    completed_evictions: IntCounter,
    /// Evictions cancelled or failed, one counter per [`EvictionCancelled`] reason.
    cancelled_evictions: enum_map::EnumMap<EvictionCancelled, IntCounter>,

    /// Deletions on drop pending in the Layer implementation.
    started_deletes: IntCounter,
    /// Deletions on drop completed in the Layer implementation.
    completed_deletes: IntCounter,
    /// Failed deletions on drop, one counter per [`DeleteFailed`] reason.
    failed_deletes: enum_map::EnumMap<DeleteFailed, IntCounter>,

    /// Unexpected or assumed rare events, one counter per [`RareEvent`].
    rare_counters: enum_map::EnumMap<RareEvent, IntCounter>,
    /// Times Layer initialization was cancelled.
    inits_cancelled: metrics::core::GenericCounter<metrics::core::AtomicU64>,
    /// Time between evicting and re-downloading, observed in seconds.
    redownload_after: metrics::Histogram,
}
1510 :
1511 : impl Default for LayerImplMetrics {
1512 12 : fn default() -> Self {
1513 12 : use enum_map::Enum;
1514 12 :
1515 12 : // reminder: these will be pageserver_layer_* with "_total" suffix
1516 12 :
1517 12 : let started_evictions = metrics::register_int_counter!(
1518 12 : "pageserver_layer_started_evictions",
1519 12 : "Evictions started in the Layer implementation"
1520 12 : )
1521 12 : .unwrap();
1522 12 : let completed_evictions = metrics::register_int_counter!(
1523 12 : "pageserver_layer_completed_evictions",
1524 12 : "Evictions completed in the Layer implementation"
1525 12 : )
1526 12 : .unwrap();
1527 12 :
1528 12 : let cancelled_evictions = metrics::register_int_counter_vec!(
1529 12 : "pageserver_layer_cancelled_evictions_count",
1530 12 : "Different reasons for evictions to have been cancelled or failed",
1531 12 : &["reason"]
1532 12 : )
1533 12 : .unwrap();
1534 12 :
1535 96 : let cancelled_evictions = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1536 96 : let reason = EvictionCancelled::from_usize(i);
1537 96 : let s = reason.as_str();
1538 96 : cancelled_evictions.with_label_values(&[s])
1539 96 : }));
1540 12 :
1541 12 : let started_deletes = metrics::register_int_counter!(
1542 12 : "pageserver_layer_started_deletes",
1543 12 : "Deletions on drop pending in the Layer implementation"
1544 12 : )
1545 12 : .unwrap();
1546 12 : let completed_deletes = metrics::register_int_counter!(
1547 12 : "pageserver_layer_completed_deletes",
1548 12 : "Deletions on drop completed in the Layer implementation"
1549 12 : )
1550 12 : .unwrap();
1551 12 :
1552 12 : let failed_deletes = metrics::register_int_counter_vec!(
1553 12 : "pageserver_layer_failed_deletes_count",
1554 12 : "Different reasons for deletions on drop to have failed",
1555 12 : &["reason"]
1556 12 : )
1557 12 : .unwrap();
1558 12 :
1559 24 : let failed_deletes = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1560 24 : let reason = DeleteFailed::from_usize(i);
1561 24 : let s = reason.as_str();
1562 24 : failed_deletes.with_label_values(&[s])
1563 24 : }));
1564 12 :
1565 12 : let rare_counters = metrics::register_int_counter_vec!(
1566 12 : "pageserver_layer_assumed_rare_count",
1567 12 : "Times unexpected or assumed rare event happened",
1568 12 : &["event"]
1569 12 : )
1570 12 : .unwrap();
1571 12 :
1572 84 : let rare_counters = enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1573 84 : let event = RareEvent::from_usize(i);
1574 84 : let s = event.as_str();
1575 84 : rare_counters.with_label_values(&[s])
1576 84 : }));
1577 12 :
1578 12 : let inits_cancelled = metrics::register_int_counter!(
1579 12 : "pageserver_layer_inits_cancelled_count",
1580 12 : "Times Layer initialization was cancelled",
1581 12 : )
1582 12 : .unwrap();
1583 12 :
1584 12 : let redownload_after = {
1585 12 : let minute = 60.0;
1586 12 : let hour = 60.0 * minute;
1587 12 : metrics::register_histogram!(
1588 12 : "pageserver_layer_redownloaded_after",
1589 12 : "Time between evicting and re-downloading.",
1590 12 : vec![
1591 12 : 10.0,
1592 12 : 30.0,
1593 12 : minute,
1594 12 : 5.0 * minute,
1595 12 : 15.0 * minute,
1596 12 : 30.0 * minute,
1597 12 : hour,
1598 12 : 12.0 * hour,
1599 12 : ]
1600 12 : )
1601 12 : .unwrap()
1602 12 : };
1603 12 :
1604 12 : Self {
1605 12 : started_evictions,
1606 12 : completed_evictions,
1607 12 : cancelled_evictions,
1608 12 :
1609 12 : started_deletes,
1610 12 : completed_deletes,
1611 12 : failed_deletes,
1612 12 :
1613 12 : rare_counters,
1614 12 : inits_cancelled,
1615 12 : redownload_after,
1616 12 : }
1617 12 : }
1618 : }
1619 :
1620 : impl LayerImplMetrics {
1621 8 : fn inc_started_evictions(&self) {
1622 8 : self.started_evictions.inc();
1623 8 : }
1624 6 : fn inc_completed_evictions(&self) {
1625 6 : self.completed_evictions.inc();
1626 6 : }
1627 2 : fn inc_eviction_cancelled(&self, reason: EvictionCancelled) {
1628 2 : self.cancelled_evictions[reason].inc()
1629 2 : }
1630 :
1631 302 : fn inc_started_deletes(&self) {
1632 302 : self.started_deletes.inc();
1633 302 : }
1634 302 : fn inc_completed_deletes(&self) {
1635 302 : self.completed_deletes.inc();
1636 302 : }
1637 0 : fn inc_deletes_failed(&self, reason: DeleteFailed) {
1638 0 : self.failed_deletes[reason].inc();
1639 0 : }
1640 :
1641 : /// Counted separatedly from failed layer deletes because we will complete the layer deletion
1642 : /// attempt regardless of failure to delete local file.
1643 0 : fn inc_delete_removes_failed(&self) {
1644 0 : self.rare_counters[RareEvent::RemoveOnDropFailed].inc();
1645 0 : }
1646 :
1647 : /// Expected rare because requires a race with `evict_blocking` and `get_or_maybe_download`.
1648 2 : fn inc_retried_get_or_maybe_download(&self) {
1649 2 : self.rare_counters[RareEvent::RetriedGetOrMaybeDownload].inc();
1650 2 : }
1651 :
1652 : /// Expected rare because cancellations are unexpected, and failures are unexpected
1653 0 : fn inc_download_failed_without_requester(&self) {
1654 0 : self.rare_counters[RareEvent::DownloadFailedWithoutRequester].inc();
1655 0 : }
1656 :
1657 : /// The Weak in ResidentOrWantedEvicted::WantedEvicted was successfully upgraded.
1658 : ///
1659 : /// If this counter is always zero, we should replace ResidentOrWantedEvicted type with an
1660 : /// Option.
1661 0 : fn inc_raced_wanted_evicted_accesses(&self) {
1662 0 : self.rare_counters[RareEvent::UpgradedWantedEvicted].inc();
1663 0 : }
1664 :
1665 : /// These are only expected for [`Self::inc_init_cancelled`] amount when
1666 : /// running with remote storage.
1667 2 : fn inc_init_needed_no_download(&self) {
1668 2 : self.rare_counters[RareEvent::InitWithoutDownload].inc();
1669 2 : }
1670 :
1671 : /// Expected rare because all layer files should be readable and good
1672 0 : fn inc_permanent_loading_failures(&self) {
1673 0 : self.rare_counters[RareEvent::PermanentLoadingFailure].inc();
1674 0 : }
1675 :
1676 2 : fn inc_broadcast_lagged(&self) {
1677 2 : self.rare_counters[RareEvent::EvictAndWaitLagged].inc();
1678 2 : }
1679 :
1680 4 : fn inc_init_cancelled(&self) {
1681 4 : self.inits_cancelled.inc()
1682 4 : }
1683 :
1684 0 : fn record_redownloaded_after(&self, duration: std::time::Duration) {
1685 0 : self.redownload_after.observe(duration.as_secs_f64())
1686 0 : }
1687 : }
1688 :
/// Reasons for an eviction to be cancelled or to fail; each one is a label value of the
/// `pageserver_layer_cancelled_evictions_count` metric.
#[derive(enum_map::Enum)]
enum EvictionCancelled {
    /// The `LayerInner` was already dropped when the queued eviction job got to run.
    LayerGone,
    /// The timeline could not be upgraded from its weak reference.
    TimelineGone,
    /// The version stored with `WantedEvicted` did not match the version being evicted.
    VersionCheckFailed,
    /// The layer file was already missing from disk.
    FileNotFound,
    /// Stat'ing or removing the layer file failed with an I/O error other than not found.
    RemoveFailed,
    /// The inner state was no longer `WantedEvicted` when the eviction ran.
    AlreadyReinitialized,
    /// Not evicted because of a pending reinitialization
    LostToDownload,
    /// After eviction, there was a new layer access which cancelled the eviction.
    UpgradedBackOnAccess,
}
1702 :
1703 : impl EvictionCancelled {
1704 96 : fn as_str(&self) -> &'static str {
1705 96 : match self {
1706 12 : EvictionCancelled::LayerGone => "layer_gone",
1707 12 : EvictionCancelled::TimelineGone => "timeline_gone",
1708 12 : EvictionCancelled::VersionCheckFailed => "version_check_fail",
1709 12 : EvictionCancelled::FileNotFound => "file_not_found",
1710 12 : EvictionCancelled::RemoveFailed => "remove_failed",
1711 12 : EvictionCancelled::AlreadyReinitialized => "already_reinitialized",
1712 12 : EvictionCancelled::LostToDownload => "lost_to_download",
1713 12 : EvictionCancelled::UpgradedBackOnAccess => "upgraded_back_on_access",
1714 : }
1715 96 : }
1716 : }
1717 :
/// Reasons for a delete on drop to fail; each one is a label value of the
/// `pageserver_layer_failed_deletes_count` metric.
// NOTE(review): the code producing these values is outside this part of the file; the variant
// notes below are inferred from the names and label strings -- confirm.
#[derive(enum_map::Enum)]
enum DeleteFailed {
    /// Presumably: the timeline was already gone.
    TimelineGone,
    /// Presumably: scheduling the deletion failed.
    DeleteSchedulingFailed,
}
1723 :
1724 : impl DeleteFailed {
1725 24 : fn as_str(&self) -> &'static str {
1726 24 : match self {
1727 12 : DeleteFailed::TimelineGone => "timeline_gone",
1728 12 : DeleteFailed::DeleteSchedulingFailed => "delete_scheduling_failed",
1729 : }
1730 24 : }
1731 : }
1732 :
/// Events assumed to be rare or unexpected; each one is a label value of the
/// `pageserver_layer_assumed_rare_count` metric.
#[derive(enum_map::Enum)]
enum RareEvent {
    /// Counted by `inc_delete_removes_failed`: removing the local file failed during a layer
    /// delete, which is completed regardless.
    RemoveOnDropFailed,
    /// Counted by `inc_retried_get_or_maybe_download`: requires a race between `evict_blocking`
    /// and `get_or_maybe_download`.
    RetriedGetOrMaybeDownload,
    /// Counted by `inc_download_failed_without_requester`: a download failed and the failure
    /// could not be communicated to the caller.
    DownloadFailedWithoutRequester,
    /// Counted by `inc_raced_wanted_evicted_accesses`: the `Weak` in `WantedEvicted` was
    /// successfully upgraded.
    UpgradedWantedEvicted,
    /// Counted by `inc_init_needed_no_download`.
    InitWithoutDownload,
    /// Counted by `inc_permanent_loading_failures`: loading a layer failed permanently.
    PermanentLoadingFailure,
    /// Counted by `inc_broadcast_lagged`.
    EvictAndWaitLagged,
}
1743 :
1744 : impl RareEvent {
1745 84 : fn as_str(&self) -> &'static str {
1746 84 : use RareEvent::*;
1747 84 :
1748 84 : match self {
1749 12 : RemoveOnDropFailed => "remove_on_drop_failed",
1750 12 : RetriedGetOrMaybeDownload => "retried_gomd",
1751 12 : DownloadFailedWithoutRequester => "download_failed_without",
1752 12 : UpgradedWantedEvicted => "raced_wanted_evicted",
1753 12 : InitWithoutDownload => "init_needed_no_download",
1754 12 : PermanentLoadingFailure => "permanent_loading_failure",
1755 12 : EvictAndWaitLagged => "broadcast_lagged",
1756 : }
1757 84 : }
1758 : }
1759 :
/// Process-wide metrics of the Layer implementation.
///
/// Built lazily with `LayerImplMetrics::default` on first access, which is when the metrics
/// get registered.
pub(crate) static LAYER_IMPL_METRICS: once_cell::sync::Lazy<LayerImplMetrics> =
    once_cell::sync::Lazy::new(LayerImplMetrics::default);
|