LCOV - differential code coverage report
Current view: top level - pageserver/src - metrics.rs (source / functions) Coverage Total Hit UBC CBC
Current: cd44433dd675caa99df17a61b18949c8387e2242.info Lines: 96.9 % 1631 1580 51 1580
Current Date: 2024-01-09 02:06:09 Functions: 86.3 % 313 270 43 270
Baseline: 66c52a629a0f4a503e193045e0df4c77139e344b.info
Baseline Date: 2024-01-08 15:34:46

           TLA  Line data    Source code
       1                 : use enum_map::EnumMap;
       2                 : use metrics::metric_vec_duration::DurationResultObserver;
       3                 : use metrics::{
       4                 :     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
       5                 :     register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
       6                 :     register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
       7                 :     Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
       8                 :     IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
       9                 : };
      10                 : use once_cell::sync::Lazy;
      11                 : use pageserver_api::shard::TenantShardId;
      12                 : use strum::{EnumCount, IntoEnumIterator, VariantNames};
      13                 : use strum_macros::{EnumVariantNames, IntoStaticStr};
      14                 : use utils::id::{TenantId, TimelineId};
      15                 : 
      16                 : /// Prometheus histogram buckets (in seconds) for operations in the critical
      17                 : /// path. In other words, operations that directly affect the latency of user
      18                 : /// queries.
      19                 : ///
      20                 : /// The buckets capture the majority of latencies in the microsecond and
      21                 : /// millisecond range but also extend far enough up to distinguish "bad" from
      22                 : /// "really bad".
      23                 : const CRITICAL_OP_BUCKETS: &[f64] = &[
      24                 :     0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
      25                 :     0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
      26                 :     1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
      27                 : ];
      28                 : 
      29                 : // Metrics collected on operations on the storage repository.
      30 CBC        9461 : #[derive(Debug, EnumVariantNames, IntoStaticStr)]
      31                 : #[strum(serialize_all = "kebab_case")]
      32                 : pub(crate) enum StorageTimeOperation {
      33                 :     #[strum(serialize = "layer flush")]
      34                 :     LayerFlush,
      35                 : 
      36                 :     #[strum(serialize = "compact")]
      37                 :     Compact,
      38                 : 
      39                 :     #[strum(serialize = "create images")]
      40                 :     CreateImages,
      41                 : 
      42                 :     #[strum(serialize = "logical size")]
      43                 :     LogicalSize,
      44                 : 
      45                 :     #[strum(serialize = "imitate logical size")]
      46                 :     ImitateLogicalSize,
      47                 : 
      48                 :     #[strum(serialize = "load layer map")]
      49                 :     LoadLayerMap,
      50                 : 
      51                 :     #[strum(serialize = "gc")]
      52                 :     Gc,
      53                 : 
      54                 :     #[strum(serialize = "create tenant")]
      55                 :     CreateTenant,
      56                 : }
      57                 : 
      58             560 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
      59             560 :     register_counter_vec!(
      60             560 :         "pageserver_storage_operations_seconds_sum",
      61             560 :         "Total time spent on storage operations with operation, tenant and timeline dimensions",
      62             560 :         &["operation", "tenant_id", "timeline_id"],
      63             560 :     )
      64             560 :     .expect("failed to define a metric")
      65             560 : });
      66                 : 
      67             560 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
      68             560 :     register_int_counter_vec!(
      69             560 :         "pageserver_storage_operations_seconds_count",
      70             560 :         "Count of storage operations with operation, tenant and timeline dimensions",
      71             560 :         &["operation", "tenant_id", "timeline_id"],
      72             560 :     )
      73             560 :     .expect("failed to define a metric")
      74             560 : });
      75                 : 
      76                 : // Buckets for background operations like compaction, GC, size calculation
      77                 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
      78                 : 
      79             560 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
      80             560 :     register_histogram_vec!(
      81             560 :         "pageserver_storage_operations_seconds_global",
      82             560 :         "Time spent on storage operations",
      83             560 :         &["operation"],
      84             560 :         STORAGE_OP_BUCKETS.into(),
      85             560 :     )
      86             560 :     .expect("failed to define a metric")
      87             560 : });
      88                 : 
      89             593 : pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
      90             593 :     register_histogram!(
      91             593 :         "pageserver_read_num_fs_layers",
      92             593 :         "Number of persistent layers accessed for processing a read request, including those in the cache",
      93             593 :         vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
      94             593 :     )
      95             593 :     .expect("failed to define a metric")
      96             593 : });
      97                 : 
      98                 : // Metrics for the time spent reconstructing a page, split by ok/err result.
      99                 : 
     100                 : pub(crate) struct ReconstructTimeMetrics {
     101                 :     ok: Histogram,
     102                 :     err: Histogram,
     103                 : }
     104                 : 
     105             593 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
     106             593 :     let inner = register_histogram_vec!(
     107             593 :         "pageserver_getpage_reconstruct_seconds",
     108             593 :         "Time spent in reconstruct_value (reconstruct a page from deltas)",
     109             593 :         &["result"],
     110             593 :         CRITICAL_OP_BUCKETS.into(),
     111             593 :     )
     112             593 :     .expect("failed to define a metric");
     113             593 :     ReconstructTimeMetrics {
     114             593 :         ok: inner.get_metric_with_label_values(&["ok"]).unwrap(),
     115             593 :         err: inner.get_metric_with_label_values(&["err"]).unwrap(),
     116             593 :     }
     117             593 : });
     118                 : 
     119                 : impl ReconstructTimeMetrics {
     120         5861451 :     pub(crate) fn for_result<T, E>(&self, result: &Result<T, E>) -> &Histogram {
     121         5861451 :         match result {
     122         5861451 :             Ok(_) => &self.ok,
     123 UBC           0 :             Err(_) => &self.err,
     124                 :         }
     125 CBC     5861451 :     }
     126                 : }
     127                 : 
     128             557 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
     129             557 :     register_int_counter!(
     130             557 :         "pageserver_materialized_cache_hits_direct_total",
     131             557 :         "Number of cache hits from materialized page cache without redo",
     132             557 :     )
     133             557 :     .expect("failed to define a metric")
     134             557 : });
     135                 : 
     136             546 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<Histogram> = Lazy::new(|| {
     137             546 :     register_histogram!(
     138             546 :         "pageserver_getpage_get_reconstruct_data_seconds",
     139             546 :         "Time spent in get_reconstruct_value_data",
     140             546 :         CRITICAL_OP_BUCKETS.into(),
     141             546 :     )
     142             546 :     .expect("failed to define a metric")
     143             546 : });
     144                 : 
     145             557 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
     146             557 :     register_int_counter!(
     147             557 :         "pageserver_materialized_cache_hits_total",
     148             557 :         "Number of cache hits from materialized page cache",
     149             557 :     )
     150             557 :     .expect("failed to define a metric")
     151             557 : });
     152                 : 
     153                 : pub(crate) struct PageCacheMetricsForTaskKind {
     154                 :     pub read_accesses_materialized_page: IntCounter,
     155                 :     pub read_accesses_immutable: IntCounter,
     156                 : 
     157                 :     pub read_hits_immutable: IntCounter,
     158                 :     pub read_hits_materialized_page_exact: IntCounter,
     159                 :     pub read_hits_materialized_page_older_lsn: IntCounter,
     160                 : }
     161                 : 
     162                 : pub(crate) struct PageCacheMetrics {
     163                 :     map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
     164                 : }
     165                 : 
     166             547 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
     167             547 :     register_int_counter_vec!(
     168             547 :         "pageserver_page_cache_read_hits_total",
     169             547 :         "Number of read accesses to the page cache that hit",
     170             547 :         &["task_kind", "key_kind", "content_kind", "hit_kind"]
     171             547 :     )
     172             547 :     .expect("failed to define a metric")
     173             547 : });
     174                 : 
     175             547 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
     176             547 :     register_int_counter_vec!(
     177             547 :         "pageserver_page_cache_read_accesses_total",
     178             547 :         "Number of read accesses to the page cache",
     179             547 :         &["task_kind", "key_kind", "content_kind"]
     180             547 :     )
     181             547 :     .expect("failed to define a metric")
     182             547 : });
     183                 : 
     184             547 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
     185           14259 :     map: EnumMap::from_array(std::array::from_fn(|task_kind| {
     186           14259 :         let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
     187           14259 :         let task_kind: &'static str = task_kind.into();
     188           85554 :         EnumMap::from_array(std::array::from_fn(|content_kind| {
     189           85554 :             let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
     190           85554 :             let content_kind: &'static str = content_kind.into();
     191           85554 :             PageCacheMetricsForTaskKind {
     192           85554 :                 read_accesses_materialized_page: {
     193           85554 :                     PAGE_CACHE_READ_ACCESSES
     194           85554 :                         .get_metric_with_label_values(&[
     195           85554 :                             task_kind,
     196           85554 :                             "materialized_page",
     197           85554 :                             content_kind,
     198           85554 :                         ])
     199           85554 :                         .unwrap()
     200           85554 :                 },
     201           85554 : 
     202           85554 :                 read_accesses_immutable: {
     203           85554 :                     PAGE_CACHE_READ_ACCESSES
     204           85554 :                         .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
     205           85554 :                         .unwrap()
     206           85554 :                 },
     207           85554 : 
     208           85554 :                 read_hits_immutable: {
     209           85554 :                     PAGE_CACHE_READ_HITS
     210           85554 :                         .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
     211           85554 :                         .unwrap()
     212           85554 :                 },
     213           85554 : 
     214           85554 :                 read_hits_materialized_page_exact: {
     215           85554 :                     PAGE_CACHE_READ_HITS
     216           85554 :                         .get_metric_with_label_values(&[
     217           85554 :                             task_kind,
     218           85554 :                             "materialized_page",
     219           85554 :                             content_kind,
     220           85554 :                             "exact",
     221           85554 :                         ])
     222           85554 :                         .unwrap()
     223           85554 :                 },
     224           85554 : 
     225           85554 :                 read_hits_materialized_page_older_lsn: {
     226           85554 :                     PAGE_CACHE_READ_HITS
     227           85554 :                         .get_metric_with_label_values(&[
     228           85554 :                             task_kind,
     229           85554 :                             "materialized_page",
     230           85554 :                             content_kind,
     231           85554 :                             "older_lsn",
     232           85554 :                         ])
     233           85554 :                         .unwrap()
     234           85554 :                 },
     235           85554 :             }
     236           85554 :         }))
     237           14259 :     })),
     238             547 : });
     239                 : 
     240                 : impl PageCacheMetrics {
     241       313684531 :     pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
     242       313684531 :         &self.map[ctx.task_kind()][ctx.page_content_kind()]
     243       313684531 :     }
     244                 : }
     245                 : 
     246                 : pub(crate) struct PageCacheSizeMetrics {
     247                 :     pub max_bytes: UIntGauge,
     248                 : 
     249                 :     pub current_bytes_immutable: UIntGauge,
     250                 :     pub current_bytes_materialized_page: UIntGauge,
     251                 : }
     252                 : 
     253             594 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
     254             594 :     register_uint_gauge_vec!(
     255             594 :         "pageserver_page_cache_size_current_bytes",
     256             594 :         "Current size of the page cache in bytes, by key kind",
     257             594 :         &["key_kind"]
     258             594 :     )
     259             594 :     .expect("failed to define a metric")
     260             594 : });
     261                 : 
     262                 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
     263             594 :     Lazy::new(|| PageCacheSizeMetrics {
     264             594 :         max_bytes: {
     265             594 :             register_uint_gauge!(
     266             594 :                 "pageserver_page_cache_size_max_bytes",
     267             594 :                 "Maximum size of the page cache in bytes"
     268             594 :             )
     269             594 :             .expect("failed to define a metric")
     270             594 :         },
     271             594 :         current_bytes_immutable: {
     272             594 :             PAGE_CACHE_SIZE_CURRENT_BYTES
     273             594 :                 .get_metric_with_label_values(&["immutable"])
     274             594 :                 .unwrap()
     275             594 :         },
     276             594 :         current_bytes_materialized_page: {
     277             594 :             PAGE_CACHE_SIZE_CURRENT_BYTES
     278             594 :                 .get_metric_with_label_values(&["materialized_page"])
     279             594 :                 .unwrap()
     280             594 :         },
     281             594 :     });
     282                 : 
     283                 : pub(crate) mod page_cache_eviction_metrics {
     284                 :     use std::num::NonZeroUsize;
     285                 : 
     286                 :     use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
     287                 :     use once_cell::sync::Lazy;
     288                 : 
     289 UBC           0 :     #[derive(Clone, Copy)]
     290                 :     pub(crate) enum Outcome {
     291                 :         FoundSlotUnused { iters: NonZeroUsize },
     292                 :         FoundSlotEvicted { iters: NonZeroUsize },
     293                 :         ItersExceeded { iters: NonZeroUsize },
     294                 :     }
     295                 : 
     296 CBC         524 :     static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
     297             524 :         register_int_counter_vec!(
     298             524 :             "pageserver_page_cache_find_victim_iters_total",
     299             524 :             "Counter for the number of iterations in the find_victim loop",
     300             524 :             &["outcome"],
     301             524 :         )
     302             524 :         .expect("failed to define a metric")
     303             524 :     });
     304                 : 
     305             524 :     static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
     306             524 :         register_int_counter_vec!(
     307             524 :             "pageserver_page_cache_find_victim_calls",
     308             524 :             "Incremented at the end of each find_victim() call. \
     309             524 :              Filter by outcome to get e.g., eviction rate.",
     310             524 :             &["outcome"]
     311             524 :         )
     312             524 :         .unwrap()
     313             524 :     });
     314                 : 
     315         9888454 :     pub(crate) fn observe(outcome: Outcome) {
     316         9888454 :         macro_rules! dry {
     317         9888454 :             ($label:literal, $iters:expr) => {{
     318         9888454 :                 static LABEL: &'static str = $label;
     319         9888454 :                 static ITERS_TOTAL: Lazy<IntCounter> =
     320         9888454 :                     Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
     321         9888454 :                 static CALLS: Lazy<IntCounter> =
     322         9888454 :                     Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
     323         9888454 :                 ITERS_TOTAL.inc_by(($iters.get()) as u64);
     324         9888454 :                 CALLS.inc();
     325         9888454 :             }};
     326         9888454 :         }
     327         9888454 :         match outcome {
     328         2592714 :             Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
     329         7295740 :             Outcome::FoundSlotEvicted { iters } => {
     330         7295740 :                 dry!("found_evicted", iters)
     331                 :             }
     332 UBC           0 :             Outcome::ItersExceeded { iters } => {
     333               0 :                 dry!("err_iters_exceeded", iters);
     334               0 :                 super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
     335               0 :             }
     336                 :         }
     337 CBC     9888454 :     }
     338                 : }
     339                 : 
     340             547 : pub(crate) static PAGE_CACHE_ACQUIRE_PINNED_SLOT_TIME: Lazy<Histogram> = Lazy::new(|| {
     341             547 :     register_histogram!(
     342             547 :         "pageserver_page_cache_acquire_pinned_slot_seconds",
     343             547 :         "Time spent acquiring a pinned slot in the page cache",
     344             547 :         CRITICAL_OP_BUCKETS.into(),
     345             547 :     )
     346             547 :     .expect("failed to define a metric")
     347             547 : });
     348                 : 
     349 UBC           0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
     350               0 :     register_int_counter_vec!(
     351               0 :         "page_cache_errors_total",
     352               0 :         "Number of timeouts while acquiring a pinned slot in the page cache",
     353               0 :         &["error_kind"]
     354               0 :     )
     355               0 :     .expect("failed to define a metric")
     356               0 : });
     357                 : 
     358               0 : #[derive(IntoStaticStr)]
     359                 : #[strum(serialize_all = "kebab_case")]
     360                 : pub(crate) enum PageCacheErrorKind {
     361                 :     AcquirePinnedSlotTimeout,
     362                 :     EvictIterLimit,
     363                 : }
     364                 : 
     365               0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
     366               0 :     PAGE_CACHE_ERRORS
     367               0 :         .get_metric_with_label_values(&[error_kind.into()])
     368               0 :         .unwrap()
     369               0 :         .inc();
     370               0 : }
     371                 : 
     372 CBC         562 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
     373             562 :     register_histogram!(
     374             562 :         "pageserver_wait_lsn_seconds",
     375             562 :         "Time spent waiting for WAL to arrive",
     376             562 :         CRITICAL_OP_BUCKETS.into(),
     377             562 :     )
     378             562 :     .expect("failed to define a metric")
     379             562 : });
     380                 : 
     381             560 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
     382             560 :     register_int_gauge_vec!(
     383             560 :         "pageserver_last_record_lsn",
     384             560 :         "Last record LSN grouped by timeline",
     385             560 :         &["tenant_id", "timeline_id"]
     386             560 :     )
     387             560 :     .expect("failed to define a metric")
     388             560 : });
     389                 : 
     390             560 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     391             560 :     register_uint_gauge_vec!(
     392             560 :         "pageserver_resident_physical_size",
     393             560 :         "The size of the layer files present in the pageserver's filesystem.",
     394             560 :         &["tenant_id", "timeline_id"]
     395             560 :     )
     396             560 :     .expect("failed to define a metric")
     397             560 : });
     398                 : 
     399             557 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
     400             557 :     register_uint_gauge!(
     401             557 :         "pageserver_resident_physical_size_global",
     402             557 :         "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
     403             557 :     )
     404             557 :     .expect("failed to define a metric")
     405             557 : });
     406                 : 
     407             569 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     408             569 :     register_uint_gauge_vec!(
     409             569 :         "pageserver_remote_physical_size",
     410             569 :         "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
     411             569 :         // Corollary: If any files are missing from the index part, they won't be included here.
     412             569 :         &["tenant_id", "timeline_id"]
     413             569 :     )
     414             569 :     .expect("failed to define a metric")
     415             569 : });
     416                 : 
     417             560 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
     418             560 :     register_uint_gauge!(
     419             560 :         "pageserver_remote_physical_size_global",
     420             560 :         "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
     421             560 :     )
     422             560 :     .expect("failed to define a metric")
     423             560 : });
     424                 : 
     425              72 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
     426              72 :     register_int_counter!(
     427              72 :         "pageserver_remote_ondemand_downloaded_layers_total",
     428              72 :         "Total on-demand downloaded layers"
     429              72 :     )
     430              72 :     .unwrap()
     431              72 : });
     432                 : 
     433              72 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
     434              72 :     register_int_counter!(
     435              72 :         "pageserver_remote_ondemand_downloaded_bytes_total",
     436              72 :         "Total bytes of layers on-demand downloaded",
     437              72 :     )
     438              72 :     .unwrap()
     439              72 : });
     440                 : 
     441             560 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
     442             560 :     register_uint_gauge_vec!(
     443             560 :         "pageserver_current_logical_size",
     444             560 :         "Current logical size grouped by timeline",
     445             560 :         &["tenant_id", "timeline_id"]
     446             560 :     )
     447             560 :     .expect("failed to define current logical size metric")
     448             560 : });
     449                 : 
     450                 : pub(crate) mod initial_logical_size {
     451                 :     use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
     452                 :     use once_cell::sync::Lazy;
     453                 : 
     454                 :     pub(crate) struct StartCalculation(IntCounterVec);
     455             547 :     pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
     456             547 :         StartCalculation(
     457             547 :             register_int_counter_vec!(
     458             547 :                 "pageserver_initial_logical_size_start_calculation",
     459             547 :                 "Incremented each time we start an initial logical size calculation attempt. \
     460             547 :                  The `circumstances` label provides some additional details.",
     461             547 :                 &["attempt", "circumstances"]
     462             547 :             )
     463             547 :             .unwrap(),
     464             547 :         )
     465             547 :     });
     466                 : 
     467                 :     struct DropCalculation {
     468                 :         first: IntCounter,
     469                 :         retry: IntCounter,
     470                 :     }
     471                 : 
     472             547 :     static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
     473             547 :         let vec = register_int_counter_vec!(
     474             547 :             "pageserver_initial_logical_size_drop_calculation",
     475             547 :             "Incremented each time we abort a started size calculation attempt.",
     476             547 :             &["attempt"]
     477             547 :         )
     478             547 :         .unwrap();
     479             547 :         DropCalculation {
     480             547 :             first: vec.with_label_values(&["first"]),
     481             547 :             retry: vec.with_label_values(&["retry"]),
     482             547 :         }
     483             547 :     });
     484                 : 
     485                 :     pub(crate) struct Calculated {
     486                 :         pub(crate) births: IntCounter,
     487                 :         pub(crate) deaths: IntCounter,
     488                 :     }
     489                 : 
     490             543 :     pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
     491             543 :         births: register_int_counter!(
     492             543 :             "pageserver_initial_logical_size_finish_calculation",
     493             543 :             "Incremented every time we finish calculation of initial logical size. \
     494             543 :              If everything is working well, this should happen at most once per Timeline object."
     495             543 :         )
     496             543 :         .unwrap(),
     497             543 :         deaths: register_int_counter!(
     498             543 :             "pageserver_initial_logical_size_drop_finished_calculation",
     499             543 :             "Incremented when we drop a finished initial logical size calculation result. \
     500             543 :              Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
     501             543 :         )
     502             543 :         .unwrap(),
     503             543 :     });
     504                 : 
     505                 :     pub(crate) struct OngoingCalculationGuard {
     506                 :         inc_drop_calculation: Option<IntCounter>,
     507                 :     }
     508                 : 
     509            1148 :     #[derive(strum_macros::IntoStaticStr)]
     510                 :     pub(crate) enum StartCircumstances {
     511                 :         EmptyInitial,
     512                 :         SkippedConcurrencyLimiter,
     513                 :         AfterBackgroundTasksRateLimit,
     514                 :     }
     515                 : 
     516                 :     impl StartCalculation {
     517            1148 :         pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
     518            1148 :             let circumstances_label: &'static str = circumstances.into();
     519            1148 :             self.0
     520            1148 :                 .with_label_values(&["first", circumstances_label])
     521            1148 :                 .inc();
     522            1148 :             OngoingCalculationGuard {
     523            1148 :                 inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
     524            1148 :             }
     525            1148 :         }
     526 UBC           0 :         pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
     527               0 :             let circumstances_label: &'static str = circumstances.into();
     528               0 :             self.0
     529               0 :                 .with_label_values(&["retry", circumstances_label])
     530               0 :                 .inc();
     531               0 :             OngoingCalculationGuard {
     532               0 :                 inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
     533               0 :             }
     534               0 :         }
     535                 :     }
     536                 : 
     537                 :     impl Drop for OngoingCalculationGuard {
     538 CBC        1144 :         fn drop(&mut self) {
     539            1144 :             if let Some(counter) = self.inc_drop_calculation.take() {
     540              23 :                 counter.inc();
     541            1121 :             }
     542            1144 :         }
     543                 :     }
     544                 : 
     545                 :     impl OngoingCalculationGuard {
     546            1121 :         pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
     547            1121 :             drop(self.inc_drop_calculation.take());
     548            1121 :             CALCULATED.births.inc();
     549            1121 :             FinishedCalculationGuard {
     550            1121 :                 inc_on_drop: CALCULATED.deaths.clone(),
     551            1121 :             }
     552            1121 :         }
     553                 :     }
     554                 : 
     555                 :     pub(crate) struct FinishedCalculationGuard {
     556                 :         inc_on_drop: IntCounter,
     557                 :     }
     558                 : 
     559                 :     impl Drop for FinishedCalculationGuard {
     560             223 :         fn drop(&mut self) {
     561             223 :             self.inc_on_drop.inc();
     562             223 :         }
     563                 :     }
     564                 : 
     565                 :     // context: https://github.com/neondatabase/neon/issues/5963
     566                 :     pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
     567              56 :         Lazy::new(|| {
     568              56 :             register_int_counter!(
     569              56 :                 "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
     570              56 :                 "Counter for the following event: walreceiver calls\
     571              56 :                  Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
     572              56 :             )
     573              56 :             .unwrap()
     574              56 :         });
     575                 : }
     576                 : 
     577             576 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
     578             576 :     register_uint_gauge_vec!(
     579             576 :         "pageserver_tenant_states_count",
     580             576 :         "Count of tenants per state",
     581             576 :         &["state"]
     582             576 :     )
     583             576 :     .expect("Failed to register pageserver_tenant_states_count metric")
     584             576 : });
     585                 : 
     586                 : /// A set of broken tenants.
     587                 : ///
     588                 : /// These are expected to be so rare that a set is fine. Set as in: a new timeseries for each broken
     589                 : /// tenant.
     590             576 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
     591             576 :     register_uint_gauge_vec!(
     592             576 :         "pageserver_broken_tenants_count",
     593             576 :         "Set of broken tenants",
     594             576 :         &["tenant_id"]
     595             576 :     )
     596             576 :     .expect("Failed to register pageserver_tenant_states_count metric")
     597             576 : });
     598                 : 
     599             157 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
     600             157 :     register_uint_gauge_vec!(
     601             157 :         "pageserver_tenant_synthetic_cached_size_bytes",
     602             157 :         "Synthetic size of each tenant in bytes",
     603             157 :         &["tenant_id"]
     604             157 :     )
     605             157 :     .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
     606             157 : });
     607                 : 
     608                 : // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
     609                 : // or in testing they estimate how much we would upload if we did.
     610             560 : static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
     611             560 :     register_int_counter_vec!(
     612             560 :         "pageserver_created_persistent_files_total",
     613             560 :         "Number of files created that are meant to be uploaded to cloud storage",
     614             560 :         &["tenant_id", "timeline_id"]
     615             560 :     )
     616             560 :     .expect("failed to define a metric")
     617             560 : });
     618                 : 
     619             560 : static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
     620             560 :     register_int_counter_vec!(
     621             560 :         "pageserver_written_persistent_bytes_total",
     622             560 :         "Total bytes written that are meant to be uploaded to cloud storage",
     623             560 :         &["tenant_id", "timeline_id"]
     624             560 :     )
     625             560 :     .expect("failed to define a metric")
     626             560 : });
     627                 : 
     628               1 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
     629               1 :     register_histogram_vec!(
     630               1 :         "pageserver_eviction_iteration_duration_seconds_global",
     631               1 :         "Time spent on a single eviction iteration",
     632               1 :         &["period_secs", "threshold_secs"],
     633               1 :         STORAGE_OP_BUCKETS.into(),
     634               1 :     )
     635               1 :     .expect("failed to define a metric")
     636               1 : });
     637                 : 
     638             560 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
     639             560 :     register_int_counter_vec!(
     640             560 :         "pageserver_evictions",
     641             560 :         "Number of layers evicted from the pageserver",
     642             560 :         &["tenant_id", "timeline_id"]
     643             560 :     )
     644             560 :     .expect("failed to define a metric")
     645             560 : });
     646                 : 
     647             560 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
     648             560 :     register_int_counter_vec!(
     649             560 :         "pageserver_evictions_with_low_residence_duration",
     650             560 :         "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
     651             560 :          Residence duration is determined using the `residence_duration_data_source`.",
     652             560 :         &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
     653             560 :     )
     654             560 :     .expect("failed to define a metric")
     655             560 : });
     656                 : 
     657             557 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
     658             557 :     register_int_counter!(
     659             557 :         "pageserver_unexpected_ondemand_downloads_count",
     660             557 :         "Number of unexpected on-demand downloads. \
     661             557 :          We log more context for each increment, so, forgo any labels in this metric.",
     662             557 :     )
     663             557 :     .expect("failed to define a metric")
     664             557 : });
     665                 : 
     666                 : /// How long did we take to start up?  Broken down by labels to describe
     667                 : /// different phases of startup.
     668             557 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
     669             557 :     register_gauge_vec!(
     670             557 :         "pageserver_startup_duration_seconds",
     671             557 :         "Time taken by phases of pageserver startup, in seconds",
     672             557 :         &["phase"]
     673             557 :     )
     674             557 :     .expect("Failed to register pageserver_startup_duration_seconds metric")
     675             557 : });
     676                 : 
     677             557 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
     678             557 :     register_uint_gauge!(
     679             557 :         "pageserver_startup_is_loading",
     680             557 :         "1 while in initial startup load of tenants, 0 at other times"
     681             557 :     )
     682             557 :     .expect("Failed to register pageserver_startup_is_loading")
     683             557 : });
     684                 : 
     685                 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
     686                 : /// like how long it took to load.
     687                 : ///
     688                 : /// Note that these are process-global metrics, _not_ per-tenant metrics.  Per-tenant
     689                 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
     690                 : /// at a timeline level than tenant level.
     691                 : pub(crate) struct TenantMetrics {
     692                 :     /// How long did tenants take to go from construction to active state?
     693                 :     pub(crate) activation: Histogram,
     694                 :     pub(crate) preload: Histogram,
     695                 :     pub(crate) attach: Histogram,
     696                 : 
     697                 :     /// How many tenants are included in the initial startup of the pageserver?
     698                 :     pub(crate) startup_scheduled: IntCounter,
     699                 :     pub(crate) startup_complete: IntCounter,
     700                 : }
     701                 : 
     702             557 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
     703             557 :     TenantMetrics {
     704             557 :     activation: register_histogram!(
     705             557 :         "pageserver_tenant_activation_seconds",
     706             557 :         "Time taken by tenants to activate, in seconds",
     707             557 :         CRITICAL_OP_BUCKETS.into()
     708             557 :     )
     709             557 :     .expect("Failed to register metric"),
     710             557 :     preload: register_histogram!(
     711             557 :         "pageserver_tenant_preload_seconds",
     712             557 :         "Time taken by tenants to load remote metadata on startup/attach, in seconds",
     713             557 :         CRITICAL_OP_BUCKETS.into()
     714             557 :     )
     715             557 :     .expect("Failed to register metric"),
     716             557 :     attach: register_histogram!(
     717             557 :         "pageserver_tenant_attach_seconds",
     718             557 :         "Time taken by tenants to intialize, after remote metadata is already loaded",
     719             557 :         CRITICAL_OP_BUCKETS.into()
     720             557 :     )
     721             557 :     .expect("Failed to register metric"),
     722             557 :     startup_scheduled: register_int_counter!(
     723             557 :         "pageserver_tenant_startup_scheduled",
     724             557 :         "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
     725             557 :     ).expect("Failed to register metric"),
     726             557 :     startup_complete: register_int_counter!(
     727             557 :         "pageserver_tenant_startup_complete",
     728             557 :         "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
     729             557 :          should eventually reach `pageserver_tenant_startup_scheduled_total`.  Does not include broken \
     730             557 :          tenants: such cases will lead to this metric never reaching the scheduled count."
     731             557 :     ).expect("Failed to register metric"),
     732             557 : }
     733             557 : });
     734                 : 
     735                 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
     736 UBC           0 : #[derive(Debug)]
     737                 : pub(crate) struct EvictionsWithLowResidenceDuration {
     738                 :     data_source: &'static str,
     739                 :     threshold: Duration,
     740                 :     counter: Option<IntCounter>,
     741                 : }
     742                 : 
     743                 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
     744                 :     data_source: &'static str,
     745                 :     threshold: Duration,
     746                 : }
     747                 : 
     748                 : impl EvictionsWithLowResidenceDurationBuilder {
     749 CBC        1298 :     pub fn new(data_source: &'static str, threshold: Duration) -> Self {
     750            1298 :         Self {
     751            1298 :             data_source,
     752            1298 :             threshold,
     753            1298 :         }
     754            1298 :     }
     755                 : 
     756            1298 :     fn build(
     757            1298 :         &self,
     758            1298 :         tenant_id: &str,
     759            1298 :         shard_id: &str,
     760            1298 :         timeline_id: &str,
     761            1298 :     ) -> EvictionsWithLowResidenceDuration {
     762            1298 :         let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
     763            1298 :             .get_metric_with_label_values(&[
     764            1298 :                 tenant_id,
     765            1298 :                 shard_id,
     766            1298 :                 timeline_id,
     767            1298 :                 self.data_source,
     768            1298 :                 &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
     769            1298 :             ])
     770            1298 :             .unwrap();
     771            1298 :         EvictionsWithLowResidenceDuration {
     772            1298 :             data_source: self.data_source,
     773            1298 :             threshold: self.threshold,
     774            1298 :             counter: Some(counter),
     775            1298 :         }
     776            1298 :     }
     777                 : }
     778                 : 
     779                 : impl EvictionsWithLowResidenceDuration {
     780            1571 :     fn threshold_label_value(threshold: Duration) -> String {
     781            1571 :         format!("{}", threshold.as_secs())
     782            1571 :     }
     783                 : 
     784            2538 :     pub fn observe(&self, observed_value: Duration) {
     785            2538 :         if observed_value < self.threshold {
     786            2538 :             self.counter
     787            2538 :                 .as_ref()
     788            2538 :                 .expect("nobody calls this function after `remove_from_vec`")
     789            2538 :                 .inc();
     790            2538 :         }
     791            2538 :     }
     792                 : 
     793              52 :     pub fn change_threshold(
     794              52 :         &mut self,
     795              52 :         tenant_id: &str,
     796              52 :         shard_id: &str,
     797              52 :         timeline_id: &str,
     798              52 :         new_threshold: Duration,
     799              52 :     ) {
     800              52 :         if new_threshold == self.threshold {
     801              44 :             return;
     802               8 :         }
     803               8 :         let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
     804               8 :             self.data_source,
     805               8 :             new_threshold,
     806               8 :         )
     807               8 :         .build(tenant_id, shard_id, timeline_id);
     808               8 :         std::mem::swap(self, &mut with_new);
     809               8 :         with_new.remove(tenant_id, shard_id, timeline_id);
     810              52 :     }
     811                 : 
     812                 :     // This could be a `Drop` impl, but, we need the `tenant_id`, `shard_id` and `timeline_id`.
     813             273 :     fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
     814             273 :         let Some(_counter) = self.counter.take() else {
     815 UBC           0 :             return;
     816                 :         };
     817                 : 
     818 CBC         273 :         let threshold = Self::threshold_label_value(self.threshold);
     819             273 : 
     820             273 :         let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
     821             273 :             tenant_id,
     822             273 :             shard_id,
     823             273 :             timeline_id,
     824             273 :             self.data_source,
     825             273 :             &threshold,
     826             273 :         ]);
     827             273 : 
     828             273 :         match removed {
     829 UBC           0 :             Err(e) => {
     830               0 :                 // this has been hit in staging as
     831               0 :                 // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
     832               0 :                 // because we can be in the drop path already, don't risk:
     833               0 :                 // - "double-panic => illegal instruction" or
     834               0 :                 // - future "drop panic => abort"
     835               0 :                 //
     836               0 :                 // so just nag: (the error has the labels)
     837               0 :                 tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
     838                 :             }
     839                 :             Ok(()) => {
     840                 :                 // to help identify cases where we double-remove the same values, let's log all
     841                 :                 // deletions?
     842 CBC         273 :                 tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
     843                 :             }
     844                 :         }
     845             273 :     }
     846                 : }
     847                 : 
     848                 : // Metrics collected on disk IO operations
     849                 : //
     850                 : // Roughly logarithmic scale.
     851                 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
     852                 :     0.000030, // 30 usec
     853                 :     0.001000, // 1000 usec
     854                 :     0.030,    // 30 ms
     855                 :     1.000,    // 1000 ms
     856                 :     30.000,   // 30000 ms
     857                 : ];
     858                 : 
     859                 : /// VirtualFile fs operation variants.
     860                 : ///
     861                 : /// Operations:
     862                 : /// - open ([`std::fs::OpenOptions::open`])
     863                 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
     864                 : /// - close-by-replace (close by replacement algorithm)
     865                 : /// - read (`read_at`)
     866                 : /// - write (`write_at`)
     867                 : /// - seek (modify internal position or file length query)
     868                 : /// - fsync ([`std::fs::File::sync_all`])
     869                 : /// - metadata ([`std::fs::File::metadata`])
     870                 : #[derive(
     871            5355 :     Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
     872                 : )]
     873                 : pub(crate) enum StorageIoOperation {
     874                 :     Open,
     875                 :     OpenAfterReplace,
     876                 :     Close,
     877                 :     CloseByReplace,
     878                 :     Read,
     879                 :     Write,
     880                 :     Seek,
     881                 :     Fsync,
     882                 :     Metadata,
     883                 : }
     884                 : 
     885                 : impl StorageIoOperation {
     886            5355 :     pub fn as_str(&self) -> &'static str {
     887            5355 :         match self {
     888             595 :             StorageIoOperation::Open => "open",
     889             595 :             StorageIoOperation::OpenAfterReplace => "open-after-replace",
     890             595 :             StorageIoOperation::Close => "close",
     891             595 :             StorageIoOperation::CloseByReplace => "close-by-replace",
     892             595 :             StorageIoOperation::Read => "read",
     893             595 :             StorageIoOperation::Write => "write",
     894             595 :             StorageIoOperation::Seek => "seek",
     895             595 :             StorageIoOperation::Fsync => "fsync",
     896             595 :             StorageIoOperation::Metadata => "metadata",
     897                 :         }
     898            5355 :     }
     899                 : }
     900                 : 
     901                 : /// Tracks time taken by fs operations near VirtualFile.
     902 UBC           0 : #[derive(Debug)]
     903                 : pub(crate) struct StorageIoTime {
     904                 :     metrics: [Histogram; StorageIoOperation::COUNT],
     905                 : }
     906                 : 
     907                 : impl StorageIoTime {
     908 CBC         595 :     fn new() -> Self {
     909             595 :         let storage_io_histogram_vec = register_histogram_vec!(
     910             595 :             "pageserver_io_operations_seconds",
     911             595 :             "Time spent in IO operations",
     912             595 :             &["operation"],
     913             595 :             STORAGE_IO_TIME_BUCKETS.into()
     914             595 :         )
     915             595 :         .expect("failed to define a metric");
     916            5355 :         let metrics = std::array::from_fn(|i| {
     917            5355 :             let op = StorageIoOperation::from_repr(i).unwrap();
     918            5355 :             storage_io_histogram_vec
     919            5355 :                 .get_metric_with_label_values(&[op.as_str()])
     920            5355 :                 .unwrap()
     921            5355 :         });
     922             595 :         Self { metrics }
     923             595 :     }
     924                 : 
     925        13005788 :     pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
     926        13005788 :         &self.metrics[op as usize]
     927        13005788 :     }
     928                 : }
     929                 : 
     930                 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
     931                 : 
     932                 : const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
     933                 : 
     934                 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
     935             595 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
     936             595 :     register_int_gauge_vec!(
     937             595 :         "pageserver_io_operations_bytes_total",
     938             595 :         "Total amount of bytes read/written in IO operations",
     939             595 :         &["operation", "tenant_id", "timeline_id"]
     940             595 :     )
     941             595 :     .expect("failed to define a metric")
     942             595 : });
     943                 : 
     944                 : pub(crate) mod virtual_file_descriptor_cache {
     945                 :     use super::*;
     946                 : 
     947             557 :     pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
     948             557 :         register_uint_gauge!(
     949             557 :             "pageserver_virtual_file_descriptor_cache_size_max",
     950             557 :             "Maximum number of open file descriptors in the cache."
     951             557 :         )
     952             557 :         .unwrap()
     953             557 :     });
     954                 : 
     955                 :     // SIZE_CURRENT: derive it like so:
     956                 :     // ```
     957                 :     // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
     958                 :     // -ignoring(operation)
     959                 :     // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
     960                 :     // ```
     961                 : }
     962                 : 
     963 UBC           0 : #[derive(Debug)]
     964                 : struct GlobalAndPerTimelineHistogram {
     965                 :     global: Histogram,
     966                 :     per_tenant_timeline: Histogram,
     967                 : }
     968                 : 
     969                 : impl GlobalAndPerTimelineHistogram {
     970 CBC     3641514 :     fn observe(&self, value: f64) {
     971         3641514 :         self.global.observe(value);
     972         3641514 :         self.per_tenant_timeline.observe(value);
     973         3641514 :     }
     974                 : }
     975                 : 
     976                 : struct GlobalAndPerTimelineHistogramTimer<'a> {
     977                 :     h: &'a GlobalAndPerTimelineHistogram,
     978                 :     start: std::time::Instant,
     979                 : }
     980                 : 
     981                 : impl<'a> Drop for GlobalAndPerTimelineHistogramTimer<'a> {
     982         3641514 :     fn drop(&mut self) {
     983         3641514 :         let elapsed = self.start.elapsed();
     984         3641514 :         self.h.observe(elapsed.as_secs_f64());
     985         3641514 :     }
     986                 : }
     987                 : 
     988                 : #[derive(
     989 UBC           0 :     Debug,
     990               0 :     Clone,
     991                 :     Copy,
     992 CBC       56784 :     IntoStaticStr,
     993                 :     strum_macros::EnumCount,
     994            2395 :     strum_macros::EnumIter,
     995           27860 :     strum_macros::FromRepr,
     996                 : )]
     997                 : #[strum(serialize_all = "snake_case")]
     998                 : pub enum SmgrQueryType {
     999                 :     GetRelExists,
    1000                 :     GetRelSize,
    1001                 :     GetPageAtLsn,
    1002                 :     GetDbSize,
    1003                 : }
    1004                 : 
    1005 UBC           0 : #[derive(Debug)]
    1006                 : pub(crate) struct SmgrQueryTimePerTimeline {
    1007                 :     metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
    1008                 : }
    1009                 : 
    1010 CBC         443 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
    1011             443 :     register_histogram_vec!(
    1012             443 :         "pageserver_smgr_query_seconds",
    1013             443 :         "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
    1014             443 :         &["smgr_query_type", "tenant_id", "timeline_id"],
    1015             443 :         CRITICAL_OP_BUCKETS.into(),
    1016             443 :     )
    1017             443 :     .expect("failed to define a metric")
    1018             443 : });
    1019                 : 
    1020             399 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
    1021             399 :     [
    1022             399 :         1,
    1023             399 :         10,
    1024             399 :         20,
    1025             399 :         40,
    1026             399 :         60,
    1027             399 :         80,
    1028             399 :         100,
    1029             399 :         200,
    1030             399 :         300,
    1031             399 :         400,
    1032             399 :         500,
    1033             399 :         600,
    1034             399 :         700,
    1035             399 :         800,
    1036             399 :         900,
    1037             399 :         1_000, // 1ms
    1038             399 :         2_000,
    1039             399 :         4_000,
    1040             399 :         6_000,
    1041             399 :         8_000,
    1042             399 :         10_000, // 10ms
    1043             399 :         20_000,
    1044             399 :         40_000,
    1045             399 :         60_000,
    1046             399 :         80_000,
    1047             399 :         100_000,
    1048             399 :         200_000,
    1049             399 :         400_000,
    1050             399 :         600_000,
    1051             399 :         800_000,
    1052             399 :         1_000_000, // 1s
    1053             399 :         2_000_000,
    1054             399 :         4_000_000,
    1055             399 :         6_000_000,
    1056             399 :         8_000_000,
    1057             399 :         10_000_000, // 10s
    1058             399 :         20_000_000,
    1059             399 :         50_000_000,
    1060             399 :         100_000_000,
    1061             399 :         200_000_000,
    1062             399 :         1_000_000_000, // 1000s
    1063             399 :     ]
    1064             399 :     .into_iter()
    1065             399 :     .map(Duration::from_micros)
    1066           16359 :     .map(|d| d.as_secs_f64())
    1067             399 :     .collect()
    1068             399 : });
    1069                 : 
    1070             399 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
    1071             399 :     register_histogram_vec!(
    1072             399 :         "pageserver_smgr_query_seconds_global",
    1073             399 :         "Time spent on smgr query handling, aggregated by query type.",
    1074             399 :         &["smgr_query_type"],
    1075             399 :         SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
    1076             399 :     )
    1077             399 :     .expect("failed to define a metric")
    1078             399 : });
    1079                 : 
    1080                 : impl SmgrQueryTimePerTimeline {
    1081            6965 :     pub(crate) fn new(tenant_id: &TenantId, timeline_id: &TimelineId) -> Self {
    1082            6965 :         let tenant_id = tenant_id.to_string();
    1083            6965 :         let timeline_id = timeline_id.to_string();
    1084           27860 :         let metrics = std::array::from_fn(|i| {
    1085           27860 :             let op = SmgrQueryType::from_repr(i).unwrap();
    1086           27860 :             let global = SMGR_QUERY_TIME_GLOBAL
    1087           27860 :                 .get_metric_with_label_values(&[op.into()])
    1088           27860 :                 .unwrap();
    1089           27860 :             let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
    1090           27860 :                 .get_metric_with_label_values(&[op.into(), &tenant_id, &timeline_id])
    1091           27860 :                 .unwrap();
    1092           27860 :             GlobalAndPerTimelineHistogram {
    1093           27860 :                 global,
    1094           27860 :                 per_tenant_timeline,
    1095           27860 :             }
    1096           27860 :         });
    1097            6965 :         Self { metrics }
    1098            6965 :     }
    1099         3641540 :     pub(crate) fn start_timer(&self, op: SmgrQueryType) -> impl Drop + '_ {
    1100         3641540 :         let metric = &self.metrics[op as usize];
    1101         3641540 :         GlobalAndPerTimelineHistogramTimer {
    1102         3641540 :             h: metric,
    1103         3641540 :             start: std::time::Instant::now(),
    1104         3641540 :         }
    1105         3641540 :     }
    1106                 : }
    1107                 : 
    1108                 : #[cfg(test)]
    1109                 : mod smgr_query_time_tests {
    1110                 :     use strum::IntoEnumIterator;
    1111                 :     use utils::id::{TenantId, TimelineId};
    1112                 : 
    1113                 :     // Regression test, we used hard-coded string constants before using an enum.
    1114               1 :     #[test]
    1115               1 :     fn op_label_name() {
    1116               1 :         use super::SmgrQueryType::*;
    1117               1 :         let expect: [(super::SmgrQueryType, &'static str); 4] = [
    1118               1 :             (GetRelExists, "get_rel_exists"),
    1119               1 :             (GetRelSize, "get_rel_size"),
    1120               1 :             (GetPageAtLsn, "get_page_at_lsn"),
    1121               1 :             (GetDbSize, "get_db_size"),
    1122               1 :         ];
    1123               5 :         for (op, expect) in expect {
    1124               4 :             let actual: &'static str = op.into();
    1125               4 :             assert_eq!(actual, expect);
    1126                 :         }
    1127               1 :     }
    1128                 : 
    1129               1 :     #[test]
    1130               1 :     fn basic() {
    1131               1 :         let ops: Vec<_> = super::SmgrQueryType::iter().collect();
    1132                 : 
    1133               5 :         for op in &ops {
    1134               4 :             let tenant_id = TenantId::generate();
    1135               4 :             let timeline_id = TimelineId::generate();
    1136               4 :             let metrics = super::SmgrQueryTimePerTimeline::new(&tenant_id, &timeline_id);
    1137               4 : 
    1138               8 :             let get_counts = || {
    1139               8 :                 let global: u64 = ops
    1140               8 :                     .iter()
    1141              32 :                     .map(|op| metrics.metrics[*op as usize].global.get_sample_count())
    1142               8 :                     .sum();
    1143               8 :                 let per_tenant_timeline: u64 = ops
    1144               8 :                     .iter()
    1145              32 :                     .map(|op| {
    1146              32 :                         metrics.metrics[*op as usize]
    1147              32 :                             .per_tenant_timeline
    1148              32 :                             .get_sample_count()
    1149              32 :                     })
    1150               8 :                     .sum();
    1151               8 :                 (global, per_tenant_timeline)
    1152               8 :             };
    1153                 : 
    1154               4 :             let (pre_global, pre_per_tenant_timeline) = get_counts();
    1155               4 :             assert_eq!(pre_per_tenant_timeline, 0);
    1156                 : 
    1157               4 :             let timer = metrics.start_timer(*op);
    1158               4 :             drop(timer);
    1159               4 : 
    1160               4 :             let (post_global, post_per_tenant_timeline) = get_counts();
    1161               4 :             assert_eq!(post_per_tenant_timeline, 1);
    1162               4 :             assert!(post_global > pre_global);
    1163                 :         }
    1164               1 :     }
    1165                 : }
    1166                 : 
    1167                 : // keep in sync with control plane Go code so that we can validate
    1168                 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
    1169             338 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
    1170             338 :     // Go code uses milliseconds. Variable is called `computeStartupBuckets`
    1171             338 :     [
    1172             338 :         5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
    1173             338 :         1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
    1174             338 :     ]
    1175            9464 :     .map(|ms| (ms as f64) / 1000.0)
    1176             338 : });
    1177                 : 
    1178                 : pub(crate) struct BasebackupQueryTime(HistogramVec);
    1179             338 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
    1180             338 :     BasebackupQueryTime({
    1181             338 :         register_histogram_vec!(
    1182             338 :             "pageserver_basebackup_query_seconds",
    1183             338 :             "Histogram of basebackup queries durations, by result type",
    1184             338 :             &["result"],
    1185             338 :             COMPUTE_STARTUP_BUCKETS.to_vec(),
    1186             338 :         )
    1187             338 :         .expect("failed to define a metric")
    1188             338 :     })
    1189             338 : });
    1190                 : 
    1191                 : impl DurationResultObserver for BasebackupQueryTime {
    1192             550 :     fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
    1193             550 :         let label_value = if res.is_ok() { "ok" } else { "error" };
    1194             550 :         let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
    1195             550 :         metric.observe(duration.as_secs_f64());
    1196             550 :     }
    1197                 : }
    1198                 : 
    1199             420 : pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
    1200             420 :     register_int_gauge_vec!(
    1201             420 :         "pageserver_live_connections",
    1202             420 :         "Number of live network connections",
    1203             420 :         &["pageserver_connection_kind"]
    1204             420 :     )
    1205             420 :     .expect("failed to define a metric")
    1206             420 : });
    1207                 : 
    1208                 : // remote storage metrics
    1209                 : 
    1210                 : /// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
    1211             565 : static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
    1212             565 :     register_int_gauge_vec!(
    1213             565 :         "pageserver_remote_timeline_client_calls_unfinished",
    1214             565 :         "Number of ongoing calls to remote timeline client. \
    1215             565 :          Used to populate pageserver_remote_timeline_client_calls_started. \
    1216             565 :          This metric is not useful for sampling from Prometheus, but useful in tests.",
    1217             565 :         &["tenant_id", "timeline_id", "file_kind", "op_kind"],
    1218             565 :     )
    1219             565 :     .expect("failed to define a metric")
    1220             565 : });
    1221                 : 
    1222             565 : static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new(|| {
    1223             565 :     register_histogram_vec!(
    1224             565 :         "pageserver_remote_timeline_client_calls_started",
    1225             565 :         "When calling a remote timeline client method, we record the current value \
    1226             565 :          of the calls_unfinished gauge in this histogram. Plot the histogram \
    1227             565 :          over time in a heatmap to visualize how many operations were ongoing \
    1228             565 :          at a given instant. It gives you a better idea of the queue depth \
    1229             565 :          than plotting the gauge directly, since operations may complete faster \
    1230             565 :          than the sampling interval.",
    1231             565 :         &["file_kind", "op_kind"],
    1232             565 :         // The calls_unfinished gauge is an integer gauge, hence we have integer buckets.
    1233             565 :         vec![0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 15.0, 20.0, 40.0, 60.0, 80.0, 100.0, 500.0],
    1234             565 :     )
    1235             565 :     .expect("failed to define a metric")
    1236             565 : });
    1237                 : 
    1238             431 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    1239             431 :     register_int_counter_vec!(
    1240             431 :         "pageserver_remote_timeline_client_bytes_started",
    1241             431 :         "Incremented by the number of bytes associated with a remote timeline client operation. \
    1242             431 :          The increment happens when the operation is scheduled.",
    1243             431 :         &["tenant_id", "timeline_id", "file_kind", "op_kind"],
    1244             431 :     )
    1245             431 :     .expect("failed to define a metric")
    1246             431 : });
    1247                 : 
    1248             431 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    1249             431 :     register_int_counter_vec!(
    1250             431 :         "pageserver_remote_timeline_client_bytes_finished",
    1251             431 :         "Incremented by the number of bytes associated with a remote timeline client operation. \
    1252             431 :          The increment happens when the operation finishes (regardless of success/failure/shutdown).",
    1253             431 :         &["tenant_id", "timeline_id", "file_kind", "op_kind"],
    1254             431 :     )
    1255             431 :     .expect("failed to define a metric")
    1256             431 : });
    1257                 : 
    1258                 : pub(crate) struct TenantManagerMetrics {
    1259                 :     pub(crate) tenant_slots: UIntGauge,
    1260                 :     pub(crate) tenant_slot_writes: IntCounter,
    1261                 :     pub(crate) unexpected_errors: IntCounter,
    1262                 : }
    1263                 : 
    1264             558 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
    1265             558 :     TenantManagerMetrics {
    1266             558 :     tenant_slots: register_uint_gauge!(
    1267             558 :         "pageserver_tenant_manager_slots",
    1268             558 :         "How many slots currently exist, including all attached, secondary and in-progress operations",
    1269             558 :     )
    1270             558 :     .expect("failed to define a metric"),
    1271             558 :     tenant_slot_writes: register_int_counter!(
    1272             558 :         "pageserver_tenant_manager_slot_writes",
    1273             558 :         "Writes to a tenant slot, including all of create/attach/detach/delete"
    1274             558 :     )
    1275             558 :     .expect("failed to define a metric"),
    1276             558 :     unexpected_errors: register_int_counter!(
    1277             558 :         "pageserver_tenant_manager_unexpected_errors_total",
    1278             558 :         "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
    1279             558 :     )
    1280             558 :     .expect("failed to define a metric"),
    1281             558 : }
    1282             558 : });
    1283                 : 
    1284                 : pub(crate) struct DeletionQueueMetrics {
    1285                 :     pub(crate) keys_submitted: IntCounter,
    1286                 :     pub(crate) keys_dropped: IntCounter,
    1287                 :     pub(crate) keys_executed: IntCounter,
    1288                 :     pub(crate) keys_validated: IntCounter,
    1289                 :     pub(crate) dropped_lsn_updates: IntCounter,
    1290                 :     pub(crate) unexpected_errors: IntCounter,
    1291                 :     pub(crate) remote_errors: IntCounterVec,
    1292                 : }
    1293             564 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
    1294             564 :     DeletionQueueMetrics{
    1295             564 : 
    1296             564 :     keys_submitted: register_int_counter!(
    1297             564 :         "pageserver_deletion_queue_submitted_total",
    1298             564 :         "Number of objects submitted for deletion"
    1299             564 :     )
    1300             564 :     .expect("failed to define a metric"),
    1301             564 : 
    1302             564 :     keys_dropped: register_int_counter!(
    1303             564 :         "pageserver_deletion_queue_dropped_total",
    1304             564 :         "Number of object deletions dropped due to stale generation."
    1305             564 :     )
    1306             564 :     .expect("failed to define a metric"),
    1307             564 : 
    1308             564 :     keys_executed: register_int_counter!(
    1309             564 :         "pageserver_deletion_queue_executed_total",
    1310             564 :         "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
    1311             564 :     )
    1312             564 :     .expect("failed to define a metric"),
    1313             564 : 
    1314             564 :     keys_validated: register_int_counter!(
    1315             564 :         "pageserver_deletion_queue_validated_total",
    1316             564 :         "Number of keys validated for deletion.  Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
    1317             564 :     )
    1318             564 :     .expect("failed to define a metric"),
    1319             564 : 
    1320             564 :     dropped_lsn_updates: register_int_counter!(
    1321             564 :         "pageserver_deletion_queue_dropped_lsn_updates_total",
    1322             564 :         "Updates to remote_consistent_lsn dropped due to stale generation number."
    1323             564 :     )
    1324             564 :     .expect("failed to define a metric"),
    1325             564 :     unexpected_errors: register_int_counter!(
    1326             564 :         "pageserver_deletion_queue_unexpected_errors_total",
    1327             564 :         "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
    1328             564 :     )
    1329             564 :     .expect("failed to define a metric"),
    1330             564 :     remote_errors: register_int_counter_vec!(
    1331             564 :         "pageserver_deletion_queue_remote_errors_total",
    1332             564 :         "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
    1333             564 :         &["op_kind"],
    1334             564 :     )
    1335             564 :     .expect("failed to define a metric")
    1336             564 : }
    1337             564 : });
    1338                 : 
    1339                 : pub(crate) struct WalIngestMetrics {
    1340                 :     pub(crate) records_received: IntCounter,
    1341                 :     pub(crate) records_committed: IntCounter,
    1342                 :     pub(crate) records_filtered: IntCounter,
    1343                 : }
    1344                 : 
    1345             449 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
    1346             449 :     records_received: register_int_counter!(
    1347             449 :         "pageserver_wal_ingest_records_received",
    1348             449 :         "Number of WAL records received from safekeepers"
    1349             449 :     )
    1350             449 :     .expect("failed to define a metric"),
    1351             449 :     records_committed: register_int_counter!(
    1352             449 :         "pageserver_wal_ingest_records_committed",
    1353             449 :         "Number of WAL records which resulted in writes to pageserver storage"
    1354             449 :     )
    1355             449 :     .expect("failed to define a metric"),
    1356             449 :     records_filtered: register_int_counter!(
    1357             449 :         "pageserver_wal_ingest_records_filtered",
    1358             449 :         "Number of WAL records filtered out due to sharding"
    1359             449 :     )
    1360             449 :     .expect("failed to define a metric"),
    1361             449 : });
    1362                 : pub(crate) struct SecondaryModeMetrics {
    1363                 :     pub(crate) upload_heatmap: IntCounter,
    1364                 :     pub(crate) upload_heatmap_errors: IntCounter,
    1365                 :     pub(crate) upload_heatmap_duration: Histogram,
    1366                 :     pub(crate) download_heatmap: IntCounter,
    1367                 :     pub(crate) download_layer: IntCounter,
    1368                 : }
    1369               5 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
    1370               5 :     upload_heatmap: register_int_counter!(
    1371               5 :         "pageserver_secondary_upload_heatmap",
    1372               5 :         "Number of heatmaps written to remote storage by attached tenants"
    1373               5 :     )
    1374               5 :     .expect("failed to define a metric"),
    1375               5 :     upload_heatmap_errors: register_int_counter!(
    1376               5 :         "pageserver_secondary_upload_heatmap_errors",
    1377               5 :         "Failures writing heatmap to remote storage"
    1378               5 :     )
    1379               5 :     .expect("failed to define a metric"),
    1380               5 :     upload_heatmap_duration: register_histogram!(
    1381               5 :         "pageserver_secondary_upload_heatmap_duration",
    1382               5 :         "Time to build and upload a heatmap, including any waiting inside the S3 client"
    1383               5 :     )
    1384               5 :     .expect("failed to define a metric"),
    1385               5 :     download_heatmap: register_int_counter!(
    1386               5 :         "pageserver_secondary_download_heatmap",
    1387               5 :         "Number of downloads of heatmaps by secondary mode locations"
    1388               5 :     )
    1389               5 :     .expect("failed to define a metric"),
    1390               5 :     download_layer: register_int_counter!(
    1391               5 :         "pageserver_secondary_download_layer",
    1392               5 :         "Number of downloads of layers by secondary mode locations"
    1393               5 :     )
    1394               5 :     .expect("failed to define a metric"),
    1395               5 : });
    1396                 : 
    1397 UBC           0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    1398                 : pub enum RemoteOpKind {
    1399                 :     Upload,
    1400                 :     Download,
    1401                 :     Delete,
    1402                 : }
    1403                 : impl RemoteOpKind {
    1404 CBC      202971 :     pub fn as_str(&self) -> &'static str {
    1405          202971 :         match self {
    1406          159595 :             Self::Upload => "upload",
    1407           29433 :             Self::Download => "download",
    1408           13943 :             Self::Delete => "delete",
    1409                 :         }
    1410          202971 :     }
    1411                 : }
    1412                 : 
    1413 UBC           0 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
    1414                 : pub enum RemoteOpFileKind {
    1415                 :     Layer,
    1416                 :     Index,
    1417                 : }
    1418                 : impl RemoteOpFileKind {
    1419 CBC      202971 :     pub fn as_str(&self) -> &'static str {
    1420          202971 :         match self {
    1421          180414 :             Self::Layer => "layer",
    1422           22557 :             Self::Index => "index",
    1423                 :         }
    1424          202971 :     }
    1425                 : }
    1426                 : 
    1427             565 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    1428             565 :     register_histogram_vec!(
    1429             565 :         "pageserver_remote_operation_seconds",
    1430             565 :         "Time spent on remote storage operations. \
    1431             565 :         Grouped by tenant, timeline, operation_kind and status. \
    1432             565 :         Does not account for time spent waiting in remote timeline client's queues.",
    1433             565 :         &["file_kind", "op_kind", "status"]
    1434             565 :     )
    1435             565 :     .expect("failed to define a metric")
    1436             565 : });
    1437                 : 
    1438             511 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    1439             511 :     register_int_counter_vec!(
    1440             511 :         "pageserver_tenant_task_events",
    1441             511 :         "Number of task start/stop/fail events.",
    1442             511 :         &["event"],
    1443             511 :     )
    1444             511 :     .expect("Failed to register tenant_task_events metric")
    1445             511 : });
    1446                 : 
    1447             447 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
    1448             894 :     register_int_counter_pair_vec!(
    1449             894 :         "pageserver_background_loop_semaphore_wait_start_count",
    1450             894 :         "Counter for background loop concurrency-limiting semaphore acquire calls started",
    1451             894 :         "pageserver_background_loop_semaphore_wait_finish_count",
    1452             894 :         "Counter for background loop concurrency-limiting semaphore acquire calls finished",
    1453             894 :         &["task"],
    1454             894 :     )
    1455             447 :     .unwrap()
    1456             447 : });
    1457                 : 
    1458             557 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
    1459             557 :     register_int_counter_vec!(
    1460             557 :         "pageserver_background_loop_period_overrun_count",
    1461             557 :         "Incremented whenever warn_when_period_overrun() logs a warning.",
    1462             557 :         &["task", "period"],
    1463             557 :     )
    1464             557 :     .expect("failed to define a metric")
    1465             557 : });
    1466                 : 
    1467                 : // walreceiver metrics
    1468                 : 
    1469             557 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
    1470             557 :     register_int_counter!(
    1471             557 :         "pageserver_walreceiver_started_connections_total",
    1472             557 :         "Number of started walreceiver connections"
    1473             557 :     )
    1474             557 :     .expect("failed to define a metric")
    1475             557 : });
    1476                 : 
    1477             557 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
    1478             557 :     register_int_gauge!(
    1479             557 :         "pageserver_walreceiver_active_managers",
    1480             557 :         "Number of active walreceiver managers"
    1481             557 :     )
    1482             557 :     .expect("failed to define a metric")
    1483             557 : });
    1484                 : 
    1485             397 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
    1486             397 :     register_int_counter_vec!(
    1487             397 :         "pageserver_walreceiver_switches_total",
    1488             397 :         "Number of walreceiver manager change_connection calls",
    1489             397 :         &["reason"]
    1490             397 :     )
    1491             397 :     .expect("failed to define a metric")
    1492             397 : });
    1493                 : 
    1494             557 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
    1495             557 :     register_int_counter!(
    1496             557 :         "pageserver_walreceiver_broker_updates_total",
    1497             557 :         "Number of received broker updates in walreceiver"
    1498             557 :     )
    1499             557 :     .expect("failed to define a metric")
    1500             557 : });
    1501                 : 
    1502             558 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    1503             558 :     register_int_counter_vec!(
    1504             558 :         "pageserver_walreceiver_candidates_events_total",
    1505             558 :         "Number of walreceiver candidate events",
    1506             558 :         &["event"]
    1507             558 :     )
    1508             558 :     .expect("failed to define a metric")
    1509             558 : });
    1510                 : 
    1511                 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
    1512             557 :     Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
    1513                 : 
    1514                 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
    1515             558 :     Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
    1516                 : 
    1517                 : // Metrics collected on WAL redo operations
    1518                 : //
    1519                 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
    1520                 : // for access to the postgres process ('wait') since there is only one for
    1521                 : // each tenant.
    1522                 : 
    1523                 : /// Time buckets are small because we want to be able to measure the
    1524                 : /// smallest redo processing times. These buckets allow us to measure down
    1525                 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
    1526                 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
    1527                 : ///
    1528                 : /// Values up to 1s are recorded because metrics show that we have redo
    1529                 : /// durations and lock times larger than 0.250s.
    1530                 : macro_rules! redo_histogram_time_buckets {
    1531                 :     () => {
    1532                 :         vec![
    1533                 :             0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
    1534                 :             0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
    1535                 :             1.000_000,
    1536                 :         ]
    1537                 :     };
    1538                 : }
    1539                 : 
    1540                 : /// While we're at it, also measure the amount of records replayed in each
    1541                 : /// operation. We have a global 'total replayed' counter, but that's not
    1542                 : /// as useful as 'what is the skew for how many records we replay in one
    1543                 : /// operation'.
    1544                 : macro_rules! redo_histogram_count_buckets {
    1545                 :     () => {
    1546                 :         vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
    1547                 :     };
    1548                 : }
    1549                 : 
    1550                 : macro_rules! redo_bytes_histogram_count_buckets {
    1551                 :     () => {
    1552                 :         // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
    1553                 :         // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
    1554                 :         vec![
    1555                 :             24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
    1556                 :             2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
    1557                 :         ]
    1558                 :     };
    1559                 : }
    1560                 : 
    1561             560 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
    1562             560 :     register_histogram!(
    1563             560 :         "pageserver_wal_redo_seconds",
    1564             560 :         "Time spent on WAL redo",
    1565             560 :         redo_histogram_time_buckets!()
    1566             560 :     )
    1567             560 :     .expect("failed to define a metric")
    1568             560 : });
    1569                 : 
    1570             560 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    1571             560 :     register_histogram!(
    1572             560 :         "pageserver_wal_redo_records_histogram",
    1573             560 :         "Histogram of number of records replayed per redo in the Postgres WAL redo process",
    1574             560 :         redo_histogram_count_buckets!(),
    1575             560 :     )
    1576             560 :     .expect("failed to define a metric")
    1577             560 : });
    1578                 : 
    1579             560 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    1580             560 :     register_histogram!(
    1581             560 :         "pageserver_wal_redo_bytes_histogram",
    1582             560 :         "Histogram of number of records replayed per redo sent to Postgres",
    1583             560 :         redo_bytes_histogram_count_buckets!(),
    1584             560 :     )
    1585             560 :     .expect("failed to define a metric")
    1586             560 : });
    1587                 : 
    1588                 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
    1589             359 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    1590             359 :     register_int_counter!(
    1591             359 :         "pageserver_replayed_wal_records_total",
    1592             359 :         "Number of WAL records replayed in WAL redo process"
    1593             359 :     )
    1594             359 :     .unwrap()
    1595             359 : });
    1596                 : 
    1597             560 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    1598             560 :     register_histogram!(
    1599             560 :         "pageserver_wal_redo_process_launch_duration",
    1600             560 :         "Histogram of the duration of successful WalRedoProcess::launch calls",
    1601             560 :         redo_histogram_time_buckets!(),
    1602             560 :     )
    1603             560 :     .expect("failed to define a metric")
    1604             560 : });
    1605                 : 
    1606                 : pub(crate) struct WalRedoProcessCounters {
    1607                 :     pub(crate) started: IntCounter,
    1608                 :     pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
    1609                 :     pub(crate) active_stderr_logger_tasks_started: IntCounter,
    1610                 :     pub(crate) active_stderr_logger_tasks_finished: IntCounter,
    1611                 : }
    1612                 : 
    1613            1346 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
    1614                 : pub(crate) enum WalRedoKillCause {
    1615                 :     WalRedoProcessDrop,
    1616                 :     NoLeakChildDrop,
    1617                 :     Startup,
    1618                 : }
    1619                 : 
    1620                 : impl Default for WalRedoProcessCounters {
    1621             359 :     fn default() -> Self {
    1622             359 :         let started = register_int_counter!(
    1623             359 :             "pageserver_wal_redo_process_started_total",
    1624             359 :             "Number of WAL redo processes started",
    1625             359 :         )
    1626             359 :         .unwrap();
    1627             359 : 
    1628             359 :         let killed = register_int_counter_vec!(
    1629             359 :             "pageserver_wal_redo_process_stopped_total",
    1630             359 :             "Number of WAL redo processes stopped",
    1631             359 :             &["cause"],
    1632             359 :         )
    1633             359 :         .unwrap();
    1634             359 : 
    1635             359 :         let active_stderr_logger_tasks_started = register_int_counter!(
    1636             359 :             "pageserver_walredo_stderr_logger_tasks_started_total",
    1637             359 :             "Number of active walredo stderr logger tasks that have started",
    1638             359 :         )
    1639             359 :         .unwrap();
    1640             359 : 
    1641             359 :         let active_stderr_logger_tasks_finished = register_int_counter!(
    1642             359 :             "pageserver_walredo_stderr_logger_tasks_finished_total",
    1643             359 :             "Number of active walredo stderr logger tasks that have finished",
    1644             359 :         )
    1645             359 :         .unwrap();
    1646             359 : 
    1647             359 :         Self {
    1648             359 :             started,
    1649            1077 :             killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
    1650            1077 :                 let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
    1651            1077 :                 let cause_str: &'static str = cause.into();
    1652            1077 :                 killed.with_label_values(&[cause_str])
    1653            1077 :             })),
    1654             359 :             active_stderr_logger_tasks_started,
    1655             359 :             active_stderr_logger_tasks_finished,
    1656             359 :         }
    1657             359 :     }
    1658                 : }
    1659                 : 
    1660                 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
    1661                 :     Lazy::new(WalRedoProcessCounters::default);
    1662                 : 
    1663                 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
    1664                 : pub(crate) struct StorageTimeMetricsTimer {
    1665                 :     metrics: StorageTimeMetrics,
    1666                 :     start: Instant,
    1667                 : }
    1668                 : 
    1669                 : impl StorageTimeMetricsTimer {
    1670            8881 :     fn new(metrics: StorageTimeMetrics) -> Self {
    1671            8881 :         Self {
    1672            8881 :             metrics,
    1673            8881 :             start: Instant::now(),
    1674            8881 :         }
    1675            8881 :     }
    1676                 : 
    1677                 :     /// Record the time from creation to now.
    1678            8832 :     pub fn stop_and_record(self) {
    1679            8832 :         let duration = self.start.elapsed().as_secs_f64();
    1680            8832 :         self.metrics.timeline_sum.inc_by(duration);
    1681            8832 :         self.metrics.timeline_count.inc();
    1682            8832 :         self.metrics.global_histogram.observe(duration);
    1683            8832 :     }
    1684                 : }
    1685                 : 
    1686                 : /// Timing facilities for a globally histogrammed metric, which is supported by per tenant and
    1687                 : /// timeline total sum and count.
    1688            8881 : #[derive(Clone, Debug)]
    1689                 : pub(crate) struct StorageTimeMetrics {
    1690                 :     /// Sum of f64 seconds, per operation, tenant_id and timeline_id
    1691                 :     timeline_sum: Counter,
    1692                 :     /// Number of operations, per operation, tenant_id and timeline_id
    1693                 :     timeline_count: IntCounter,
    1694                 :     /// Global histogram having only the "operation" label.
    1695                 :     global_histogram: Histogram,
    1696                 : }
    1697                 : 
    1698                 : impl StorageTimeMetrics {
    1699            9030 :     pub fn new(operation: StorageTimeOperation, tenant_id: &str, timeline_id: &str) -> Self {
    1700            9030 :         let operation: &'static str = operation.into();
    1701            9030 : 
    1702            9030 :         let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
    1703            9030 :             .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
    1704            9030 :             .unwrap();
    1705            9030 :         let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
    1706            9030 :             .get_metric_with_label_values(&[operation, tenant_id, timeline_id])
    1707            9030 :             .unwrap();
    1708            9030 :         let global_histogram = STORAGE_TIME_GLOBAL
    1709            9030 :             .get_metric_with_label_values(&[operation])
    1710            9030 :             .unwrap();
    1711            9030 : 
    1712            9030 :         StorageTimeMetrics {
    1713            9030 :             timeline_sum,
    1714            9030 :             timeline_count,
    1715            9030 :             global_histogram,
    1716            9030 :         }
    1717            9030 :     }
    1718                 : 
    1719                 :     /// Starts timing a new operation.
    1720                 :     ///
    1721                 :     /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
    1722            8881 :     pub fn start_timer(&self) -> StorageTimeMetricsTimer {
    1723            8881 :         StorageTimeMetricsTimer::new(self.clone())
    1724            8881 :     }
    1725                 : }
    1726                 : 
    1727 UBC           0 : #[derive(Debug)]
    1728                 : pub(crate) struct TimelineMetrics {
    1729                 :     tenant_id: String,
    1730                 :     shard_id: String,
    1731                 :     timeline_id: String,
    1732                 :     pub flush_time_histo: StorageTimeMetrics,
    1733                 :     pub compact_time_histo: StorageTimeMetrics,
    1734                 :     pub create_images_time_histo: StorageTimeMetrics,
    1735                 :     pub logical_size_histo: StorageTimeMetrics,
    1736                 :     pub imitate_logical_size_histo: StorageTimeMetrics,
    1737                 :     pub load_layer_map_histo: StorageTimeMetrics,
    1738                 :     pub garbage_collect_histo: StorageTimeMetrics,
    1739                 :     pub last_record_gauge: IntGauge,
    1740                 :     resident_physical_size_gauge: UIntGauge,
    1741                 :     /// copy of LayeredTimeline.current_logical_size
    1742                 :     pub current_logical_size_gauge: UIntGauge,
    1743                 :     pub num_persistent_files_created: IntCounter,
    1744                 :     pub persistent_bytes_written: IntCounter,
    1745                 :     pub evictions: IntCounter,
    1746                 :     pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
    1747                 : }
    1748                 : 
    1749                 : impl TimelineMetrics {
    1750 CBC        1290 :     pub fn new(
    1751            1290 :         tenant_shard_id: &TenantShardId,
    1752            1290 :         timeline_id: &TimelineId,
    1753            1290 :         evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
    1754            1290 :     ) -> Self {
    1755            1290 :         let tenant_id = tenant_shard_id.tenant_id.to_string();
    1756            1290 :         let shard_id = format!("{}", tenant_shard_id.shard_slug());
    1757            1290 :         let timeline_id = timeline_id.to_string();
    1758            1290 :         let flush_time_histo =
    1759            1290 :             StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
    1760            1290 :         let compact_time_histo =
    1761            1290 :             StorageTimeMetrics::new(StorageTimeOperation::Compact, &tenant_id, &timeline_id);
    1762            1290 :         let create_images_time_histo =
    1763            1290 :             StorageTimeMetrics::new(StorageTimeOperation::CreateImages, &tenant_id, &timeline_id);
    1764            1290 :         let logical_size_histo =
    1765            1290 :             StorageTimeMetrics::new(StorageTimeOperation::LogicalSize, &tenant_id, &timeline_id);
    1766            1290 :         let imitate_logical_size_histo = StorageTimeMetrics::new(
    1767            1290 :             StorageTimeOperation::ImitateLogicalSize,
    1768            1290 :             &tenant_id,
    1769            1290 :             &timeline_id,
    1770            1290 :         );
    1771            1290 :         let load_layer_map_histo =
    1772            1290 :             StorageTimeMetrics::new(StorageTimeOperation::LoadLayerMap, &tenant_id, &timeline_id);
    1773            1290 :         let garbage_collect_histo =
    1774            1290 :             StorageTimeMetrics::new(StorageTimeOperation::Gc, &tenant_id, &timeline_id);
    1775            1290 :         let last_record_gauge = LAST_RECORD_LSN
    1776            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1777            1290 :             .unwrap();
    1778            1290 :         let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
    1779            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1780            1290 :             .unwrap();
    1781            1290 :         let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
    1782            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1783            1290 :             .unwrap();
    1784            1290 :         let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
    1785            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1786            1290 :             .unwrap();
    1787            1290 :         let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
    1788            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1789            1290 :             .unwrap();
    1790            1290 :         let evictions = EVICTIONS
    1791            1290 :             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
    1792            1290 :             .unwrap();
    1793            1290 :         let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
    1794            1290 :             .build(&tenant_id, &shard_id, &timeline_id);
    1795            1290 : 
    1796            1290 :         TimelineMetrics {
    1797            1290 :             tenant_id,
    1798            1290 :             shard_id,
    1799            1290 :             timeline_id,
    1800            1290 :             flush_time_histo,
    1801            1290 :             compact_time_histo,
    1802            1290 :             create_images_time_histo,
    1803            1290 :             logical_size_histo,
    1804            1290 :             imitate_logical_size_histo,
    1805            1290 :             garbage_collect_histo,
    1806            1290 :             load_layer_map_histo,
    1807            1290 :             last_record_gauge,
    1808            1290 :             resident_physical_size_gauge,
    1809            1290 :             current_logical_size_gauge,
    1810            1290 :             num_persistent_files_created,
    1811            1290 :             persistent_bytes_written,
    1812            1290 :             evictions,
    1813            1290 :             evictions_with_low_residence_duration: std::sync::RwLock::new(
    1814            1290 :                 evictions_with_low_residence_duration,
    1815            1290 :             ),
    1816            1290 :         }
    1817            1290 :     }
    1818                 : 
    1819           20419 :     pub(crate) fn record_new_file_metrics(&self, sz: u64) {
    1820           20419 :         self.resident_physical_size_add(sz);
    1821           20419 :         self.num_persistent_files_created.inc_by(1);
    1822           20419 :         self.persistent_bytes_written.inc_by(sz);
    1823           20419 :     }
    1824                 : 
    1825            6841 :     pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
    1826            6841 :         self.resident_physical_size_gauge.sub(sz);
    1827            6841 :         crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
    1828            6841 :     }
    1829                 : 
    1830           43901 :     pub(crate) fn resident_physical_size_add(&self, sz: u64) {
    1831           43901 :         self.resident_physical_size_gauge.add(sz);
    1832           43901 :         crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
    1833           43901 :     }
    1834                 : 
    1835             283 :     pub(crate) fn resident_physical_size_get(&self) -> u64 {
    1836             283 :         self.resident_physical_size_gauge.get()
    1837             283 :     }
    1838                 : }
    1839                 : 
    1840                 : impl Drop for TimelineMetrics {
    1841             265 :     fn drop(&mut self) {
    1842             265 :         let tenant_id = &self.tenant_id;
    1843             265 :         let timeline_id = &self.timeline_id;
    1844             265 :         let shard_id = &self.shard_id;
    1845             265 :         let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
    1846             265 :         {
    1847             265 :             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
    1848             265 :             let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
    1849             265 :         }
    1850             265 :         let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
    1851             265 :         let _ = NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, timeline_id]);
    1852             265 :         let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
    1853             265 :         let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]);
    1854             265 : 
    1855             265 :         self.evictions_with_low_residence_duration
    1856             265 :             .write()
    1857             265 :             .unwrap()
    1858             265 :             .remove(tenant_id, shard_id, timeline_id);
    1859                 : 
    1860                 :         // The following metrics are born outside of the TimelineMetrics lifecycle but still
    1861                 :         // removed at the end of it. The idea is to have the metrics outlive the
    1862                 :         // entity for which they're observed, e.g., the smgr metrics shall
    1863                 :         // outlive an individual smgr connection, but not the timeline.
    1864                 : 
    1865            2385 :         for op in StorageTimeOperation::VARIANTS {
    1866            2120 :             let _ =
    1867            2120 :                 STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
    1868            2120 :             let _ =
    1869            2120 :                 STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[op, tenant_id, timeline_id]);
    1870            2120 :         }
    1871                 : 
    1872             795 :         for op in STORAGE_IO_SIZE_OPERATIONS {
    1873             530 :             let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, timeline_id]);
    1874             530 :         }
    1875                 : 
    1876            1325 :         for op in SmgrQueryType::iter() {
    1877            1060 :             let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
    1878            1060 :                 op.into(),
    1879            1060 :                 tenant_id,
    1880            1060 :                 timeline_id,
    1881            1060 :             ]);
    1882            1060 :         }
    1883             265 :     }
    1884                 : }
    1885                 : 
    1886             199 : pub fn remove_tenant_metrics(tenant_id: &TenantId) {
    1887             199 :     let tid = tenant_id.to_string();
    1888             199 :     let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
    1889             199 :     // we leave the BROKEN_TENANTS_SET entry if any
    1890             199 : }
    1891                 : 
    1892                 : use futures::Future;
    1893                 : use pin_project_lite::pin_project;
    1894                 : use std::collections::HashMap;
    1895                 : use std::pin::Pin;
    1896                 : use std::sync::{Arc, Mutex};
    1897                 : use std::task::{Context, Poll};
    1898                 : use std::time::{Duration, Instant};
    1899                 : 
    1900                 : use crate::context::{PageContentKind, RequestContext};
    1901                 : use crate::task_mgr::TaskKind;
    1902                 : 
    1903                 : /// Maintain a per timeline gauge in addition to the global gauge.
    1904                 : struct PerTimelineRemotePhysicalSizeGauge {
    1905                 :     last_set: u64,
    1906                 :     gauge: UIntGauge,
    1907                 : }
    1908                 : 
    1909                 : impl PerTimelineRemotePhysicalSizeGauge {
    1910            1290 :     fn new(per_timeline_gauge: UIntGauge) -> Self {
    1911            1290 :         Self {
    1912            1290 :             last_set: per_timeline_gauge.get(),
    1913            1290 :             gauge: per_timeline_gauge,
    1914            1290 :         }
    1915            1290 :     }
    1916            6403 :     fn set(&mut self, sz: u64) {
    1917            6403 :         self.gauge.set(sz);
    1918            6403 :         if sz < self.last_set {
    1919              68 :             REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
    1920            6335 :         } else {
    1921            6335 :             REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
    1922            6335 :         };
    1923            6403 :         self.last_set = sz;
    1924            6403 :     }
    1925              18 :     fn get(&self) -> u64 {
    1926              18 :         self.gauge.get()
    1927              18 :     }
    1928                 : }
    1929                 : 
    1930                 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
    1931             265 :     fn drop(&mut self) {
    1932             265 :         REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
    1933             265 :     }
    1934                 : }
    1935                 : 
    1936                 : pub(crate) struct RemoteTimelineClientMetrics {
    1937                 :     tenant_id: String,
    1938                 :     timeline_id: String,
    1939                 :     remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
    1940                 :     calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
    1941                 :     bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
    1942                 :     bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
    1943                 : }
    1944                 : 
    1945                 : impl RemoteTimelineClientMetrics {
    1946            1320 :     pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
    1947            1320 :         RemoteTimelineClientMetrics {
    1948            1320 :             tenant_id: tenant_shard_id.tenant_id.to_string(),
    1949            1320 :             timeline_id: timeline_id.to_string(),
    1950            1320 :             calls_unfinished_gauge: Mutex::new(HashMap::default()),
    1951            1320 :             bytes_started_counter: Mutex::new(HashMap::default()),
    1952            1320 :             bytes_finished_counter: Mutex::new(HashMap::default()),
    1953            1320 :             remote_physical_size_gauge: Mutex::new(None),
    1954            1320 :         }
    1955            1320 :     }
    1956                 : 
    1957            6403 :     pub(crate) fn remote_physical_size_set(&self, sz: u64) {
    1958            6403 :         let mut guard = self.remote_physical_size_gauge.lock().unwrap();
    1959            6403 :         let gauge = guard.get_or_insert_with(|| {
    1960            1290 :             PerTimelineRemotePhysicalSizeGauge::new(
    1961            1290 :                 REMOTE_PHYSICAL_SIZE
    1962            1290 :                     .get_metric_with_label_values(&[
    1963            1290 :                         &self.tenant_id.to_string(),
    1964            1290 :                         &self.timeline_id.to_string(),
    1965            1290 :                     ])
    1966            1290 :                     .unwrap(),
    1967            1290 :             )
    1968            6403 :         });
    1969            6403 :         gauge.set(sz);
    1970            6403 :     }
    1971                 : 
    1972              18 :     pub(crate) fn remote_physical_size_get(&self) -> u64 {
    1973              18 :         let guard = self.remote_physical_size_gauge.lock().unwrap();
    1974              18 :         guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
    1975              18 :     }
    1976                 : 
    1977           37045 :     pub fn remote_operation_time(
    1978           37045 :         &self,
    1979           37045 :         file_kind: &RemoteOpFileKind,
    1980           37045 :         op_kind: &RemoteOpKind,
    1981           37045 :         status: &'static str,
    1982           37045 :     ) -> Histogram {
    1983           37045 :         let key = (file_kind.as_str(), op_kind.as_str(), status);
    1984           37045 :         REMOTE_OPERATION_TIME
    1985           37045 :             .get_metric_with_label_values(&[key.0, key.1, key.2])
    1986           37045 :             .unwrap()
    1987           37045 :     }
    1988                 : 
    1989           67407 :     fn calls_unfinished_gauge(
    1990           67407 :         &self,
    1991           67407 :         file_kind: &RemoteOpFileKind,
    1992           67407 :         op_kind: &RemoteOpKind,
    1993           67407 :     ) -> IntGauge {
    1994           67407 :         let mut guard = self.calls_unfinished_gauge.lock().unwrap();
    1995           67407 :         let key = (file_kind.as_str(), op_kind.as_str());
    1996           67407 :         let metric = guard.entry(key).or_insert_with(move || {
    1997            2809 :             REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
    1998            2809 :                 .get_metric_with_label_values(&[
    1999            2809 :                     &self.tenant_id.to_string(),
    2000            2809 :                     &self.timeline_id.to_string(),
    2001            2809 :                     key.0,
    2002            2809 :                     key.1,
    2003            2809 :                 ])
    2004            2809 :                 .unwrap()
    2005           67407 :         });
    2006           67407 :         metric.clone()
    2007           67407 :     }
    2008                 : 
    2009           40143 :     fn calls_started_hist(
    2010           40143 :         &self,
    2011           40143 :         file_kind: &RemoteOpFileKind,
    2012           40143 :         op_kind: &RemoteOpKind,
    2013           40143 :     ) -> Histogram {
    2014           40143 :         let key = (file_kind.as_str(), op_kind.as_str());
    2015           40143 :         REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST
    2016           40143 :             .get_metric_with_label_values(&[key.0, key.1])
    2017           40143 :             .unwrap()
    2018           40143 :     }
    2019                 : 
    2020           20463 :     fn bytes_started_counter(
    2021           20463 :         &self,
    2022           20463 :         file_kind: &RemoteOpFileKind,
    2023           20463 :         op_kind: &RemoteOpKind,
    2024           20463 :     ) -> IntCounter {
    2025           20463 :         let mut guard = self.bytes_started_counter.lock().unwrap();
    2026           20463 :         let key = (file_kind.as_str(), op_kind.as_str());
    2027           20463 :         let metric = guard.entry(key).or_insert_with(move || {
    2028             783 :             REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
    2029             783 :                 .get_metric_with_label_values(&[
    2030             783 :                     &self.tenant_id.to_string(),
    2031             783 :                     &self.timeline_id.to_string(),
    2032             783 :                     key.0,
    2033             783 :                     key.1,
    2034             783 :                 ])
    2035             783 :                 .unwrap()
    2036           20463 :         });
    2037           20463 :         metric.clone()
    2038           20463 :     }
    2039                 : 
    2040           37907 :     fn bytes_finished_counter(
    2041           37907 :         &self,
    2042           37907 :         file_kind: &RemoteOpFileKind,
    2043           37907 :         op_kind: &RemoteOpKind,
    2044           37907 :     ) -> IntCounter {
    2045           37907 :         let mut guard = self.bytes_finished_counter.lock().unwrap();
    2046           37907 :         let key = (file_kind.as_str(), op_kind.as_str());
    2047           37907 :         let metric = guard.entry(key).or_insert_with(move || {
    2048             783 :             REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
    2049             783 :                 .get_metric_with_label_values(&[
    2050             783 :                     &self.tenant_id.to_string(),
    2051             783 :                     &self.timeline_id.to_string(),
    2052             783 :                     key.0,
    2053             783 :                     key.1,
    2054             783 :                 ])
    2055             783 :                 .unwrap()
    2056           37907 :         });
    2057           37907 :         metric.clone()
    2058           37907 :     }
    2059                 : }
    2060                 : 
    2061                 : #[cfg(test)]
    2062                 : impl RemoteTimelineClientMetrics {
    2063               3 :     pub fn get_bytes_started_counter_value(
    2064               3 :         &self,
    2065               3 :         file_kind: &RemoteOpFileKind,
    2066               3 :         op_kind: &RemoteOpKind,
    2067               3 :     ) -> Option<u64> {
    2068               3 :         let guard = self.bytes_started_counter.lock().unwrap();
    2069               3 :         let key = (file_kind.as_str(), op_kind.as_str());
    2070               3 :         guard.get(&key).map(|counter| counter.get())
    2071               3 :     }
    2072                 : 
    2073               3 :     pub fn get_bytes_finished_counter_value(
    2074               3 :         &self,
    2075               3 :         file_kind: &RemoteOpFileKind,
    2076               3 :         op_kind: &RemoteOpKind,
    2077               3 :     ) -> Option<u64> {
    2078               3 :         let guard = self.bytes_finished_counter.lock().unwrap();
    2079               3 :         let key = (file_kind.as_str(), op_kind.as_str());
    2080               3 :         guard.get(&key).map(|counter| counter.get())
    2081               3 :     }
    2082                 : }
    2083                 : 
    2084                 : /// See [`RemoteTimelineClientMetrics::call_begin`].
    2085                 : #[must_use]
    2086                 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
    2087                 :     /// Decremented on drop.
    2088                 :     calls_unfinished_metric: Option<IntGauge>,
    2089                 :     /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
    2090                 :     bytes_finished: Option<(IntCounter, u64)>,
    2091                 : }
    2092                 : 
    2093                 : impl RemoteTimelineClientCallMetricGuard {
    2094                 :     /// Consume this guard object without performing the metric updates it would do on `drop()`.
    2095                 :     /// The caller vouches to do the metric updates manually.
    2096           30331 :     pub fn will_decrement_manually(mut self) {
    2097           30331 :         let RemoteTimelineClientCallMetricGuard {
    2098           30331 :             calls_unfinished_metric,
    2099           30331 :             bytes_finished,
    2100           30331 :         } = &mut self;
    2101           30331 :         calls_unfinished_metric.take();
    2102           30331 :         bytes_finished.take();
    2103           30331 :     }
    2104                 : }
    2105                 : 
    2106                 : impl Drop for RemoteTimelineClientCallMetricGuard {
    2107           40140 :     fn drop(&mut self) {
    2108           40140 :         let RemoteTimelineClientCallMetricGuard {
    2109           40140 :             calls_unfinished_metric,
    2110           40140 :             bytes_finished,
    2111           40140 :         } = self;
    2112           40140 :         if let Some(guard) = calls_unfinished_metric.take() {
    2113            9809 :             guard.dec();
    2114           30331 :         }
    2115           40140 :         if let Some((bytes_finished_metric, value)) = bytes_finished {
    2116 UBC           0 :             bytes_finished_metric.inc_by(*value);
    2117 CBC       40140 :         }
    2118           40140 :     }
    2119                 : }
    2120                 : 
    2121                 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
    2122                 : /// track the byte size of this call in applicable metric(s).
    2123                 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    2124                 :     /// Do not account for this call's byte size in any metrics.
    2125                 :     /// The `reason` field is there to make the call sites self-documenting
    2126                 :     /// about why they don't need the metric.
    2127                 :     DontTrackSize { reason: &'static str },
    2128                 :     /// Track the byte size of the call in applicable metric(s).
    2129                 :     Bytes(u64),
    2130                 : }
    2131                 : 
    2132                 : impl RemoteTimelineClientMetrics {
    2133                 :     /// Update the metrics that change when a call to the remote timeline client instance starts.
    2134                 :     ///
    2135                 :     /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
    2136                 :     /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
    2137                 :     /// is more suitable.
    2138                 :     /// Never do both.
    2139           40143 :     pub(crate) fn call_begin(
    2140           40143 :         &self,
    2141           40143 :         file_kind: &RemoteOpFileKind,
    2142           40143 :         op_kind: &RemoteOpKind,
    2143           40143 :         size: RemoteTimelineClientMetricsCallTrackSize,
    2144           40143 :     ) -> RemoteTimelineClientCallMetricGuard {
    2145           40143 :         let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
    2146           40143 :         self.calls_started_hist(file_kind, op_kind)
    2147           40143 :             .observe(calls_unfinished_metric.get() as f64);
    2148           40143 :         calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric
    2149                 : 
    2150           40143 :         let bytes_finished = match size {
    2151           19680 :             RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
    2152           19680 :                 // nothing to do
    2153           19680 :                 None
    2154                 :             }
    2155           20463 :             RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
    2156           20463 :                 self.bytes_started_counter(file_kind, op_kind).inc_by(size);
    2157           20463 :                 let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
    2158           20463 :                 Some((finished_counter, size))
    2159                 :             }
    2160                 :         };
    2161           40143 :         RemoteTimelineClientCallMetricGuard {
    2162           40143 :             calls_unfinished_metric: Some(calls_unfinished_metric),
    2163           40143 :             bytes_finished,
    2164           40143 :         }
    2165           40143 :     }
    2166                 : 
    2167                 :     /// Manually update the metrics that track completions, instead of using the guard object.
    2168                 :     /// Using the guard object is generally preferable.
    2169                 :     /// See [`call_begin`](Self::call_begin) for more context.
    2170           27264 :     pub(crate) fn call_end(
    2171           27264 :         &self,
    2172           27264 :         file_kind: &RemoteOpFileKind,
    2173           27264 :         op_kind: &RemoteOpKind,
    2174           27264 :         size: RemoteTimelineClientMetricsCallTrackSize,
    2175           27264 :     ) {
    2176           27264 :         let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
    2177                 :         debug_assert!(
    2178           27264 :             calls_unfinished_metric.get() > 0,
    2179 UBC           0 :             "begin and end should cancel out"
    2180                 :         );
    2181 CBC       27264 :         calls_unfinished_metric.dec();
    2182           27264 :         match size {
    2183            9820 :             RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
    2184           17444 :             RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
    2185           17444 :                 self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
    2186           17444 :             }
    2187                 :         }
    2188           27264 :     }
    2189                 : }
    2190                 : 
    2191                 : impl Drop for RemoteTimelineClientMetrics {
    2192             295 :     fn drop(&mut self) {
    2193             295 :         let RemoteTimelineClientMetrics {
    2194             295 :             tenant_id,
    2195             295 :             timeline_id,
    2196             295 :             remote_physical_size_gauge,
    2197             295 :             calls_unfinished_gauge,
    2198             295 :             bytes_started_counter,
    2199             295 :             bytes_finished_counter,
    2200             295 :         } = self;
    2201             725 :         for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
    2202             725 :             let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
    2203             725 :                 tenant_id,
    2204             725 :                 timeline_id,
    2205             725 :                 a,
    2206             725 :                 b,
    2207             725 :             ]);
    2208             725 :         }
    2209             295 :         for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
    2210             188 :             let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
    2211             188 :                 tenant_id,
    2212             188 :                 timeline_id,
    2213             188 :                 a,
    2214             188 :                 b,
    2215             188 :             ]);
    2216             188 :         }
    2217             295 :         for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
    2218             188 :             let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
    2219             188 :                 tenant_id,
    2220             188 :                 timeline_id,
    2221             188 :                 a,
    2222             188 :                 b,
    2223             188 :             ]);
    2224             188 :         }
    2225             295 :         {
    2226             295 :             let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in destructuring above
    2227             295 :             let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
    2228             295 :         }
    2229             295 :     }
    2230                 : }
    2231                 : 
    2232                 : /// Wrapper future that measures the time spent by a remote storage operation,
    2233                 : /// and records the time and success/failure as a prometheus metric.
    2234                 : pub(crate) trait MeasureRemoteOp: Sized {
    2235           37056 :     fn measure_remote_op(
    2236           37056 :         self,
    2237           37056 :         tenant_id: TenantId,
    2238           37056 :         timeline_id: TimelineId,
    2239           37056 :         file_kind: RemoteOpFileKind,
    2240           37056 :         op: RemoteOpKind,
    2241           37056 :         metrics: Arc<RemoteTimelineClientMetrics>,
    2242           37056 :     ) -> MeasuredRemoteOp<Self> {
    2243           37056 :         let start = Instant::now();
    2244           37056 :         MeasuredRemoteOp {
    2245           37056 :             inner: self,
    2246           37056 :             tenant_id,
    2247           37056 :             timeline_id,
    2248           37056 :             file_kind,
    2249           37056 :             op,
    2250           37056 :             start,
    2251           37056 :             metrics,
    2252           37056 :         }
    2253           37056 :     }
    2254                 : }
    2255                 : 
    2256                 : impl<T: Sized> MeasureRemoteOp for T {}
    2257                 : 
    2258                 : pin_project! {
    2259                 :     pub(crate) struct MeasuredRemoteOp<F>
    2260                 :     {
    2261                 :         #[pin]
    2262                 :         inner: F,
    2263                 :         tenant_id: TenantId,
    2264                 :         timeline_id: TimelineId,
    2265                 :         file_kind: RemoteOpFileKind,
    2266                 :         op: RemoteOpKind,
    2267                 :         start: Instant,
    2268                 :         metrics: Arc<RemoteTimelineClientMetrics>,
    2269                 :     }
    2270                 : }
    2271                 : 
    2272                 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
    2273                 :     type Output = Result<O, E>;
    2274                 : 
    2275         1283569 :     fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
    2276         1283569 :         let this = self.project();
    2277         1283569 :         let poll_result = this.inner.poll(cx);
    2278         1283569 :         if let Poll::Ready(ref res) = poll_result {
    2279           37046 :             let duration = this.start.elapsed();
    2280           37046 :             let status = if res.is_ok() { &"success" } else { &"failure" };
    2281           37045 :             this.metrics
    2282           37045 :                 .remote_operation_time(this.file_kind, this.op, status)
    2283           37045 :                 .observe(duration.as_secs_f64());
    2284         1246523 :         }
    2285         1283568 :         poll_result
    2286         1283568 :     }
    2287                 : }
    2288                 : 
    2289             557 : pub fn preinitialize_metrics() {
    2290             557 :     // Python tests need these and on some we do alerting.
    2291             557 :     //
    2292             557 :     // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
    2293             557 :     // order:
    2294             557 :     // - global metrics reside in a Lazy<PageserverMetrics>
    2295             557 :     //   - access via crate::metrics::PS_METRICS.materialized_page_cache_hit.inc()
    2296             557 :     // - could move the statics into TimelineMetrics::new()?
    2297             557 : 
    2298             557 :     // counters
    2299             557 :     [
    2300             557 :         &MATERIALIZED_PAGE_CACHE_HIT,
    2301             557 :         &MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
    2302             557 :         &UNEXPECTED_ONDEMAND_DOWNLOADS,
    2303             557 :         &WALRECEIVER_STARTED_CONNECTIONS,
    2304             557 :         &WALRECEIVER_BROKER_UPDATES,
    2305             557 :         &WALRECEIVER_CANDIDATES_ADDED,
    2306             557 :         &WALRECEIVER_CANDIDATES_REMOVED,
    2307             557 :     ]
    2308             557 :     .into_iter()
    2309            3899 :     .for_each(|c| {
    2310            3899 :         Lazy::force(c);
    2311            3899 :     });
    2312             557 : 
    2313             557 :     // Deletion queue stats
    2314             557 :     Lazy::force(&DELETION_QUEUE);
    2315             557 : 
    2316             557 :     // Tenant stats
    2317             557 :     Lazy::force(&TENANT);
    2318             557 : 
    2319             557 :     // Tenant manager stats
    2320             557 :     Lazy::force(&TENANT_MANAGER);
    2321             557 : 
    2322             557 :     Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
    2323             557 : 
    2324             557 :     // countervecs
    2325             557 :     [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
    2326             557 :         .into_iter()
    2327             557 :         .for_each(|c| {
    2328             557 :             Lazy::force(c);
    2329             557 :         });
    2330             557 : 
    2331             557 :     // gauges
    2332             557 :     WALRECEIVER_ACTIVE_MANAGERS.get();
    2333             557 : 
    2334             557 :     // histograms
    2335             557 :     [
    2336             557 :         &READ_NUM_FS_LAYERS,
    2337             557 :         &WAIT_LSN_TIME,
    2338             557 :         &WAL_REDO_TIME,
    2339             557 :         &WAL_REDO_RECORDS_HISTOGRAM,
    2340             557 :         &WAL_REDO_BYTES_HISTOGRAM,
    2341             557 :         &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
    2342             557 :     ]
    2343             557 :     .into_iter()
    2344            3342 :     .for_each(|h| {
    2345            3342 :         Lazy::force(h);
    2346            3342 :     });
    2347             557 : 
    2348             557 :     // Custom
    2349             557 :     Lazy::force(&RECONSTRUCT_TIME);
    2350             557 : }
        

Generated by: LCOV version 2.1-beta