Line data Source code
1 : use std::collections::HashMap;
2 : use std::num::NonZeroUsize;
3 : use std::os::fd::RawFd;
4 : use std::pin::Pin;
5 : use std::sync::atomic::AtomicU64;
6 : use std::sync::{Arc, Mutex};
7 : use std::task::{Context, Poll};
8 : use std::time::{Duration, Instant};
9 :
10 : use enum_map::{Enum as _, EnumMap};
11 : use futures::Future;
12 : use metrics::{
13 : Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
14 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
15 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
16 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
17 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
18 : };
19 : use once_cell::sync::Lazy;
20 : use pageserver_api::config::{
21 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
22 : PageServiceProtocolPipelinedExecutionStrategy,
23 : };
24 : use pageserver_api::models::InMemoryLayerInfo;
25 : use pageserver_api::shard::TenantShardId;
26 : use pin_project_lite::pin_project;
27 : use postgres_backend::{QueryError, is_expected_io_error};
28 : use pq_proto::framed::ConnectionError;
29 : use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
30 : use strum_macros::{IntoStaticStr, VariantNames};
31 : use utils::id::TimelineId;
32 :
33 : use crate::config::PageServerConf;
34 : use crate::context::{PageContentKind, RequestContext};
35 : use crate::pgdatadir_mapping::DatadirModificationStats;
36 : use crate::task_mgr::TaskKind;
37 : use crate::tenant::Timeline;
38 : use crate::tenant::layer_map::LayerMap;
39 : use crate::tenant::mgr::TenantSlot;
40 : use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
41 : use crate::tenant::tasks::BackgroundLoopKind;
42 : use crate::tenant::throttle::ThrottleResult;
43 :
44 : /// Prometheus histogram buckets (in seconds) for operations in the critical
45 : /// path. In other words, operations that directly affect the latency of user
46 : /// queries.
47 : ///
48 : /// The buckets capture the majority of latencies in the microsecond and
49 : /// millisecond range but also extend far enough up to distinguish "bad" from
50 : /// "really bad".
51 : const CRITICAL_OP_BUCKETS: &[f64] = &[
52 : 0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
53 : 0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
54 : 1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
55 : ];
56 :
57 : // Metrics collected on operations on the storage repository.
58 : #[derive(Debug, VariantNames, IntoStaticStr)]
59 : #[strum(serialize_all = "kebab_case")]
60 : pub(crate) enum StorageTimeOperation {
61 : #[strum(serialize = "layer flush")]
62 : LayerFlush,
63 :
64 : #[strum(serialize = "layer flush delay")]
65 : LayerFlushDelay,
66 :
67 : #[strum(serialize = "compact")]
68 : Compact,
69 :
70 : #[strum(serialize = "create images")]
71 : CreateImages,
72 :
73 : #[strum(serialize = "logical size")]
74 : LogicalSize,
75 :
76 : #[strum(serialize = "imitate logical size")]
77 : ImitateLogicalSize,
78 :
79 : #[strum(serialize = "load layer map")]
80 : LoadLayerMap,
81 :
82 : #[strum(serialize = "gc")]
83 : Gc,
84 :
85 : #[strum(serialize = "find gc cutoffs")]
86 : FindGcCutoffs,
87 : }
88 :
89 412 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
90 412 : register_counter_vec!(
91 412 : "pageserver_storage_operations_seconds_sum",
92 412 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
93 412 : &["operation", "tenant_id", "shard_id", "timeline_id"],
94 412 : )
95 412 : .expect("failed to define a metric")
96 412 : });
97 :
98 412 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
99 412 : register_int_counter_vec!(
100 412 : "pageserver_storage_operations_seconds_count",
101 412 : "Count of storage operations with operation, tenant and timeline dimensions",
102 412 : &["operation", "tenant_id", "shard_id", "timeline_id"],
103 412 : )
104 412 : .expect("failed to define a metric")
105 412 : });
106 :
107 : // Buckets for background operation duration in seconds, like compaction, GC, size calculation.
108 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
109 :
110 412 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
111 412 : register_histogram_vec!(
112 412 : "pageserver_storage_operations_seconds_global",
113 412 : "Time spent on storage operations",
114 412 : &["operation"],
115 412 : STORAGE_OP_BUCKETS.into(),
116 412 : )
117 412 : .expect("failed to define a metric")
118 412 : });
119 :
120 : /// Measures layers visited per read (i.e. read amplification).
121 : ///
122 : /// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits
123 : /// is amortized across the batch, and some layers may not intersect with a given key, each visited
124 : /// layer contributes directly to the observed latency for every read in the batch, which is what we
125 : /// care about.
126 412 : pub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {
127 412 : register_histogram_vec!(
128 412 : "pageserver_layers_per_read",
129 412 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
130 412 : &["tenant_id", "shard_id", "timeline_id"],
131 412 : // Low resolution to reduce cardinality.
132 412 : vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
133 412 : )
134 412 : .expect("failed to define a metric")
135 412 : });
136 :
137 404 : pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
138 404 : register_histogram!(
139 404 : "pageserver_layers_per_read_global",
140 404 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
141 404 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
142 404 : )
143 404 : .expect("failed to define a metric")
144 404 : });
145 :
146 404 : pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
147 404 : register_histogram!(
148 404 : "pageserver_layers_per_read_batch_global",
149 404 : "Layers visited to serve a single read batch (read amplification), regardless of number of reads.",
150 404 : vec![
151 404 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
152 404 : ],
153 404 : )
154 404 : .expect("failed to define a metric")
155 404 : });
156 :
157 404 : pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
158 404 : register_histogram!(
159 404 : "pageserver_layers_per_read_amortized_global",
160 404 : "Layers visited to serve a single read (read amplification). Amortized across a batch: \
161 404 : all visited layers are divided by number of reads.",
162 404 : vec![
163 404 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
164 404 : ],
165 404 : )
166 404 : .expect("failed to define a metric")
167 404 : });
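// A worked example, based on the metric descriptions above (illustrative only):
// if a batch of 4 reads visits 12 layers in total, then each of the 4 reads
// records 12 in `pageserver_layers_per_read` and `pageserver_layers_per_read_global`,
// the batch as a whole records 12 in `pageserver_layers_per_read_batch_global`,
// and each read records 12 / 4 = 3 in `pageserver_layers_per_read_amortized_global`.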
168 :
169 404 : pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
170 404 : // We expect this to be low because of Postgres checkpoints. Let's see if that holds.
171 404 : register_histogram!(
172 404 : "pageserver_deltas_per_read_global",
173 404 : "Number of delta pages applied to image page per read",
174 404 : vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
175 404 : )
176 404 : .expect("failed to define a metric")
177 404 : });
178 :
179 0 : pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
180 0 : register_uint_gauge!(
181 0 : "pageserver_concurrent_initdb",
182 0 : "Number of initdb processes running"
183 0 : )
184 0 : .expect("failed to define a metric")
185 0 : });
186 :
187 0 : pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
188 0 : register_histogram!(
189 0 : "pageserver_initdb_semaphore_seconds_global",
190 0 : "Time spent getting a permit from the global initdb semaphore",
191 0 : STORAGE_OP_BUCKETS.into()
192 0 : )
193 0 : .expect("failed to define metric")
194 0 : });
195 :
196 0 : pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
197 0 : register_histogram!(
198 0 : "pageserver_initdb_seconds_global",
199 0 : "Time spent performing initdb",
200 0 : STORAGE_OP_BUCKETS.into()
201 0 : )
202 0 : .expect("failed to define metric")
203 0 : });
204 :
205 : pub(crate) struct GetVectoredLatency {
206 : map: EnumMap<TaskKind, Option<Histogram>>,
207 : }
208 :
209 : #[allow(dead_code)]
210 : pub(crate) struct ScanLatency {
211 : map: EnumMap<TaskKind, Option<Histogram>>,
212 : }
213 :
214 : impl GetVectoredLatency {
215 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
216 : // cardinality of the metric.
217 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
218 :
219 39432 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
220 39432 : self.map[task_kind].as_ref()
221 39432 : }
222 : }
223 :
224 : impl ScanLatency {
225 : // Only these task types perform scans. Filter all other tasks out to reduce total
226 : // cardinality of the metric.
227 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
228 :
229 24 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
230 24 : self.map[task_kind].as_ref()
231 24 : }
232 : }
233 :
234 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
235 : parent: &'a Histogram,
236 : start: std::time::Instant,
237 : }
238 :
239 : impl<'a> ScanLatencyOngoingRecording<'a> {
240 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
241 0 : let start = Instant::now();
242 0 : ScanLatencyOngoingRecording { parent, start }
243 0 : }
244 :
245 0 : pub(crate) fn observe(self) {
246 0 : let elapsed = self.start.elapsed();
247 0 : self.parent.observe(elapsed.as_secs_f64());
248 0 : }
249 : }
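// Illustrative usage of the recording guard above (a sketch, not a verbatim
// call site; the histogram is looked up via `SCAN_LATENCY` defined below):
//
//     if let Some(histogram) = SCAN_LATENCY.for_task_kind(ctx.task_kind()) {
//         let recording = ScanLatencyOngoingRecording::start_recording(histogram);
//         // ... perform the scan ...
//         recording.observe();
//     }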
250 :
251 396 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
252 396 : let inner = register_histogram_vec!(
253 396 : "pageserver_get_vectored_seconds",
254 396 : "Time spent in get_vectored.",
255 396 : &["task_kind"],
256 396 : CRITICAL_OP_BUCKETS.into(),
257 396 : )
258 396 : .expect("failed to define a metric");
259 396 :
260 396 : GetVectoredLatency {
261 12276 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
262 12276 : let task_kind = TaskKind::from_usize(task_kind_idx);
263 12276 :
264 12276 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
265 792 : let task_kind = task_kind.into();
266 792 : Some(inner.with_label_values(&[task_kind]))
267 : } else {
268 11484 : None
269 : }
270 12276 : })),
271 396 : }
272 396 : });
273 :
274 8 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
275 8 : let inner = register_histogram_vec!(
276 8 : "pageserver_scan_seconds",
277 8 : "Time spent in scan.",
278 8 : &["task_kind"],
279 8 : CRITICAL_OP_BUCKETS.into(),
280 8 : )
281 8 : .expect("failed to define a metric");
282 8 :
283 8 : ScanLatency {
284 248 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
285 248 : let task_kind = TaskKind::from_usize(task_kind_idx);
286 248 :
287 248 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
288 8 : let task_kind = task_kind.into();
289 8 : Some(inner.with_label_values(&[task_kind]))
290 : } else {
291 240 : None
292 : }
293 248 : })),
294 8 : }
295 8 : });
296 :
297 : pub(crate) struct PageCacheMetricsForTaskKind {
298 : pub read_accesses_immutable: IntCounter,
299 : pub read_hits_immutable: IntCounter,
300 : }
301 :
302 : pub(crate) struct PageCacheMetrics {
303 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
304 : }
305 :
306 188 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
307 188 : register_int_counter_vec!(
308 188 : "pageserver_page_cache_read_hits_total",
309 188 : "Number of read accesses to the page cache that hit",
310 188 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
311 188 : )
312 188 : .expect("failed to define a metric")
313 188 : });
314 :
315 188 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
316 188 : register_int_counter_vec!(
317 188 : "pageserver_page_cache_read_accesses_total",
318 188 : "Number of read accesses to the page cache",
319 188 : &["task_kind", "key_kind", "content_kind"]
320 188 : )
321 188 : .expect("failed to define a metric")
322 188 : });
323 :
324 188 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
325 5828 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
326 5828 : let task_kind = TaskKind::from_usize(task_kind);
327 5828 : let task_kind: &'static str = task_kind.into();
328 46624 : EnumMap::from_array(std::array::from_fn(|content_kind| {
329 46624 : let content_kind = PageContentKind::from_usize(content_kind);
330 46624 : let content_kind: &'static str = content_kind.into();
331 46624 : PageCacheMetricsForTaskKind {
332 46624 : read_accesses_immutable: {
333 46624 : PAGE_CACHE_READ_ACCESSES
334 46624 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
335 46624 : .unwrap()
336 46624 : },
337 46624 :
338 46624 : read_hits_immutable: {
339 46624 : PAGE_CACHE_READ_HITS
340 46624 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
341 46624 : .unwrap()
342 46624 : },
343 46624 : }
344 46624 : }))
345 5828 : })),
346 188 : });
347 :
348 : impl PageCacheMetrics {
349 1944556 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
350 1944556 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
351 1944556 : }
352 : }
353 :
354 : pub(crate) struct PageCacheSizeMetrics {
355 : pub max_bytes: UIntGauge,
356 :
357 : pub current_bytes_immutable: UIntGauge,
358 : }
359 :
360 188 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
361 188 : register_uint_gauge_vec!(
362 188 : "pageserver_page_cache_size_current_bytes",
363 188 : "Current size of the page cache in bytes, by key kind",
364 188 : &["key_kind"]
365 188 : )
366 188 : .expect("failed to define a metric")
367 188 : });
368 :
369 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
370 188 : Lazy::new(|| PageCacheSizeMetrics {
371 188 : max_bytes: {
372 188 : register_uint_gauge!(
373 188 : "pageserver_page_cache_size_max_bytes",
374 188 : "Maximum size of the page cache in bytes"
375 188 : )
376 188 : .expect("failed to define a metric")
377 188 : },
378 188 : current_bytes_immutable: {
379 188 : PAGE_CACHE_SIZE_CURRENT_BYTES
380 188 : .get_metric_with_label_values(&["immutable"])
381 188 : .unwrap()
382 188 : },
383 188 : });
384 :
385 : pub(crate) mod page_cache_eviction_metrics {
386 : use std::num::NonZeroUsize;
387 :
388 : use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};
389 : use once_cell::sync::Lazy;
390 :
391 : #[derive(Clone, Copy)]
392 : pub(crate) enum Outcome {
393 : FoundSlotUnused { iters: NonZeroUsize },
394 : FoundSlotEvicted { iters: NonZeroUsize },
395 : ItersExceeded { iters: NonZeroUsize },
396 : }
397 :
398 188 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
399 188 : register_int_counter_vec!(
400 188 : "pageserver_page_cache_find_victim_iters_total",
401 188 : "Counter for the number of iterations in the find_victim loop",
402 188 : &["outcome"],
403 188 : )
404 188 : .expect("failed to define a metric")
405 188 : });
406 :
407 188 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
408 188 : register_int_counter_vec!(
409 188 : "pageserver_page_cache_find_victim_calls",
410 188 : "Incremented at the end of each find_victim() call.\
411 188 : Filter by outcome to get e.g., eviction rate.",
412 188 : &["outcome"]
413 188 : )
414 188 : .unwrap()
415 188 : });
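// As the help text above suggests, the eviction rate can be derived in
// Prometheus with a query along these lines (illustrative PromQL, not part of
// this crate):
//
//     sum(rate(pageserver_page_cache_find_victim_calls{outcome="found_evicted"}[5m]))
//       /
//     sum(rate(pageserver_page_cache_find_victim_calls[5m]))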
416 :
417 63398 : pub(crate) fn observe(outcome: Outcome) {
418 : macro_rules! dry {
419 : ($label:literal, $iters:expr) => {{
420 : static LABEL: &'static str = $label;
421 : static ITERS_TOTAL: Lazy<IntCounter> =
422 228 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
423 : static CALLS: Lazy<IntCounter> =
424 228 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
425 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
426 : CALLS.inc();
427 : }};
428 : }
429 63398 : match outcome {
430 3288 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
431 60110 : Outcome::FoundSlotEvicted { iters } => {
432 60110 : dry!("found_evicted", iters)
433 : }
434 0 : Outcome::ItersExceeded { iters } => {
435 0 : dry!("err_iters_exceeded", iters);
436 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
437 0 : }
438 : }
439 63398 : }
440 : }
441 :
442 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
443 0 : register_int_counter_vec!(
444 0 : "page_cache_errors_total",
445 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
446 0 : &["error_kind"]
447 0 : )
448 0 : .expect("failed to define a metric")
449 0 : });
450 :
451 : #[derive(IntoStaticStr)]
452 : #[strum(serialize_all = "kebab_case")]
453 : pub(crate) enum PageCacheErrorKind {
454 : AcquirePinnedSlotTimeout,
455 : EvictIterLimit,
456 : }
457 :
458 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
459 0 : PAGE_CACHE_ERRORS
460 0 : .get_metric_with_label_values(&[error_kind.into()])
461 0 : .unwrap()
462 0 : .inc();
463 0 : }
464 :
465 40 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
466 40 : register_histogram!(
467 40 : "pageserver_wait_lsn_seconds",
468 40 : "Time spent waiting for WAL to arrive",
469 40 : CRITICAL_OP_BUCKETS.into(),
470 40 : )
471 40 : .expect("failed to define a metric")
472 40 : });
473 :
474 412 : static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
475 412 : register_gauge_vec!(
476 412 : "pageserver_flush_wait_upload_seconds",
477 412 : "Time spent waiting for preceding uploads during layer flush",
478 412 : &["tenant_id", "shard_id", "timeline_id"]
479 412 : )
480 412 : .expect("failed to define a metric")
481 412 : });
482 :
483 412 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
484 412 : register_int_gauge_vec!(
485 412 : "pageserver_last_record_lsn",
486 412 : "Last record LSN grouped by timeline",
487 412 : &["tenant_id", "shard_id", "timeline_id"]
488 412 : )
489 412 : .expect("failed to define a metric")
490 412 : });
491 :
492 412 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
493 412 : register_int_gauge_vec!(
494 412 : "pageserver_disk_consistent_lsn",
495 412 : "Disk consistent LSN grouped by timeline",
496 412 : &["tenant_id", "shard_id", "timeline_id"]
497 412 : )
498 412 : .expect("failed to define a metric")
499 412 : });
500 :
501 412 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
502 412 : register_uint_gauge_vec!(
503 412 : "pageserver_projected_remote_consistent_lsn",
504 412 : "Projected remote consistent LSN grouped by timeline",
505 412 : &["tenant_id", "shard_id", "timeline_id"]
506 412 : )
507 412 : .expect("failed to define a metric")
508 412 : });
509 :
510 412 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
511 412 : register_uint_gauge_vec!(
512 412 : "pageserver_pitr_history_size",
513 412 : "Data written since PITR cutoff on this timeline",
514 412 : &["tenant_id", "shard_id", "timeline_id"]
515 412 : )
516 412 : .expect("failed to define a metric")
517 412 : });
518 :
519 : #[derive(
520 240 : strum_macros::EnumIter,
521 0 : strum_macros::EnumString,
522 : strum_macros::Display,
523 : strum_macros::IntoStaticStr,
524 : )]
525 : #[strum(serialize_all = "kebab_case")]
526 : pub(crate) enum LayerKind {
527 : Delta,
528 : Image,
529 : }
530 :
531 : #[derive(
532 100 : strum_macros::EnumIter,
533 0 : strum_macros::EnumString,
534 : strum_macros::Display,
535 : strum_macros::IntoStaticStr,
536 : )]
537 : #[strum(serialize_all = "kebab_case")]
538 : pub(crate) enum LayerLevel {
539 : // We don't track the currently open ephemeral layer, since there's always exactly 1 and its
540 : // size changes. See `TIMELINE_EPHEMERAL_BYTES`.
541 : Frozen,
542 : L0,
543 : L1,
544 : }
545 :
546 404 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
547 404 : register_uint_gauge_vec!(
548 404 : "pageserver_layer_bytes",
549 404 : "Sum of frozen, L0, and L1 layer physical sizes in bytes (excluding the open ephemeral layer)",
550 404 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
551 404 : )
552 404 : .expect("failed to define a metric")
553 404 : });
554 :
555 404 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
556 404 : register_uint_gauge_vec!(
557 404 : "pageserver_layer_count",
558 404 : "Number of frozen, L0, and L1 layers (excluding the open ephemeral layer)",
559 404 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
560 404 : )
561 404 : .expect("failed to define a metric")
562 404 : });
563 :
564 412 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
565 412 : register_uint_gauge_vec!(
566 412 : "pageserver_archive_size",
567 412 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
568 412 : &["tenant_id", "shard_id", "timeline_id"]
569 412 : )
570 412 : .expect("failed to define a metric")
571 412 : });
572 :
573 412 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
574 412 : register_int_gauge_vec!(
575 412 : "pageserver_standby_horizon",
576 412 : "Standby apply LSN for which GC is hold off, by timeline.",
577 412 : &["tenant_id", "shard_id", "timeline_id"]
578 412 : )
579 412 : .expect("failed to define a metric")
580 412 : });
581 :
582 412 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
583 412 : register_uint_gauge_vec!(
584 412 : "pageserver_resident_physical_size",
585 412 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
586 412 : &["tenant_id", "shard_id", "timeline_id"]
587 412 : )
588 412 : .expect("failed to define a metric")
589 412 : });
590 :
591 412 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
592 412 : register_uint_gauge_vec!(
593 412 : "pageserver_visible_physical_size",
594 412 : "The size of the layer files present in the pageserver's filesystem.",
595 412 : &["tenant_id", "shard_id", "timeline_id"]
596 412 : )
597 412 : .expect("failed to define a metric")
598 412 : });
599 :
600 404 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
601 404 : register_uint_gauge!(
602 404 : "pageserver_resident_physical_size_global",
603 404 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
604 404 : )
605 404 : .expect("failed to define a metric")
606 404 : });
607 :
608 412 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
609 412 : register_uint_gauge_vec!(
610 412 : "pageserver_remote_physical_size",
611 412 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
612 412 : // Corollary: If any files are missing from the index part, they won't be included here.
613 412 : &["tenant_id", "shard_id", "timeline_id"]
614 412 : )
615 412 : .expect("failed to define a metric")
616 412 : });
617 :
618 412 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
619 412 : register_uint_gauge!(
620 412 : "pageserver_remote_physical_size_global",
621 412 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
622 412 : )
623 412 : .expect("failed to define a metric")
624 412 : });
625 :
626 12 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
627 12 : register_int_counter!(
628 12 : "pageserver_remote_ondemand_downloaded_layers_total",
629 12 : "Total on-demand downloaded layers"
630 12 : )
631 12 : .unwrap()
632 12 : });
633 :
634 12 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
635 12 : register_int_counter!(
636 12 : "pageserver_remote_ondemand_downloaded_bytes_total",
637 12 : "Total bytes of layers on-demand downloaded",
638 12 : )
639 12 : .unwrap()
640 12 : });
641 :
642 412 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
643 412 : register_uint_gauge_vec!(
644 412 : "pageserver_current_logical_size",
645 412 : "Current logical size grouped by timeline",
646 412 : &["tenant_id", "shard_id", "timeline_id"]
647 412 : )
648 412 : .expect("failed to define current logical size metric")
649 412 : });
650 :
651 412 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
652 412 : register_int_gauge_vec!(
653 412 : "pageserver_aux_file_estimated_size",
654 412 : "The size of all aux files for a timeline in aux file v2 store.",
655 412 : &["tenant_id", "shard_id", "timeline_id"]
656 412 : )
657 412 : .expect("failed to define a metric")
658 412 : });
659 :
660 412 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
661 412 : register_uint_gauge_vec!(
662 412 : "pageserver_valid_lsn_lease_count",
663 412 : "The number of valid leases after refreshing gc info.",
664 412 : &["tenant_id", "shard_id", "timeline_id"],
665 412 : )
666 412 : .expect("failed to define a metric")
667 412 : });
668 :
669 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
670 0 : register_int_counter!(
671 0 : "pageserver_circuit_breaker_broken",
672 0 : "How many times a circuit breaker has broken"
673 0 : )
674 0 : .expect("failed to define a metric")
675 0 : });
676 :
677 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
678 0 : register_int_counter!(
679 0 : "pageserver_circuit_breaker_unbroken",
680 0 : "How many times a circuit breaker has been un-broken (recovered)"
681 0 : )
682 0 : .expect("failed to define a metric")
683 0 : });
684 :
685 396 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
686 396 : register_int_counter!(
687 396 : "pageserver_compression_image_in_bytes_total",
688 396 : "Size of data written into image layers before compression"
689 396 : )
690 396 : .expect("failed to define a metric")
691 396 : });
692 :
693 396 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
694 396 : register_int_counter!(
695 396 : "pageserver_compression_image_in_bytes_considered",
696 396 : "Size of potentially compressible data written into image layers before compression"
697 396 : )
698 396 : .expect("failed to define a metric")
699 396 : });
700 :
701 396 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
702 396 : register_int_counter!(
703 396 : "pageserver_compression_image_in_bytes_chosen",
704 396 : "Size of data whose compressed form was written into image layers"
705 396 : )
706 396 : .expect("failed to define a metric")
707 396 : });
708 :
709 396 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
710 396 : register_int_counter!(
711 396 : "pageserver_compression_image_out_bytes_total",
712 396 : "Size of compressed image layer written"
713 396 : )
714 396 : .expect("failed to define a metric")
715 396 : });
716 :
717 20 : pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
718 20 : register_uint_gauge!(
719 20 : "pageserver_relsize_cache_entries",
720 20 : "Number of entries in the relation size cache",
721 20 : )
722 20 : .expect("failed to define a metric")
723 20 : });
724 :
725 20 : pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
726 20 : register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
727 20 : .expect("failed to define a metric")
728 20 : });
729 :
730 20 : pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
731 20 : register_int_counter!(
732 20 : "pageserver_relsize_cache_misses",
733 20 : "Relation size cache misses",
734 20 : )
735 20 : .expect("failed to define a metric")
736 20 : });
737 :
738 8 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
739 8 : register_int_counter!(
740 8 : "pageserver_relsize_cache_misses_old",
741 8 : "Relation size cache misses where the lookup LSN is older than the last relation update"
742 8 : )
743 8 : .expect("failed to define a metric")
744 8 : });
745 :
746 : pub(crate) mod initial_logical_size {
747 : use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
748 : use once_cell::sync::Lazy;
749 :
750 : pub(crate) struct StartCalculation(IntCounterVec);
751 412 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
752 412 : StartCalculation(
753 412 : register_int_counter_vec!(
754 412 : "pageserver_initial_logical_size_start_calculation",
755 412 : "Incremented each time we start an initial logical size calculation attempt. \
756 412 : The `circumstances` label provides some additional details.",
757 412 : &["attempt", "circumstances"]
758 412 : )
759 412 : .unwrap(),
760 412 : )
761 412 : });
762 :
763 : struct DropCalculation {
764 : first: IntCounter,
765 : retry: IntCounter,
766 : }
767 :
768 412 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
769 412 : let vec = register_int_counter_vec!(
770 412 : "pageserver_initial_logical_size_drop_calculation",
771 412 : "Incremented each time we abort a started size calculation attmpt.",
772 412 : &["attempt"]
773 412 : )
774 412 : .unwrap();
775 412 : DropCalculation {
776 412 : first: vec.with_label_values(&["first"]),
777 412 : retry: vec.with_label_values(&["retry"]),
778 412 : }
779 412 : });
780 :
781 : pub(crate) struct Calculated {
782 : pub(crate) births: IntCounter,
783 : pub(crate) deaths: IntCounter,
784 : }
785 :
786 412 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
787 412 : births: register_int_counter!(
788 412 : "pageserver_initial_logical_size_finish_calculation",
789 412 : "Incremented every time we finish calculation of initial logical size.\
790 412 : If everything is working well, this should happen at most once per Timeline object."
791 412 : )
792 412 : .unwrap(),
793 412 : deaths: register_int_counter!(
794 412 : "pageserver_initial_logical_size_drop_finished_calculation",
795 412 : "Incremented when we drop a finished initial logical size calculation result.\
796 412 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
797 412 : )
798 412 : .unwrap(),
799 412 : });
800 :
801 : pub(crate) struct OngoingCalculationGuard {
802 : inc_drop_calculation: Option<IntCounter>,
803 : }
804 :
805 : #[derive(strum_macros::IntoStaticStr)]
806 : pub(crate) enum StartCircumstances {
807 : EmptyInitial,
808 : SkippedConcurrencyLimiter,
809 : AfterBackgroundTasksRateLimit,
810 : }
811 :
812 : impl StartCalculation {
813 436 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
814 436 : let circumstances_label: &'static str = circumstances.into();
815 436 : self.0
816 436 : .with_label_values(&["first", circumstances_label])
817 436 : .inc();
818 436 : OngoingCalculationGuard {
819 436 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
820 436 : }
821 436 : }
822 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
823 0 : let circumstances_label: &'static str = circumstances.into();
824 0 : self.0
825 0 : .with_label_values(&["retry", circumstances_label])
826 0 : .inc();
827 0 : OngoingCalculationGuard {
828 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
829 0 : }
830 0 : }
831 : }
832 :
833 : impl Drop for OngoingCalculationGuard {
834 436 : fn drop(&mut self) {
835 436 : if let Some(counter) = self.inc_drop_calculation.take() {
836 0 : counter.inc();
837 436 : }
838 436 : }
839 : }
840 :
841 : impl OngoingCalculationGuard {
842 436 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
843 436 : drop(self.inc_drop_calculation.take());
844 436 : CALCULATED.births.inc();
845 436 : FinishedCalculationGuard {
846 436 : inc_on_drop: CALCULATED.deaths.clone(),
847 436 : }
848 436 : }
849 : }
850 :
851 : pub(crate) struct FinishedCalculationGuard {
852 : inc_on_drop: IntCounter,
853 : }
854 :
855 : impl Drop for FinishedCalculationGuard {
856 12 : fn drop(&mut self) {
857 12 : self.inc_on_drop.inc();
858 12 : }
859 : }
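// Illustrative lifecycle of the two guards above (a sketch, not a verbatim
// call site):
//
//     let guard = START_CALCULATION.first(StartCircumstances::SkippedConcurrencyLimiter);
//     // ... compute the initial logical size ...
//     let _finished = guard.calculation_result_saved();
//     // Dropping `guard` before `calculation_result_saved()` would instead
//     // bump pageserver_initial_logical_size_drop_calculation.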
860 :
861 : // context: https://github.com/neondatabase/neon/issues/5963
862 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
863 0 : Lazy::new(|| {
864 0 : register_int_counter!(
865 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
866 0 : "Counter for the following event: walreceiver calls\
867 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
868 0 : )
869 0 : .unwrap()
870 0 : });
871 : }
872 :
873 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
874 0 : register_uint_gauge_vec!(
875 0 : "pageserver_directory_entries_count",
876 0 : "Sum of the entries in pageserver-stored directory listings",
877 0 : &["tenant_id", "shard_id", "timeline_id"]
878 0 : )
879 0 : .expect("failed to define a metric")
880 0 : });
881 :
882 416 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
883 416 : register_uint_gauge_vec!(
884 416 : "pageserver_tenant_states_count",
885 416 : "Count of tenants per state",
886 416 : &["state"]
887 416 : )
888 416 : .expect("Failed to register pageserver_tenant_states_count metric")
889 416 : });
890 :
891 : /// A set of broken tenants.
892 : ///
893 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries for each broken
894 : /// tenant.
895 20 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
896 20 : register_uint_gauge_vec!(
897 20 : "pageserver_broken_tenants_count",
898 20 : "Set of broken tenants",
899 20 : &["tenant_id", "shard_id"]
900 20 : )
901 20 : .expect("Failed to register pageserver_tenant_states_count metric")
902 20 : });
903 :
904 12 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
905 12 : register_uint_gauge_vec!(
906 12 : "pageserver_tenant_synthetic_cached_size_bytes",
907 12 : "Synthetic size of each tenant in bytes",
908 12 : &["tenant_id"]
909 12 : )
910 12 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
911 12 : });
912 :
913 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
914 0 : register_histogram_vec!(
915 0 : "pageserver_eviction_iteration_duration_seconds_global",
916 0 : "Time spent on a single eviction iteration",
917 0 : &["period_secs", "threshold_secs"],
918 0 : STORAGE_OP_BUCKETS.into(),
919 0 : )
920 0 : .expect("failed to define a metric")
921 0 : });
922 :
923 412 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
924 412 : register_int_counter_vec!(
925 412 : "pageserver_evictions",
926 412 : "Number of layers evicted from the pageserver",
927 412 : &["tenant_id", "shard_id", "timeline_id"]
928 412 : )
929 412 : .expect("failed to define a metric")
930 412 : });
931 :
932 412 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
933 412 : register_int_counter_vec!(
934 412 : "pageserver_evictions_with_low_residence_duration",
935 412 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
936 412 : Residence duration is determined using the `residence_duration_data_source`.",
937 412 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
938 412 : )
939 412 : .expect("failed to define a metric")
940 412 : });
941 :
942 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
943 0 : register_int_counter!(
944 0 : "pageserver_unexpected_ondemand_downloads_count",
945 0 : "Number of unexpected on-demand downloads. \
946 0 : We log more context for each increment, so we forgo any labels in this metric.",
947 0 : )
948 0 : .expect("failed to define a metric")
949 0 : });
950 :
951 : /// How long did we take to start up? Broken down by labels to describe
952 : /// different phases of startup.
953 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
954 0 : register_gauge_vec!(
955 0 : "pageserver_startup_duration_seconds",
956 0 : "Time taken by phases of pageserver startup, in seconds",
957 0 : &["phase"]
958 0 : )
959 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
960 0 : });
961 :
962 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
963 0 : register_uint_gauge!(
964 0 : "pageserver_startup_is_loading",
965 0 : "1 while in initial startup load of tenants, 0 at other times"
966 0 : )
967 0 : .expect("Failed to register pageserver_startup_is_loading")
968 0 : });
969 :
970 404 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
971 404 : register_uint_gauge!(
972 404 : "pageserver_timeline_ephemeral_bytes",
973 404 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
974 404 : )
975 404 : .expect("Failed to register metric")
976 404 : });
977 :
978 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
979 : /// like how long it took to load.
980 : ///
981 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
982 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
983 : /// at a timeline level than tenant level.
984 : pub(crate) struct TenantMetrics {
985 : /// How long did tenants take to go from construction to active state?
986 : pub(crate) activation: Histogram,
987 : pub(crate) preload: Histogram,
988 : pub(crate) attach: Histogram,
989 :
990 : /// How many tenants are included in the initial startup of the pageserver?
991 : pub(crate) startup_scheduled: IntCounter,
992 : pub(crate) startup_complete: IntCounter,
993 : }
994 :
995 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
996 0 : TenantMetrics {
997 0 : activation: register_histogram!(
998 0 : "pageserver_tenant_activation_seconds",
999 0 : "Time taken by tenants to activate, in seconds",
1000 0 : CRITICAL_OP_BUCKETS.into()
1001 0 : )
1002 0 : .expect("Failed to register metric"),
1003 0 : preload: register_histogram!(
1004 0 : "pageserver_tenant_preload_seconds",
1005 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
1006 0 : CRITICAL_OP_BUCKETS.into()
1007 0 : )
1008 0 : .expect("Failed to register metric"),
1009 0 : attach: register_histogram!(
1010 0 : "pageserver_tenant_attach_seconds",
1011 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
1012 0 : CRITICAL_OP_BUCKETS.into()
1013 0 : )
1014 0 : .expect("Failed to register metric"),
1015 0 : startup_scheduled: register_int_counter!(
1016 0 : "pageserver_tenant_startup_scheduled",
1017 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
1018 0 : ).expect("Failed to register metric"),
1019 0 : startup_complete: register_int_counter!(
1020 0 : "pageserver_tenant_startup_complete",
1021 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
1022 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
1023 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
1024 0 : ).expect("Failed to register metric"),
1025 0 : }
1026 0 : });
1027 :
1028 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
1029 : #[derive(Debug)]
1030 : pub(crate) struct EvictionsWithLowResidenceDuration {
1031 : data_source: &'static str,
1032 : threshold: Duration,
1033 : counter: Option<IntCounter>,
1034 : }
1035 :
1036 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
1037 : data_source: &'static str,
1038 : threshold: Duration,
1039 : }
1040 :
1041 : impl EvictionsWithLowResidenceDurationBuilder {
1042 904 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
1043 904 : Self {
1044 904 : data_source,
1045 904 : threshold,
1046 904 : }
1047 904 : }
1048 :
1049 904 : fn build(
1050 904 : &self,
1051 904 : tenant_id: &str,
1052 904 : shard_id: &str,
1053 904 : timeline_id: &str,
1054 904 : ) -> EvictionsWithLowResidenceDuration {
1055 904 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
1056 904 : .get_metric_with_label_values(&[
1057 904 : tenant_id,
1058 904 : shard_id,
1059 904 : timeline_id,
1060 904 : self.data_source,
1061 904 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
1062 904 : ])
1063 904 : .unwrap();
1064 904 : EvictionsWithLowResidenceDuration {
1065 904 : data_source: self.data_source,
1066 904 : threshold: self.threshold,
1067 904 : counter: Some(counter),
1068 904 : }
1069 904 : }
1070 : }
1071 :
1072 : impl EvictionsWithLowResidenceDuration {
1073 924 : fn threshold_label_value(threshold: Duration) -> String {
1074 924 : format!("{}", threshold.as_secs())
1075 924 : }
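// For example, a threshold of `Duration::from_secs(86400)` is exported as the
// label value "86400" (whole seconds, no unit suffix).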
1076 :
1077 8 : pub fn observe(&self, observed_value: Duration) {
1078 8 : if observed_value < self.threshold {
1079 8 : self.counter
1080 8 : .as_ref()
1081 8 : .expect("nobody calls this function after `remove_from_vec`")
1082 8 : .inc();
1083 8 : }
1084 8 : }
1085 :
1086 0 : pub fn change_threshold(
1087 0 : &mut self,
1088 0 : tenant_id: &str,
1089 0 : shard_id: &str,
1090 0 : timeline_id: &str,
1091 0 : new_threshold: Duration,
1092 0 : ) {
1093 0 : if new_threshold == self.threshold {
1094 0 : return;
1095 0 : }
1096 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1097 0 : self.data_source,
1098 0 : new_threshold,
1099 0 : )
1100 0 : .build(tenant_id, shard_id, timeline_id);
1101 0 : std::mem::swap(self, &mut with_new);
1102 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1103 0 : }
1104 :
1105 : // This could be a `Drop` impl, but we need the `tenant_id` and `timeline_id`.
1106 20 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1107 20 : let Some(_counter) = self.counter.take() else {
1108 0 : return;
1109 : };
1110 :
1111 20 : let threshold = Self::threshold_label_value(self.threshold);
1112 20 :
1113 20 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1114 20 : tenant_id,
1115 20 : shard_id,
1116 20 : timeline_id,
1117 20 : self.data_source,
1118 20 : &threshold,
1119 20 : ]);
1120 20 :
1121 20 : match removed {
1122 0 : Err(e) => {
1123 0 : // this has been hit in staging as
1124 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1125 0 : // because we can be in the drop path already, don't risk:
1126 0 : // - "double-panic => illegal instruction" or
1127 0 : // - future "drop panic => abort"
1128 0 : //
1129 0 : // so just nag: (the error has the labels)
1130 0 : tracing::warn!(
1131 0 : "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"
1132 : );
1133 : }
1134 : Ok(()) => {
1135 : // to help identify cases where we double-remove the same values, let's log all
1136 : // deletions?
1137 20 : tracing::info!(
1138 0 : "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}",
1139 : self.data_source
1140 : );
1141 : }
1142 : }
1143 20 : }
1144 : }
1145 :
1146 : // Metrics collected on disk IO operations
1147 : //
1148 : // Roughly logarithmic scale.
1149 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1150 : 0.000030, // 30 usec
1151 : 0.001000, // 1000 usec
1152 : 0.030, // 30 ms
1153 : 1.000, // 1000 ms
1154 : 30.000, // 30000 ms
1155 : ];
1156 :
1157 : /// VirtualFile fs operation variants.
1158 : ///
1159 : /// Operations:
1160 : /// - open ([`std::fs::OpenOptions::open`])
1161 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1162 : /// - close-by-replace (close by replacement algorithm)
1163 : /// - read (`read_at`)
1164 : /// - write (`write_at`)
1165 : /// - seek (modify internal position or file length query)
1166 : /// - fsync ([`std::fs::File::sync_all`])
1167 : /// - metadata ([`std::fs::File::metadata`])
1168 : #[derive(
1169 0 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1170 : )]
1171 : pub(crate) enum StorageIoOperation {
1172 : Open,
1173 : OpenAfterReplace,
1174 : Close,
1175 : CloseByReplace,
1176 : Read,
1177 : Write,
1178 : Seek,
1179 : Fsync,
1180 : Metadata,
1181 : }
1182 :
1183 : impl StorageIoOperation {
1184 4248 : pub fn as_str(&self) -> &'static str {
1185 4248 : match self {
1186 472 : StorageIoOperation::Open => "open",
1187 472 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1188 472 : StorageIoOperation::Close => "close",
1189 472 : StorageIoOperation::CloseByReplace => "close-by-replace",
1190 472 : StorageIoOperation::Read => "read",
1191 472 : StorageIoOperation::Write => "write",
1192 472 : StorageIoOperation::Seek => "seek",
1193 472 : StorageIoOperation::Fsync => "fsync",
1194 472 : StorageIoOperation::Metadata => "metadata",
1195 : }
1196 4248 : }
1197 : }
1198 :
1199 : /// Tracks time taken by fs operations near VirtualFile.
1200 : #[derive(Debug)]
1201 : pub(crate) struct StorageIoTime {
1202 : metrics: [Histogram; StorageIoOperation::COUNT],
1203 : }
1204 :
1205 : impl StorageIoTime {
1206 472 : fn new() -> Self {
1207 472 : let storage_io_histogram_vec = register_histogram_vec!(
1208 472 : "pageserver_io_operations_seconds",
1209 472 : "Time spent in IO operations",
1210 472 : &["operation"],
1211 472 : STORAGE_IO_TIME_BUCKETS.into()
1212 472 : )
1213 472 : .expect("failed to define a metric");
1214 4248 : let metrics = std::array::from_fn(|i| {
1215 4248 : let op = StorageIoOperation::from_repr(i).unwrap();
1216 4248 : storage_io_histogram_vec
1217 4248 : .get_metric_with_label_values(&[op.as_str()])
1218 4248 : .unwrap()
1219 4248 : });
1220 472 : Self { metrics }
1221 472 : }
1222 :
1223 4032729 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1224 4032729 : &self.metrics[op as usize]
1225 4032729 : }
1226 : }
1227 :
1228 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1229 :
1230 : #[derive(Clone, Copy)]
1231 : #[repr(usize)]
1232 : enum StorageIoSizeOperation {
1233 : Read,
1234 : Write,
1235 : }
1236 :
1237 : impl StorageIoSizeOperation {
1238 : const VARIANTS: &'static [&'static str] = &["read", "write"];
1239 :
1240 2904 : fn as_str(&self) -> &'static str {
1241 2904 : Self::VARIANTS[*self as usize]
1242 2904 : }
1243 : }
1244 :
1245 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1246 548 : static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1247 548 : register_uint_gauge_vec!(
1248 548 : "pageserver_io_operations_bytes_total",
1249 548 : "Total amount of bytes read/written in IO operations",
1250 548 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1251 548 : )
1252 548 : .expect("failed to define a metric")
1253 548 : });
1254 :
1255 : #[derive(Clone, Debug)]
1256 : pub(crate) struct StorageIoSizeMetrics {
1257 : pub read: UIntGauge,
1258 : pub write: UIntGauge,
1259 : }
1260 :
1261 : impl StorageIoSizeMetrics {
1262 1452 : pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
1263 1452 : let read = STORAGE_IO_SIZE
1264 1452 : .get_metric_with_label_values(&[
1265 1452 : StorageIoSizeOperation::Read.as_str(),
1266 1452 : tenant_id,
1267 1452 : shard_id,
1268 1452 : timeline_id,
1269 1452 : ])
1270 1452 : .unwrap();
1271 1452 : let write = STORAGE_IO_SIZE
1272 1452 : .get_metric_with_label_values(&[
1273 1452 : StorageIoSizeOperation::Write.as_str(),
1274 1452 : tenant_id,
1275 1452 : shard_id,
1276 1452 : timeline_id,
1277 1452 : ])
1278 1452 : .unwrap();
1279 1452 : Self { read, write }
1280 1452 : }
1281 : }
1282 :
1283 : #[cfg(not(test))]
1284 : pub(crate) mod virtual_file_descriptor_cache {
1285 : use super::*;
1286 :
1287 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1288 0 : register_uint_gauge!(
1289 0 : "pageserver_virtual_file_descriptor_cache_size_max",
1290 0 : "Maximum number of open file descriptors in the cache."
1291 0 : )
1292 0 : .unwrap()
1293 0 : });
1294 :
1295 : // SIZE_CURRENT: derive it like so:
1296 : // ```
1297 : // sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
1298 : // -ignoring(operation)
1299 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
1300 : // ```
1301 : }
1302 :
1303 : #[cfg(not(test))]
1304 : pub(crate) mod virtual_file_io_engine {
1305 : use super::*;
1306 :
1307 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1308 0 : register_uint_gauge_vec!(
1309 0 : "pageserver_virtual_file_io_engine_kind",
1310 0 : "The configured io engine for VirtualFile",
1311 0 : &["kind"],
1312 0 : )
1313 0 : .unwrap()
1314 0 : });
1315 : }
1316 :
1317 : pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
1318 : pub(crate) struct SmgrOpTimerInner {
1319 : global_execution_latency_histo: Histogram,
1320 : per_timeline_execution_latency_histo: Option<Histogram>,
1321 :
1322 : global_batch_wait_time: Histogram,
1323 : per_timeline_batch_wait_time: Histogram,
1324 :
1325 : global_flush_in_progress_micros: IntCounter,
1326 : per_timeline_flush_in_progress_micros: IntCounter,
1327 :
1328 : throttling: Arc<tenant_throttling::Pagestream>,
1329 :
1330 : timings: SmgrOpTimerState,
1331 : }
1332 :
1333 : /// The stages of request processing are represented by the enum variants.
1334 : /// Used as part of [`SmgrOpTimerInner::timings`].
1335 : ///
1336 : /// Request processing calls into the `SmgrOpTimer::observe_*` methods at the
1337 : /// transition points.
1338 : /// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]
1339 : /// to the next state.
1340 : ///
1341 : /// Each request goes through every stage, in all configurations.
1342 : ///
1343 : #[derive(Debug)]
1344 : enum SmgrOpTimerState {
1345 : Received {
1346 : // In the future, we may want to track the full time the request spent
1347 : // inside pageserver process (time spent in kernel buffers can't be tracked).
1348 : // `received_at` would be used for that.
1349 : #[allow(dead_code)]
1350 : received_at: Instant,
1351 : },
1352 : Throttling {
1353 : throttle_started_at: Instant,
1354 : },
1355 : Batching {
1356 : throttle_done_at: Instant,
1357 : },
1358 : Executing {
1359 : execution_started_at: Instant,
1360 : },
1361 : Flushing,
1362 : // NB: when adding observation points, remember to update the Drop impl.
1363 : }
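// Illustrative sequence of observations for a single request (a sketch; the
// real call sites live in the page service request handlers):
//
//     timer.observe_throttle_start(Instant::now());
//     timer.observe_throttle_done(ThrottleResult::NotThrottled { end: Instant::now() });
//     timer.observe_execution_start(Instant::now());
//     let maybe_flush = timer.observe_execution_end(Instant::now());
//     // `maybe_flush` is an `Option<SmgrOpFlushInProgress>` that wraps the
//     // network flush via `SmgrOpFlushInProgress::measure`.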
1364 :
1365 : // NB: when adding observation points, remember to update the Drop impl.
1366 : impl SmgrOpTimer {
1367 : /// See [`SmgrOpTimerState`] for more context.
1368 0 : pub(crate) fn observe_throttle_start(&mut self, at: Instant) {
1369 0 : let Some(inner) = self.0.as_mut() else {
1370 0 : return;
1371 : };
1372 0 : let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {
1373 0 : return;
1374 : };
1375 0 : inner.throttling.count_accounted_start.inc();
1376 0 : inner.timings = SmgrOpTimerState::Throttling {
1377 0 : throttle_started_at: at,
1378 0 : };
1379 0 : }
1380 :
1381 : /// See [`SmgrOpTimerState`] for more context.
1382 0 : pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {
1383 0 : let Some(inner) = self.0.as_mut() else {
1384 0 : return;
1385 : };
1386 : let SmgrOpTimerState::Throttling {
1387 0 : throttle_started_at,
1388 0 : } = &inner.timings
1389 : else {
1390 0 : return;
1391 : };
1392 0 : inner.throttling.count_accounted_finish.inc();
1393 0 : match throttle {
1394 0 : ThrottleResult::NotThrottled { end } => {
1395 0 : inner.timings = SmgrOpTimerState::Batching {
1396 0 : throttle_done_at: end,
1397 0 : };
1398 0 : }
1399 0 : ThrottleResult::Throttled { end } => {
1400 0 : // update metrics
1401 0 : inner.throttling.count_throttled.inc();
1402 0 : inner
1403 0 : .throttling
1404 0 : .wait_time
1405 0 : .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());
1406 0 : // state transition
1407 0 : inner.timings = SmgrOpTimerState::Batching {
1408 0 : throttle_done_at: end,
1409 0 : };
1410 0 : }
1411 : }
1412 0 : }
1413 :
1414 : /// See [`SmgrOpTimerState`] for more context.
1415 0 : pub(crate) fn observe_execution_start(&mut self, at: Instant) {
1416 0 : let Some(inner) = self.0.as_mut() else {
1417 0 : return;
1418 : };
1419 0 : let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {
1420 0 : return;
1421 : };
1422 : // update metrics
1423 0 : let batch = at - *throttle_done_at;
1424 0 : inner.global_batch_wait_time.observe(batch.as_secs_f64());
1425 0 : inner
1426 0 : .per_timeline_batch_wait_time
1427 0 : .observe(batch.as_secs_f64());
1428 0 : // state transition
1429 0 : inner.timings = SmgrOpTimerState::Executing {
1430 0 : execution_started_at: at,
1431 0 : }
1432 0 : }
1433 :
1434 : /// For all but the first caller, this is a no-op.
1435 : /// The first callers receives Some, subsequent ones None.
1436 : ///
1437 : /// See [`SmgrOpTimerState`] for more context.
1438 0 : pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option<SmgrOpFlushInProgress> {
1439 : // NB: unlike the other observe_* methods, this one take()s.
1440 : #[allow(clippy::question_mark)] // maintain similar code pattern.
1441 0 : let Some(mut inner) = self.0.take() else {
1442 0 : return None;
1443 : };
1444 : let SmgrOpTimerState::Executing {
1445 0 : execution_started_at,
1446 0 : } = &inner.timings
1447 : else {
1448 0 : return None;
1449 : };
1450 : // update metrics
1451 0 : let execution = at - *execution_started_at;
1452 0 : inner
1453 0 : .global_execution_latency_histo
1454 0 : .observe(execution.as_secs_f64());
1455 0 : if let Some(per_timeline_execution_latency_histo) =
1456 0 : &inner.per_timeline_execution_latency_histo
1457 0 : {
1458 0 : per_timeline_execution_latency_histo.observe(execution.as_secs_f64());
1459 0 : }
1460 :
1461 : // state transition
1462 0 : inner.timings = SmgrOpTimerState::Flushing;
1463 0 :
1464 0 : // return the flush in progress object which
1465 0 : // will do the remaining metrics updates
1466 0 : let SmgrOpTimerInner {
1467 0 : global_flush_in_progress_micros,
1468 0 : per_timeline_flush_in_progress_micros,
1469 0 : ..
1470 0 : } = inner;
1471 0 : Some(SmgrOpFlushInProgress {
1472 0 : global_micros: global_flush_in_progress_micros,
1473 0 : per_timeline_micros: per_timeline_flush_in_progress_micros,
1474 0 : })
1475 0 : }
1476 : }
1477 :
1478 : /// The last stage of request processing is serializing and flushing the request
1479 : /// into the TCP connection. We want to make slow flushes observable
1480 : /// _while they are occurring_, so this struct provides a wrapper method [`Self::measure`]
1481 : /// to periodically bump the metric.
1482 : ///
1483 : /// If in the future we decide that we're not interested in live updates, we can
1484 : /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
1485 : /// and remove this struct from the code base.
1486 : pub(crate) struct SmgrOpFlushInProgress {
1487 : global_micros: IntCounter,
1488 : per_timeline_micros: IntCounter,
1489 : }
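// Illustrative use of `SmgrOpFlushInProgress::measure` (a sketch; `flush_fut`
// and `socket_fd` stand in for the real flush future and connection fd at the
// call site):
//
//     let started_at = Instant::now();
//     flush_in_progress.measure(started_at, flush_fut, socket_fd).await;
//
// `measure` polls the future with a 10-second timeout in a loop and bumps the
// flush-in-progress counters on every tick, so slow flushes are visible while
// they are still happening.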
1490 :
1491 : impl Drop for SmgrOpTimer {
1492 0 : fn drop(&mut self) {
1493 0 : // In case of early drop, update any of the remaining metrics with
1494 0 : // observations so that (started,finished) counter pairs balance out
1495 0 : // and all counters on the latency path have the same number of
1496 0 : // observations.
1497 0 : // It's technically lying and it would be better if each metric had
1498 0 : // a separate label or similar for cancelled requests.
1499 0 : // But we don't have that right now and counter pairs balancing
1500 0 : // out is useful when using the metrics in panels and whatnot.
1501 0 : let now = Instant::now();
1502 0 : self.observe_throttle_start(now);
1503 0 : self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
1504 0 : self.observe_execution_start(now);
1505 0 : let maybe_flush_timer = self.observe_execution_end(now);
1506 0 : drop(maybe_flush_timer);
1507 0 : }
1508 : }
1509 :
1510 : impl SmgrOpFlushInProgress {
1511 : /// The caller must guarantee that `socket_fd` outlives this function.
1512 0 : pub(crate) async fn measure<Fut, O>(
1513 0 : self,
1514 0 : started_at: Instant,
1515 0 : mut fut: Fut,
1516 0 : socket_fd: RawFd,
1517 0 : ) -> O
1518 0 : where
1519 0 : Fut: std::future::Future<Output = O>,
1520 0 : {
1521 0 : let mut fut = std::pin::pin!(fut);
1522 0 :
1523 0 : let mut logged = false;
1524 0 : let mut last_counter_increment_at = started_at;
1525 0 : let mut observe_guard = scopeguard::guard(
1526 0 : |is_timeout| {
1527 0 : let now = Instant::now();
1528 0 :
1529 0 : // Increment counter
1530 0 : {
1531 0 : let elapsed_since_last_observe = now - last_counter_increment_at;
1532 0 : self.global_micros
1533 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1534 0 : self.per_timeline_micros
1535 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1536 0 : last_counter_increment_at = now;
1537 0 : }
1538 0 :
1539 0 : // Log something on every timeout, and on completion but only if we hit a timeout.
1540 0 : if is_timeout || logged {
1541 0 : logged = true;
1542 0 : let elapsed_total = now - started_at;
1543 0 : let msg = if is_timeout {
1544 0 : "slow flush ongoing"
1545 : } else {
1546 0 : "slow flush completed or cancelled"
1547 : };
1548 :
1549 0 : let (inq, outq) = {
1550 0 : // SAFETY: caller guarantees that `socket_fd` outlives this function.
1551 0 : #[cfg(target_os = "linux")]
1552 0 : unsafe {
1553 0 : (
1554 0 : utils::linux_socket_ioctl::inq(socket_fd).unwrap_or(-2),
1555 0 : utils::linux_socket_ioctl::outq(socket_fd).unwrap_or(-2),
1556 0 : )
1557 0 : }
1558 0 : #[cfg(not(target_os = "linux"))]
1559 0 : {
1560 0 : _ = socket_fd; // appease unused lint on macOS
1561 0 : (-1, -1)
1562 0 : }
1563 0 : };
1564 0 :
1565 0 : let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64());
1566 0 : tracing::info!(elapsed_total_secs, inq, outq, msg);
1567 0 : }
1568 0 : },
1569 0 : |mut observe| {
1570 0 : observe(false);
1571 0 : },
1572 0 : );
1573 :
1574 : loop {
1575 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1576 0 : Ok(v) => return v,
1577 0 : Err(_timeout) => {
1578 0 : (*observe_guard)(true);
1579 0 : }
1580 : }
1581 : }
1582 0 : }
1583 : }
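// Illustrative only: a minimal sketch of how a call site might wrap a TCP flush with
// `SmgrOpFlushInProgress::measure`. The names `smgr_op_timer`, `pgb_writer`, `socket_fd`,
// and the surrounding handler are assumptions, not part of this module.
//
//     if let Some(flush_timer) = smgr_op_timer.observe_execution_end(Instant::now()) {
//         let flush_started_at = Instant::now();
//         flush_timer
//             .measure(flush_started_at, pgb_writer.flush(), socket_fd)
//             .await?;
//     }
//
// The wrapper polls the inner future with a 10-second timeout, so the in-progress counters
// are bumped and a "slow flush ongoing" line is logged while a flush is still stuck.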
1584 :
1585 : #[derive(
1586 : Debug,
1587 : Clone,
1588 : Copy,
1589 : IntoStaticStr,
1590 : strum_macros::EnumCount,
1591 0 : strum_macros::EnumIter,
1592 : strum_macros::FromRepr,
1593 : enum_map::Enum,
1594 : )]
1595 : #[strum(serialize_all = "snake_case")]
1596 : pub enum SmgrQueryType {
1597 : GetRelExists,
1598 : GetRelSize,
1599 : GetPageAtLsn,
1600 : GetDbSize,
1601 : GetSlruSegment,
1602 : #[cfg(feature = "testing")]
1603 : Test,
1604 : }
1605 :
1606 : pub(crate) struct SmgrQueryTimePerTimeline {
1607 : global_started: [IntCounter; SmgrQueryType::COUNT],
1608 : global_latency: [Histogram; SmgrQueryType::COUNT],
1609 : per_timeline_getpage_started: IntCounter,
1610 : per_timeline_getpage_latency: Histogram,
1611 : global_batch_size: Histogram,
1612 : per_timeline_batch_size: Histogram,
1613 : global_flush_in_progress_micros: IntCounter,
1614 : per_timeline_flush_in_progress_micros: IntCounter,
1615 : global_batch_wait_time: Histogram,
1616 : per_timeline_batch_wait_time: Histogram,
1617 : throttling: Arc<tenant_throttling::Pagestream>,
1618 : }
1619 :
1620 412 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1621 412 : register_int_counter_vec!(
1622 412 : // it's a counter, but the name is prepared for extending it to a histogram of queue depth
1623 412 : "pageserver_smgr_query_started_global_count",
1624 412 : "Number of smgr queries started, aggregated by query type.",
1625 412 : &["smgr_query_type"],
1626 412 : )
1627 412 : .expect("failed to define a metric")
1628 412 : });
1629 :
1630 412 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1631 412 : register_int_counter_vec!(
1632 412 : // it's a counter, but the name is prepared for extending it to a histogram of queue depth
1633 412 : "pageserver_smgr_query_started_count",
1634 412 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1635 412 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1636 412 : )
1637 412 : .expect("failed to define a metric")
1638 412 : });
1639 :
1640 : // Alias so all histograms recording per-timeline smgr timings use the same buckets.
1641 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
1642 :
1643 412 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1644 412 : register_histogram_vec!(
1645 412 : "pageserver_smgr_query_seconds",
1646 412 : "Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
1647 412 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1648 412 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1649 412 : )
1650 412 : .expect("failed to define a metric")
1651 412 : });
1652 :
1653 412 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1654 412 : [
1655 412 : 1,
1656 412 : 10,
1657 412 : 20,
1658 412 : 40,
1659 412 : 60,
1660 412 : 80,
1661 412 : 100,
1662 412 : 200,
1663 412 : 300,
1664 412 : 400,
1665 412 : 500,
1666 412 : 600,
1667 412 : 700,
1668 412 : 800,
1669 412 : 900,
1670 412 : 1_000, // 1ms
1671 412 : 2_000,
1672 412 : 4_000,
1673 412 : 6_000,
1674 412 : 8_000,
1675 412 : 10_000, // 10ms
1676 412 : 20_000,
1677 412 : 40_000,
1678 412 : 60_000,
1679 412 : 80_000,
1680 412 : 100_000,
1681 412 : 200_000,
1682 412 : 400_000,
1683 412 : 600_000,
1684 412 : 800_000,
1685 412 : 1_000_000, // 1s
1686 412 : 2_000_000,
1687 412 : 4_000_000,
1688 412 : 6_000_000,
1689 412 : 8_000_000,
1690 412 : 10_000_000, // 10s
1691 412 : 20_000_000,
1692 412 : 50_000_000,
1693 412 : 100_000_000,
1694 412 : 200_000_000,
1695 412 : 1_000_000_000, // 1000s
1696 412 : ]
1697 412 : .into_iter()
1698 412 : .map(Duration::from_micros)
1699 16892 : .map(|d| d.as_secs_f64())
1700 412 : .collect()
1701 412 : });
1702 :
1703 412 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1704 412 : register_histogram_vec!(
1705 412 : "pageserver_smgr_query_seconds_global",
1706 412 : "Like pageserver_smgr_query_seconds, but aggregated to instance level.",
1707 412 : &["smgr_query_type"],
1708 412 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1709 412 : )
1710 412 : .expect("failed to define a metric")
1711 412 : });
1712 :
1713 412 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1714 412 : (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
1715 13184 : .map(|v| v.into())
1716 412 : .collect()
1717 412 : });
1718 :
1719 412 : static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1720 412 : register_histogram!(
1721 412 : "pageserver_page_service_batch_size_global",
1722 412 : "Batch size of pageserver page service requests",
1723 412 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
1724 412 : )
1725 412 : .expect("failed to define a metric")
1726 412 : });
1727 :
1728 412 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1729 412 : let mut buckets = Vec::new();
1730 2884 : for i in 0.. {
1731 2884 : let bucket = 1 << i;
1732 2884 : if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
1733 412 : break;
1734 2472 : }
1735 2472 : buckets.push(bucket.into());
1736 : }
1737 412 : buckets
1738 412 : });
1739 :
1740 412 : static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1741 412 : register_histogram_vec!(
1742 412 : "pageserver_page_service_batch_size",
1743 412 : "Batch size of pageserver page service requests",
1744 412 : &["tenant_id", "shard_id", "timeline_id"],
1745 412 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
1746 412 : )
1747 412 : .expect("failed to define a metric")
1748 412 : });
1749 :
1750 0 : pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1751 0 : register_int_gauge_vec!(
1752 0 : "pageserver_page_service_config_max_batch_size",
1753 0 : "Configured maximum batch size for the server-side batching functionality of page_service. \
1754 0 : Labels expose more of the configuration parameters.",
1755 0 : &["mode", "execution"]
1756 0 : )
1757 0 : .expect("failed to define a metric")
1758 0 : });
1759 :
1760 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
1761 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
1762 0 : let (label_values, value) = match conf {
1763 0 : PageServicePipeliningConfig::Serial => (["serial", "-"], 1),
1764 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
1765 0 : max_batch_size,
1766 0 : execution,
1767 0 : }) => {
1768 0 : let mode = "pipelined";
1769 0 : let execution = match execution {
1770 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
1771 0 : "concurrent-futures"
1772 : }
1773 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
1774 : };
1775 0 : ([mode, execution], max_batch_size.get())
1776 : }
1777 : };
1778 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
1779 0 : .with_label_values(&label_values)
1780 0 : .set(value.try_into().unwrap());
1781 0 : }
1782 :
1783 412 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
1784 412 : register_int_counter_vec!(
1785 412 : "pageserver_page_service_pagestream_flush_in_progress_micros",
1786 412 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
1787 412 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
1788 412 : easily discoverable in monitoring. \
1789 412 : Hence, this is NOT a completion latency histogram.",
1790 412 : &["tenant_id", "shard_id", "timeline_id"],
1791 412 : )
1792 412 : .expect("failed to define a metric")
1793 412 : });
1794 :
1795 412 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
1796 412 : register_int_counter!(
1797 412 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
1798 412 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
1799 412 : )
1800 412 : .expect("failed to define a metric")
1801 412 : });
1802 :
1803 412 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1804 412 : register_histogram_vec!(
1805 412 : "pageserver_page_service_pagestream_batch_wait_time_seconds",
1806 412 : "Time a request spent waiting in its batch until the batch moved to throttle&execution.",
1807 412 : &["tenant_id", "shard_id", "timeline_id"],
1808 412 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1809 412 : )
1810 412 : .expect("failed to define a metric")
1811 412 : });
1812 :
1813 412 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1814 412 : register_histogram!(
1815 412 : "pageserver_page_service_pagestream_batch_wait_time_seconds_global",
1816 412 : "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
1817 412 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
1818 412 : )
1819 412 : .expect("failed to define a metric")
1820 412 : });
1821 :
1822 : impl SmgrQueryTimePerTimeline {
1823 904 : pub(crate) fn new(
1824 904 : tenant_shard_id: &TenantShardId,
1825 904 : timeline_id: &TimelineId,
1826 904 : pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,
1827 904 : ) -> Self {
1828 904 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1829 904 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1830 904 : let timeline_id = timeline_id.to_string();
1831 5424 : let global_started = std::array::from_fn(|i| {
1832 5424 : let op = SmgrQueryType::from_repr(i).unwrap();
1833 5424 : SMGR_QUERY_STARTED_GLOBAL
1834 5424 : .get_metric_with_label_values(&[op.into()])
1835 5424 : .unwrap()
1836 5424 : });
1837 5424 : let global_latency = std::array::from_fn(|i| {
1838 5424 : let op = SmgrQueryType::from_repr(i).unwrap();
1839 5424 : SMGR_QUERY_TIME_GLOBAL
1840 5424 : .get_metric_with_label_values(&[op.into()])
1841 5424 : .unwrap()
1842 5424 : });
1843 904 :
1844 904 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
1845 904 : .get_metric_with_label_values(&[
1846 904 : SmgrQueryType::GetPageAtLsn.into(),
1847 904 : &tenant_id,
1848 904 : &shard_slug,
1849 904 : &timeline_id,
1850 904 : ])
1851 904 : .unwrap();
1852 904 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1853 904 : .get_metric_with_label_values(&[
1854 904 : SmgrQueryType::GetPageAtLsn.into(),
1855 904 : &tenant_id,
1856 904 : &shard_slug,
1857 904 : &timeline_id,
1858 904 : ])
1859 904 : .unwrap();
1860 904 :
1861 904 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
1862 904 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
1863 904 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1864 904 : .unwrap();
1865 904 :
1866 904 : let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
1867 904 : let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
1868 904 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1869 904 : .unwrap();
1870 904 :
1871 904 : let global_flush_in_progress_micros =
1872 904 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
1873 904 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
1874 904 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1875 904 : .unwrap();
1876 904 :
1877 904 : Self {
1878 904 : global_started,
1879 904 : global_latency,
1880 904 : per_timeline_getpage_latency,
1881 904 : per_timeline_getpage_started,
1882 904 : global_batch_size,
1883 904 : per_timeline_batch_size,
1884 904 : global_flush_in_progress_micros,
1885 904 : per_timeline_flush_in_progress_micros,
1886 904 : global_batch_wait_time,
1887 904 : per_timeline_batch_wait_time,
1888 904 : throttling: pagestream_throttle_metrics,
1889 904 : }
1890 904 : }
1891 0 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
1892 0 : self.global_started[op as usize].inc();
1893 :
1894 0 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1895 0 : self.per_timeline_getpage_started.inc();
1896 0 : Some(self.per_timeline_getpage_latency.clone())
1897 : } else {
1898 0 : None
1899 : };
1900 :
1901 0 : SmgrOpTimer(Some(SmgrOpTimerInner {
1902 0 : global_execution_latency_histo: self.global_latency[op as usize].clone(),
1903 0 : per_timeline_execution_latency_histo: per_timeline_latency_histo,
1904 0 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
1905 0 : per_timeline_flush_in_progress_micros: self
1906 0 : .per_timeline_flush_in_progress_micros
1907 0 : .clone(),
1908 0 : global_batch_wait_time: self.global_batch_wait_time.clone(),
1909 0 : per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
1910 0 : throttling: self.throttling.clone(),
1911 0 : timings: SmgrOpTimerState::Received { received_at },
1912 0 : }))
1913 0 : }
1914 :
1915 : /// TODO: do something about this? seems odd, we have a similar call on SmgrOpTimer
1916 0 : pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
1917 0 : self.global_batch_size.observe(batch_size as f64);
1918 0 : self.per_timeline_batch_size.observe(batch_size as f64);
1919 0 : }
1920 : }
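// Illustrative only: the intended lifecycle of an `SmgrOpTimer` obtained from
// `start_smgr_op`, pieced together from the observe_* methods above (the same sequence the
// `Drop` impl replays for early drops). The handler code around it is an assumption.
//
//     let mut timer = metrics.start_smgr_op(SmgrQueryType::GetPageAtLsn, received_at);
//     // ... request sits in its batch ...
//     timer.observe_throttle_start(Instant::now());
//     timer.observe_throttle_done(ThrottleResult::NotThrottled { end: Instant::now() });
//     timer.observe_execution_start(Instant::now());
//     // ... execute the request ...
//     let maybe_flush = timer.observe_execution_end(Instant::now());
//
// If the timer is dropped early (e.g. the request is cancelled), the `Drop` impl feeds the
// remaining stages with zero-duration observations so counter pairs stay balanced.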
1921 :
1922 : // keep in sync with control plane Go code so that we can validate
1923 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1924 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1925 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1926 0 : [
1927 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1928 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1929 0 : ]
1930 0 : .map(|ms| (ms as f64) / 1000.0)
1931 0 : });
1932 :
1933 : pub(crate) struct BasebackupQueryTime {
1934 : ok: Histogram,
1935 : error: Histogram,
1936 : client_error: Histogram,
1937 : }
1938 :
1939 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1940 0 : let vec = register_histogram_vec!(
1941 0 : "pageserver_basebackup_query_seconds",
1942 0 : "Histogram of basebackup queries durations, by result type",
1943 0 : &["result"],
1944 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1945 0 : )
1946 0 : .expect("failed to define a metric");
1947 0 : BasebackupQueryTime {
1948 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1949 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1950 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
1951 0 : }
1952 0 : });
1953 :
1954 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
1955 : parent: &'a BasebackupQueryTime,
1956 : start: std::time::Instant,
1957 : }
1958 :
1959 : impl BasebackupQueryTime {
1960 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
1961 0 : let start = Instant::now();
1962 0 : BasebackupQueryTimeOngoingRecording {
1963 0 : parent: self,
1964 0 : start,
1965 0 : }
1966 0 : }
1967 : }
1968 :
1969 : impl BasebackupQueryTimeOngoingRecording<'_> {
1970 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
1971 0 : let elapsed = self.start.elapsed().as_secs_f64();
1972 : // If you want to change the categorization of a specific error, also change it in `log_query_error`.
1973 0 : let metric = match res {
1974 0 : Ok(_) => &self.parent.ok,
1975 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
1976 0 : if is_expected_io_error(io_error) =>
1977 0 : {
1978 0 : &self.parent.client_error
1979 : }
1980 0 : Err(_) => &self.parent.error,
1981 : };
1982 0 : metric.observe(elapsed);
1983 0 : }
1984 : }
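// Illustrative only: a hedged sketch of the recording pattern. `handle_basebackup_request`
// is a hypothetical handler name; any `Result<_, QueryError>` works.
//
//     let recording = BASEBACKUP_QUERY_TIME.start_recording();
//     let res = handle_basebackup_request(/* ... */).await;
//     recording.observe(&res);
//
// Expected client-side disconnects are bucketed under "client_error" so they do not
// pollute the server-side error latency histogram.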
1985 :
1986 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1987 0 : register_int_counter_pair_vec!(
1988 0 : "pageserver_live_connections_started",
1989 0 : "Number of network connections that we started handling",
1990 0 : "pageserver_live_connections_finished",
1991 0 : "Number of network connections that we finished handling",
1992 0 : &["pageserver_connection_kind"]
1993 0 : )
1994 0 : .expect("failed to define a metric")
1995 0 : });
1996 :
1997 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
1998 : pub(crate) enum ComputeCommandKind {
1999 : PageStreamV3,
2000 : PageStreamV2,
2001 : Basebackup,
2002 : Fullbackup,
2003 : LeaseLsn,
2004 : }
2005 :
2006 : pub(crate) struct ComputeCommandCounters {
2007 : map: EnumMap<ComputeCommandKind, IntCounter>,
2008 : }
2009 :
2010 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
2011 0 : let inner = register_int_counter_vec!(
2012 0 : "pageserver_compute_commands",
2013 0 : "Number of compute -> pageserver commands processed",
2014 0 : &["command"]
2015 0 : )
2016 0 : .expect("failed to define a metric");
2017 0 :
2018 0 : ComputeCommandCounters {
2019 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
2020 0 : let command = ComputeCommandKind::from_usize(i);
2021 0 : let command_str: &'static str = command.into();
2022 0 : inner.with_label_values(&[command_str])
2023 0 : })),
2024 0 : }
2025 0 : });
2026 :
2027 : impl ComputeCommandCounters {
2028 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
2029 0 : &self.map[command]
2030 0 : }
2031 : }
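// Illustrative only: incrementing the per-command counter, e.g. when a pagestream command
// is accepted (the exact call site is an assumption):
//
//     COMPUTE_COMMANDS_COUNTERS
//         .for_command(ComputeCommandKind::PageStreamV2)
//         .inc();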
2032 :
2033 : // remote storage metrics
2034 :
2035 404 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
2036 404 : register_int_counter_pair_vec!(
2037 404 : "pageserver_remote_timeline_client_calls_started",
2038 404 : "Number of started calls to remote timeline client.",
2039 404 : "pageserver_remote_timeline_client_calls_finished",
2040 404 : "Number of finshed calls to remote timeline client.",
2041 404 : &[
2042 404 : "tenant_id",
2043 404 : "shard_id",
2044 404 : "timeline_id",
2045 404 : "file_kind",
2046 404 : "op_kind"
2047 404 : ],
2048 404 : )
2049 404 : .unwrap()
2050 404 : });
2051 :
2052 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
2053 400 : Lazy::new(|| {
2054 400 : register_int_counter_vec!(
2055 400 : "pageserver_remote_timeline_client_bytes_started",
2056 400 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2057 400 : The increment happens when the operation is scheduled.",
2058 400 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2059 400 : )
2060 400 : .expect("failed to define a metric")
2061 400 : });
2062 :
2063 400 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
2064 400 : register_int_counter_vec!(
2065 400 : "pageserver_remote_timeline_client_bytes_finished",
2066 400 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2067 400 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
2068 400 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2069 400 : )
2070 400 : .expect("failed to define a metric")
2071 400 : });
2072 :
2073 : pub(crate) struct TenantManagerMetrics {
2074 : tenant_slots_attached: UIntGauge,
2075 : tenant_slots_secondary: UIntGauge,
2076 : tenant_slots_inprogress: UIntGauge,
2077 : pub(crate) tenant_slot_writes: IntCounter,
2078 : pub(crate) unexpected_errors: IntCounter,
2079 : }
2080 :
2081 : impl TenantManagerMetrics {
2082 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
2083 : /// exactly: they track the lifetime of the slots _in the tenant map_.
2084 4 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
2085 4 : match slot {
2086 0 : TenantSlot::Attached(_) => {
2087 0 : self.tenant_slots_attached.inc();
2088 0 : }
2089 0 : TenantSlot::Secondary(_) => {
2090 0 : self.tenant_slots_secondary.inc();
2091 0 : }
2092 4 : TenantSlot::InProgress(_) => {
2093 4 : self.tenant_slots_inprogress.inc();
2094 4 : }
2095 : }
2096 4 : }
2097 :
2098 4 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
2099 4 : match slot {
2100 4 : TenantSlot::Attached(_) => {
2101 4 : self.tenant_slots_attached.dec();
2102 4 : }
2103 0 : TenantSlot::Secondary(_) => {
2104 0 : self.tenant_slots_secondary.dec();
2105 0 : }
2106 0 : TenantSlot::InProgress(_) => {
2107 0 : self.tenant_slots_inprogress.dec();
2108 0 : }
2109 : }
2110 4 : }
2111 :
2112 : #[cfg(all(debug_assertions, not(test)))]
2113 0 : pub(crate) fn slots_total(&self) -> u64 {
2114 0 : self.tenant_slots_attached.get()
2115 0 : + self.tenant_slots_secondary.get()
2116 0 : + self.tenant_slots_inprogress.get()
2117 0 : }
2118 : }
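// Illustrative only: the tenant manager is expected to call these helpers whenever it swaps
// a slot in the tenant map, so the per-mode gauges mirror the map contents. The surrounding
// map-manipulation code is an assumption:
//
//     TENANT_MANAGER.slot_removed(&old_slot);
//     TENANT_MANAGER.slot_inserted(&new_slot);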
2119 :
2120 4 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
2121 4 : let tenant_slots = register_uint_gauge_vec!(
2122 4 : "pageserver_tenant_manager_slots",
2123 4 : "How many slots currently exist, including all attached, secondary and in-progress operations",
2124 4 : &["mode"]
2125 4 : )
2126 4 : .expect("failed to define a metric");
2127 4 : TenantManagerMetrics {
2128 4 : tenant_slots_attached: tenant_slots
2129 4 : .get_metric_with_label_values(&["attached"])
2130 4 : .unwrap(),
2131 4 : tenant_slots_secondary: tenant_slots
2132 4 : .get_metric_with_label_values(&["secondary"])
2133 4 : .unwrap(),
2134 4 : tenant_slots_inprogress: tenant_slots
2135 4 : .get_metric_with_label_values(&["inprogress"])
2136 4 : .unwrap(),
2137 4 : tenant_slot_writes: register_int_counter!(
2138 4 : "pageserver_tenant_manager_slot_writes",
2139 4 : "Writes to a tenant slot, including all of create/attach/detach/delete"
2140 4 : )
2141 4 : .expect("failed to define a metric"),
2142 4 : unexpected_errors: register_int_counter!(
2143 4 : "pageserver_tenant_manager_unexpected_errors_total",
2144 4 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
2145 4 : )
2146 4 : .expect("failed to define a metric"),
2147 4 : }
2148 4 : });
2149 :
2150 : pub(crate) struct DeletionQueueMetrics {
2151 : pub(crate) keys_submitted: IntCounter,
2152 : pub(crate) keys_dropped: IntCounter,
2153 : pub(crate) keys_executed: IntCounter,
2154 : pub(crate) keys_validated: IntCounter,
2155 : pub(crate) dropped_lsn_updates: IntCounter,
2156 : pub(crate) unexpected_errors: IntCounter,
2157 : pub(crate) remote_errors: IntCounterVec,
2158 : }
2159 63 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
2160 63 : DeletionQueueMetrics{
2161 63 :
2162 63 : keys_submitted: register_int_counter!(
2163 63 : "pageserver_deletion_queue_submitted_total",
2164 63 : "Number of objects submitted for deletion"
2165 63 : )
2166 63 : .expect("failed to define a metric"),
2167 63 :
2168 63 : keys_dropped: register_int_counter!(
2169 63 : "pageserver_deletion_queue_dropped_total",
2170 63 : "Number of object deletions dropped due to stale generation."
2171 63 : )
2172 63 : .expect("failed to define a metric"),
2173 63 :
2174 63 : keys_executed: register_int_counter!(
2175 63 : "pageserver_deletion_queue_executed_total",
2176 63 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
2177 63 : )
2178 63 : .expect("failed to define a metric"),
2179 63 :
2180 63 : keys_validated: register_int_counter!(
2181 63 : "pageserver_deletion_queue_validated_total",
2182 63 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
2183 63 : )
2184 63 : .expect("failed to define a metric"),
2185 63 :
2186 63 : dropped_lsn_updates: register_int_counter!(
2187 63 : "pageserver_deletion_queue_dropped_lsn_updates_total",
2188 63 : "Updates to remote_consistent_lsn dropped due to stale generation number."
2189 63 : )
2190 63 : .expect("failed to define a metric"),
2191 63 : unexpected_errors: register_int_counter!(
2192 63 : "pageserver_deletion_queue_unexpected_errors_total",
2193 63 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
2194 63 : )
2195 63 : .expect("failed to define a metric"),
2196 63 : remote_errors: register_int_counter_vec!(
2197 63 : "pageserver_deletion_queue_remote_errors_total",
2198 63 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
2199 63 : &["op_kind"],
2200 63 : )
2201 63 : .expect("failed to define a metric")
2202 63 : }
2203 63 : });
2204 :
2205 : pub(crate) struct SecondaryModeMetrics {
2206 : pub(crate) upload_heatmap: IntCounter,
2207 : pub(crate) upload_heatmap_errors: IntCounter,
2208 : pub(crate) upload_heatmap_duration: Histogram,
2209 : pub(crate) download_heatmap: IntCounter,
2210 : pub(crate) download_layer: IntCounter,
2211 : }
2212 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2213 0 : SecondaryModeMetrics {
2214 0 : upload_heatmap: register_int_counter!(
2215 0 : "pageserver_secondary_upload_heatmap",
2216 0 : "Number of heatmaps written to remote storage by attached tenants"
2217 0 : )
2218 0 : .expect("failed to define a metric"),
2219 0 : upload_heatmap_errors: register_int_counter!(
2220 0 : "pageserver_secondary_upload_heatmap_errors",
2221 0 : "Failures writing heatmap to remote storage"
2222 0 : )
2223 0 : .expect("failed to define a metric"),
2224 0 : upload_heatmap_duration: register_histogram!(
2225 0 : "pageserver_secondary_upload_heatmap_duration",
2226 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2227 0 : )
2228 0 : .expect("failed to define a metric"),
2229 0 : download_heatmap: register_int_counter!(
2230 0 : "pageserver_secondary_download_heatmap",
2231 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2232 0 : )
2233 0 : .expect("failed to define a metric"),
2234 0 : download_layer: register_int_counter!(
2235 0 : "pageserver_secondary_download_layer",
2236 0 : "Number of downloads of layers by secondary mode locations"
2237 0 : )
2238 0 : .expect("failed to define a metric"),
2239 0 : }
2240 0 : });
2241 :
2242 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2243 0 : register_uint_gauge_vec!(
2244 0 : "pageserver_secondary_resident_physical_size",
2245 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2246 0 : &["tenant_id", "shard_id"]
2247 0 : )
2248 0 : .expect("failed to define a metric")
2249 0 : });
2250 :
2251 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2252 0 : register_uint_gauge!(
2253 0 : "pageserver_utilization_score",
2254 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2255 0 : )
2256 0 : .expect("failed to define a metric")
2257 0 : });
2258 :
2259 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2260 0 : register_uint_gauge_vec!(
2261 0 : "pageserver_secondary_heatmap_total_size",
2262 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2263 0 : &["tenant_id", "shard_id"]
2264 0 : )
2265 0 : .expect("failed to define a metric")
2266 0 : });
2267 :
2268 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2269 : pub enum RemoteOpKind {
2270 : Upload,
2271 : Download,
2272 : Delete,
2273 : }
2274 : impl RemoteOpKind {
2275 30490 : pub fn as_str(&self) -> &'static str {
2276 30490 : match self {
2277 28709 : Self::Upload => "upload",
2278 136 : Self::Download => "download",
2279 1645 : Self::Delete => "delete",
2280 : }
2281 30490 : }
2282 : }
2283 :
2284 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
2285 : pub enum RemoteOpFileKind {
2286 : Layer,
2287 : Index,
2288 : }
2289 : impl RemoteOpFileKind {
2290 30490 : pub fn as_str(&self) -> &'static str {
2291 30490 : match self {
2292 21387 : Self::Layer => "layer",
2293 9103 : Self::Index => "index",
2294 : }
2295 30490 : }
2296 : }
2297 :
2298 400 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
2299 400 : register_histogram_vec!(
2300 400 : "pageserver_remote_operation_seconds",
2301 400 : "Time spent on remote storage operations. \
2302 400 : Grouped by tenant, timeline, operation_kind and status. \
2303 400 : Does not account for time spent waiting in remote timeline client's queues.",
2304 400 : &["file_kind", "op_kind", "status"]
2305 400 : )
2306 400 : .expect("failed to define a metric")
2307 400 : });
2308 :
2309 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2310 0 : register_int_counter_vec!(
2311 0 : "pageserver_tenant_task_events",
2312 0 : "Number of task start/stop/fail events.",
2313 0 : &["event"],
2314 0 : )
2315 0 : .expect("Failed to register tenant_task_events metric")
2316 0 : });
2317 :
2318 : pub struct BackgroundLoopSemaphoreMetrics {
2319 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
2320 : durations: EnumMap<BackgroundLoopKind, Histogram>,
2321 : waiting_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2322 : running_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2323 : }
2324 :
2325 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> =
2326 40 : Lazy::new(|| {
2327 40 : let counters = register_int_counter_pair_vec!(
2328 40 : "pageserver_background_loop_semaphore_wait_start_count",
2329 40 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2330 40 : "pageserver_background_loop_semaphore_wait_finish_count",
2331 40 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2332 40 : &["task"],
2333 40 : )
2334 40 : .unwrap();
2335 40 :
2336 40 : let durations = register_histogram_vec!(
2337 40 : "pageserver_background_loop_semaphore_wait_seconds",
2338 40 : "Seconds spent waiting on background loop semaphore acquisition",
2339 40 : &["task"],
2340 40 : vec![0.01, 1.0, 5.0, 10.0, 30.0, 60.0, 180.0, 300.0, 600.0],
2341 40 : )
2342 40 : .unwrap();
2343 40 :
2344 40 : let waiting_tasks = register_int_gauge_vec!(
2345 40 : "pageserver_background_loop_semaphore_waiting_tasks",
2346 40 : "Number of background loop tasks waiting for semaphore",
2347 40 : &["task"],
2348 40 : )
2349 40 : .unwrap();
2350 40 :
2351 40 : let running_tasks = register_int_gauge_vec!(
2352 40 : "pageserver_background_loop_semaphore_running_tasks",
2353 40 : "Number of background loop tasks running concurrently",
2354 40 : &["task"],
2355 40 : )
2356 40 : .unwrap();
2357 40 :
2358 40 : BackgroundLoopSemaphoreMetrics {
2359 400 : counters: EnumMap::from_array(std::array::from_fn(|i| {
2360 400 : let kind = BackgroundLoopKind::from_usize(i);
2361 400 : counters.with_label_values(&[kind.into()])
2362 400 : })),
2363 400 : durations: EnumMap::from_array(std::array::from_fn(|i| {
2364 400 : let kind = BackgroundLoopKind::from_usize(i);
2365 400 : durations.with_label_values(&[kind.into()])
2366 400 : })),
2367 400 : waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2368 400 : let kind = BackgroundLoopKind::from_usize(i);
2369 400 : waiting_tasks.with_label_values(&[kind.into()])
2370 400 : })),
2371 400 : running_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2372 400 : let kind = BackgroundLoopKind::from_usize(i);
2373 400 : running_tasks.with_label_values(&[kind.into()])
2374 400 : })),
2375 40 : }
2376 40 : });
2377 :
2378 : impl BackgroundLoopSemaphoreMetrics {
2379 : /// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the
2380 : /// semaphore is acquired, and drop it when the task completes or is cancelled.
2381 728 : pub(crate) fn record(
2382 728 : &self,
2383 728 : task: BackgroundLoopKind,
2384 728 : ) -> BackgroundLoopSemaphoreMetricsRecorder {
2385 728 : BackgroundLoopSemaphoreMetricsRecorder::start(self, task)
2386 728 : }
2387 : }
2388 :
2389 : /// Records metrics for a background task.
2390 : pub struct BackgroundLoopSemaphoreMetricsRecorder<'a> {
2391 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2392 : task: BackgroundLoopKind,
2393 : start: Instant,
2394 : wait_counter_guard: Option<metrics::IntCounterPairGuard>,
2395 : }
2396 :
2397 : impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> {
2398 : /// Starts recording semaphore metrics: increments `wait_start_count` and `waiting_tasks`,
2399 : /// and notes the start time so the wait duration can be observed on acquisition (or on drop).
2400 728 : fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self {
2401 728 : metrics.waiting_tasks[task].inc();
2402 728 : Self {
2403 728 : metrics,
2404 728 : task,
2405 728 : start: Instant::now(),
2406 728 : wait_counter_guard: Some(metrics.counters[task].guard()),
2407 728 : }
2408 728 : }
2409 :
2410 : /// Signals that the semaphore has been acquired, and updates relevant metrics.
2411 728 : pub fn acquired(&mut self) -> Duration {
2412 728 : let waited = self.start.elapsed();
2413 728 : self.wait_counter_guard.take().expect("already acquired");
2414 728 : self.metrics.durations[self.task].observe(waited.as_secs_f64());
2415 728 : self.metrics.waiting_tasks[self.task].dec();
2416 728 : self.metrics.running_tasks[self.task].inc();
2417 728 : waited
2418 728 : }
2419 : }
2420 :
2421 : impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> {
2422 : /// The task either completed or was cancelled.
2423 728 : fn drop(&mut self) {
2424 728 : if self.wait_counter_guard.take().is_some() {
2425 0 : // Waiting.
2426 0 : self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64());
2427 0 : self.metrics.waiting_tasks[self.task].dec();
2428 728 : } else {
2429 728 : // Running.
2430 728 : self.metrics.running_tasks[self.task].dec();
2431 728 : }
2432 728 : }
2433 : }
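// Illustrative only: how a background task might use the recorder around semaphore
// acquisition. The semaphore itself and the loop kind chosen here are assumptions.
//
//     let mut recorder = BACKGROUND_LOOP_SEMAPHORE.record(BackgroundLoopKind::Compaction);
//     let _permit = semaphore.acquire().await.expect("semaphore closed");
//     let waited = recorder.acquired(); // records wait time, moves waiting -> running
//     // ... run the iteration; dropping `recorder` decrements running_tasks ...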
2434 :
2435 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2436 0 : register_int_counter_vec!(
2437 0 : "pageserver_background_loop_period_overrun_count",
2438 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2439 0 : &["task", "period"],
2440 0 : )
2441 0 : .expect("failed to define a metric")
2442 0 : });
2443 :
2444 : // walreceiver metrics
2445 :
2446 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2447 0 : register_int_counter!(
2448 0 : "pageserver_walreceiver_started_connections_total",
2449 0 : "Number of started walreceiver connections"
2450 0 : )
2451 0 : .expect("failed to define a metric")
2452 0 : });
2453 :
2454 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2455 0 : register_int_gauge!(
2456 0 : "pageserver_walreceiver_active_managers",
2457 0 : "Number of active walreceiver managers"
2458 0 : )
2459 0 : .expect("failed to define a metric")
2460 0 : });
2461 :
2462 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2463 0 : register_int_counter_vec!(
2464 0 : "pageserver_walreceiver_switches_total",
2465 0 : "Number of walreceiver manager change_connection calls",
2466 0 : &["reason"]
2467 0 : )
2468 0 : .expect("failed to define a metric")
2469 0 : });
2470 :
2471 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2472 0 : register_int_counter!(
2473 0 : "pageserver_walreceiver_broker_updates_total",
2474 0 : "Number of received broker updates in walreceiver"
2475 0 : )
2476 0 : .expect("failed to define a metric")
2477 0 : });
2478 :
2479 4 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2480 4 : register_int_counter_vec!(
2481 4 : "pageserver_walreceiver_candidates_events_total",
2482 4 : "Number of walreceiver candidate events",
2483 4 : &["event"]
2484 4 : )
2485 4 : .expect("failed to define a metric")
2486 4 : });
2487 :
2488 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2489 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2490 :
2491 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2492 4 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2493 :
2494 : // Metrics collected on WAL redo operations
2495 : //
2496 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2497 : // for access to the postgres process ('wait') since there is only one for
2498 : // each tenant.
2499 :
2500 : /// Time buckets are small because we want to be able to measure the
2501 : /// smallest redo processing times. These buckets allow us to measure down
2502 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
2503 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
2504 : ///
2505 : /// Values up to 1s are recorded because metrics show that we have redo
2506 : /// durations and lock times larger than 0.250s.
2507 : macro_rules! redo_histogram_time_buckets {
2508 : () => {
2509 : vec![
2510 : 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
2511 : 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
2512 : 1.000_000,
2513 : ]
2514 : };
2515 : }
2516 :
2517 : /// While we're at it, also measure the number of records replayed in each
2518 : /// operation. We have a global 'total replayed' counter, but that's not
2519 : /// as useful as 'what is the skew for how many records we replay in one
2520 : /// operation'.
2521 : macro_rules! redo_histogram_count_buckets {
2522 : () => {
2523 : vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
2524 : };
2525 : }
2526 :
2527 : macro_rules! redo_bytes_histogram_count_buckets {
2528 : () => {
2529 : // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
2530 : // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
2531 : vec![
2532 : 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2533 : 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
2534 : ]
2535 : };
2536 : }
2537 :
2538 : pub(crate) struct WalIngestMetrics {
2539 : pub(crate) bytes_received: IntCounter,
2540 : pub(crate) records_received: IntCounter,
2541 : pub(crate) records_observed: IntCounter,
2542 : pub(crate) records_committed: IntCounter,
2543 : pub(crate) records_filtered: IntCounter,
2544 : pub(crate) values_committed_metadata_images: IntCounter,
2545 : pub(crate) values_committed_metadata_deltas: IntCounter,
2546 : pub(crate) values_committed_data_images: IntCounter,
2547 : pub(crate) values_committed_data_deltas: IntCounter,
2548 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2549 : }
2550 :
2551 : impl WalIngestMetrics {
2552 0 : pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {
2553 0 : if stats.metadata_images > 0 {
2554 0 : self.values_committed_metadata_images
2555 0 : .inc_by(stats.metadata_images);
2556 0 : }
2557 0 : if stats.metadata_deltas > 0 {
2558 0 : self.values_committed_metadata_deltas
2559 0 : .inc_by(stats.metadata_deltas);
2560 0 : }
2561 0 : if stats.data_images > 0 {
2562 0 : self.values_committed_data_images.inc_by(stats.data_images);
2563 0 : }
2564 0 : if stats.data_deltas > 0 {
2565 0 : self.values_committed_data_deltas.inc_by(stats.data_deltas);
2566 0 : }
2567 0 : }
2568 : }
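// Illustrative only: after committing a datadir modification, ingest code can report the
// per-class/kind value counts in one call (the `stats` variable is an assumption):
//
//     WAL_INGEST.inc_values_committed(&stats);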
2569 :
2570 20 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
2571 20 : let values_committed = register_int_counter_vec!(
2572 20 : "pageserver_wal_ingest_values_committed",
2573 20 : "Number of values committed to pageserver storage from WAL records",
2574 20 : &["class", "kind"],
2575 20 : )
2576 20 : .expect("failed to define a metric");
2577 20 :
2578 20 : WalIngestMetrics {
2579 20 : bytes_received: register_int_counter!(
2580 20 : "pageserver_wal_ingest_bytes_received",
2581 20 : "Bytes of WAL ingested from safekeepers",
2582 20 : )
2583 20 : .unwrap(),
2584 20 : records_received: register_int_counter!(
2585 20 : "pageserver_wal_ingest_records_received",
2586 20 : "Number of WAL records received from safekeepers"
2587 20 : )
2588 20 : .expect("failed to define a metric"),
2589 20 : records_observed: register_int_counter!(
2590 20 : "pageserver_wal_ingest_records_observed",
2591 20 : "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
2592 20 : )
2593 20 : .expect("failed to define a metric"),
2594 20 : records_committed: register_int_counter!(
2595 20 : "pageserver_wal_ingest_records_committed",
2596 20 : "Number of WAL records which resulted in writes to pageserver storage"
2597 20 : )
2598 20 : .expect("failed to define a metric"),
2599 20 : records_filtered: register_int_counter!(
2600 20 : "pageserver_wal_ingest_records_filtered",
2601 20 : "Number of WAL records filtered out due to sharding"
2602 20 : )
2603 20 : .expect("failed to define a metric"),
2604 20 : values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]),
2605 20 : values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]),
2606 20 : values_committed_data_images: values_committed.with_label_values(&["data", "image"]),
2607 20 : values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]),
2608 20 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2609 20 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2610 20 : "Total number of zero gap blocks written on relation extends"
2611 20 : )
2612 20 : .expect("failed to define a metric"),
2613 20 : }
2614 20 : });
2615 :
2616 412 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2617 412 : register_int_counter_vec!(
2618 412 : "pageserver_timeline_wal_records_received",
2619 412 : "Number of WAL records received per shard",
2620 412 : &["tenant_id", "shard_id", "timeline_id"]
2621 412 : )
2622 412 : .expect("failed to define a metric")
2623 412 : });
2624 :
2625 12 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2626 12 : register_histogram!(
2627 12 : "pageserver_wal_redo_seconds",
2628 12 : "Time spent on WAL redo",
2629 12 : redo_histogram_time_buckets!()
2630 12 : )
2631 12 : .expect("failed to define a metric")
2632 12 : });
2633 :
2634 12 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2635 12 : register_histogram!(
2636 12 : "pageserver_wal_redo_records_histogram",
2637 12 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2638 12 : redo_histogram_count_buckets!(),
2639 12 : )
2640 12 : .expect("failed to define a metric")
2641 12 : });
2642 :
2643 12 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2644 12 : register_histogram!(
2645 12 : "pageserver_wal_redo_bytes_histogram",
2646 12 : "Histogram of number of records replayed per redo sent to Postgres",
2647 12 : redo_bytes_histogram_count_buckets!(),
2648 12 : )
2649 12 : .expect("failed to define a metric")
2650 12 : });
2651 :
2652 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2653 12 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2654 12 : register_int_counter!(
2655 12 : "pageserver_replayed_wal_records_total",
2656 12 : "Number of WAL records replayed in WAL redo process"
2657 12 : )
2658 12 : .unwrap()
2659 12 : });
2660 :
2661 : #[rustfmt::skip]
2662 16 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2663 16 : register_histogram!(
2664 16 : "pageserver_wal_redo_process_launch_duration",
2665 16 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2666 16 : vec![
2667 16 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2668 16 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2669 16 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2670 16 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2671 16 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2672 16 : ],
2673 16 : )
2674 16 : .expect("failed to define a metric")
2675 16 : });
2676 :
2677 : pub(crate) struct WalRedoProcessCounters {
2678 : pub(crate) started: IntCounter,
2679 : pub(crate) killed_by_cause: EnumMap<WalRedoKillCause, IntCounter>,
2680 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2681 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2682 : }
2683 :
2684 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2685 : pub(crate) enum WalRedoKillCause {
2686 : WalRedoProcessDrop,
2687 : NoLeakChildDrop,
2688 : Startup,
2689 : }
2690 :
2691 : impl Default for WalRedoProcessCounters {
2692 16 : fn default() -> Self {
2693 16 : let started = register_int_counter!(
2694 16 : "pageserver_wal_redo_process_started_total",
2695 16 : "Number of WAL redo processes started",
2696 16 : )
2697 16 : .unwrap();
2698 16 :
2699 16 : let killed = register_int_counter_vec!(
2700 16 : "pageserver_wal_redo_process_stopped_total",
2701 16 : "Number of WAL redo processes stopped",
2702 16 : &["cause"],
2703 16 : )
2704 16 : .unwrap();
2705 16 :
2706 16 : let active_stderr_logger_tasks_started = register_int_counter!(
2707 16 : "pageserver_walredo_stderr_logger_tasks_started_total",
2708 16 : "Number of active walredo stderr logger tasks that have started",
2709 16 : )
2710 16 : .unwrap();
2711 16 :
2712 16 : let active_stderr_logger_tasks_finished = register_int_counter!(
2713 16 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2714 16 : "Number of active walredo stderr logger tasks that have finished",
2715 16 : )
2716 16 : .unwrap();
2717 16 :
2718 16 : Self {
2719 16 : started,
2720 48 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2721 48 : let cause = WalRedoKillCause::from_usize(i);
2722 48 : let cause_str: &'static str = cause.into();
2723 48 : killed.with_label_values(&[cause_str])
2724 48 : })),
2725 16 : active_stderr_logger_tasks_started,
2726 16 : active_stderr_logger_tasks_finished,
2727 16 : }
2728 16 : }
2729 : }
2730 :
2731 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2732 : Lazy::new(WalRedoProcessCounters::default);
2733 :
2734 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2735 : pub(crate) struct StorageTimeMetricsTimer {
2736 : metrics: StorageTimeMetrics,
2737 : start: Instant,
2738 : }
2739 :
2740 : impl StorageTimeMetricsTimer {
2741 4260 : fn new(metrics: StorageTimeMetrics) -> Self {
2742 4260 : Self {
2743 4260 : metrics,
2744 4260 : start: Instant::now(),
2745 4260 : }
2746 4260 : }
2747 :
2748 : /// Returns the elapsed duration of the timer.
2749 4260 : pub fn elapsed(&self) -> Duration {
2750 4260 : self.start.elapsed()
2751 4260 : }
2752 :
2753 : /// Record the time from creation to now and return it.
2754 4260 : pub fn stop_and_record(self) -> Duration {
2755 4260 : let duration = self.elapsed();
2756 4260 : let seconds = duration.as_secs_f64();
2757 4260 : self.metrics.timeline_sum.inc_by(seconds);
2758 4260 : self.metrics.timeline_count.inc();
2759 4260 : self.metrics.global_histogram.observe(seconds);
2760 4260 : duration
2761 4260 : }
2762 :
2763 : /// Turns this timer into one that always records on drop -- usually this means the time is
2764 : /// recorded even if a function returns early via a `?` path.
2765 8 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2766 8 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2767 8 : }
2768 : }
2769 :
2770 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2771 :
2772 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2773 8 : fn drop(&mut self) {
2774 8 : if let Some(inner) = self.0.take() {
2775 8 : inner.stop_and_record();
2776 8 : }
2777 8 : }
2778 : }
2779 :
2780 : impl AlwaysRecordingStorageTimeMetricsTimer {
2781 : /// Returns the elapsed duration of the timer.
2782 0 : pub fn elapsed(&self) -> Duration {
2783 0 : self.0.as_ref().expect("not dropped yet").elapsed()
2784 0 : }
2785 : }
2786 :
2787 : /// Timing facilities for a globally histogrammed metric, supplemented by per-tenant and
2788 : /// per-timeline total sum and count.
2789 : #[derive(Clone, Debug)]
2790 : pub(crate) struct StorageTimeMetrics {
2791 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
2792 : timeline_sum: Counter,
2793 : /// Number of operations, per operation, tenant_id and timeline_id
2794 : timeline_count: IntCounter,
2795 : /// Global histogram having only the "operation" label.
2796 : global_histogram: Histogram,
2797 : }
2798 :
2799 : impl StorageTimeMetrics {
2800 8136 : pub fn new(
2801 8136 : operation: StorageTimeOperation,
2802 8136 : tenant_id: &str,
2803 8136 : shard_id: &str,
2804 8136 : timeline_id: &str,
2805 8136 : ) -> Self {
2806 8136 : let operation: &'static str = operation.into();
2807 8136 :
2808 8136 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2809 8136 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2810 8136 : .unwrap();
2811 8136 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2812 8136 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2813 8136 : .unwrap();
2814 8136 : let global_histogram = STORAGE_TIME_GLOBAL
2815 8136 : .get_metric_with_label_values(&[operation])
2816 8136 : .unwrap();
2817 8136 :
2818 8136 : StorageTimeMetrics {
2819 8136 : timeline_sum,
2820 8136 : timeline_count,
2821 8136 : global_histogram,
2822 8136 : }
2823 8136 : }
2824 :
2825 : /// Starts timing a new operation.
2826 : ///
2827 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2828 4260 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2829 4260 : StorageTimeMetricsTimer::new(self.clone())
2830 4260 : }
2831 : }
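// Illustrative only: timing a storage operation with explicit recording, or with
// record-on-drop for functions that may return early via `?`. The metrics instance used
// here (`timeline_metrics`) and the fields chosen are assumptions based on the
// `TimelineMetrics` struct below.
//
//     let timer = timeline_metrics.flush_time_histo.start_timer();
//     // ... do the flush ...
//     let elapsed = timer.stop_and_record();
//
//     // or, when an early return should still be recorded:
//     let _timer = timeline_metrics.compact_time_histo.start_timer().record_on_drop();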
2832 :
2833 : #[derive(Debug)]
2834 : pub(crate) struct TimelineMetrics {
2835 : tenant_id: String,
2836 : shard_id: String,
2837 : timeline_id: String,
2838 : pub flush_time_histo: StorageTimeMetrics,
2839 : pub flush_delay_histo: StorageTimeMetrics,
2840 : pub flush_wait_upload_time_gauge: Gauge,
2841 : pub compact_time_histo: StorageTimeMetrics,
2842 : pub create_images_time_histo: StorageTimeMetrics,
2843 : pub logical_size_histo: StorageTimeMetrics,
2844 : pub imitate_logical_size_histo: StorageTimeMetrics,
2845 : pub load_layer_map_histo: StorageTimeMetrics,
2846 : pub garbage_collect_histo: StorageTimeMetrics,
2847 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
2848 : pub last_record_lsn_gauge: IntGauge,
2849 : pub disk_consistent_lsn_gauge: IntGauge,
2850 : pub pitr_history_size: UIntGauge,
2851 : pub archival_size: UIntGauge,
2852 : pub layers_per_read: Histogram,
2853 : pub standby_horizon_gauge: IntGauge,
2854 : pub resident_physical_size_gauge: UIntGauge,
2855 : pub visible_physical_size_gauge: UIntGauge,
2856 : /// copy of LayeredTimeline.current_logical_size
2857 : pub current_logical_size_gauge: UIntGauge,
2858 : pub aux_file_size_gauge: IntGauge,
2859 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
2860 : pub evictions: IntCounter,
2861 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
2862 : /// Number of valid LSN leases.
2863 : pub valid_lsn_lease_count_gauge: UIntGauge,
2864 : pub wal_records_received: IntCounter,
2865 : pub storage_io_size: StorageIoSizeMetrics,
2866 : shutdown: std::sync::atomic::AtomicBool,
2867 : }
2868 :
2869 : impl TimelineMetrics {
2870 904 : pub fn new(
2871 904 : tenant_shard_id: &TenantShardId,
2872 904 : timeline_id_raw: &TimelineId,
2873 904 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2874 904 : ) -> Self {
2875 904 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2876 904 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2877 904 : let timeline_id = timeline_id_raw.to_string();
2878 904 : let flush_time_histo = StorageTimeMetrics::new(
2879 904 : StorageTimeOperation::LayerFlush,
2880 904 : &tenant_id,
2881 904 : &shard_id,
2882 904 : &timeline_id,
2883 904 : );
2884 904 : let flush_delay_histo = StorageTimeMetrics::new(
2885 904 : StorageTimeOperation::LayerFlushDelay,
2886 904 : &tenant_id,
2887 904 : &shard_id,
2888 904 : &timeline_id,
2889 904 : );
2890 904 : let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
2891 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2892 904 : .unwrap();
2893 904 : let compact_time_histo = StorageTimeMetrics::new(
2894 904 : StorageTimeOperation::Compact,
2895 904 : &tenant_id,
2896 904 : &shard_id,
2897 904 : &timeline_id,
2898 904 : );
2899 904 : let create_images_time_histo = StorageTimeMetrics::new(
2900 904 : StorageTimeOperation::CreateImages,
2901 904 : &tenant_id,
2902 904 : &shard_id,
2903 904 : &timeline_id,
2904 904 : );
2905 904 : let logical_size_histo = StorageTimeMetrics::new(
2906 904 : StorageTimeOperation::LogicalSize,
2907 904 : &tenant_id,
2908 904 : &shard_id,
2909 904 : &timeline_id,
2910 904 : );
2911 904 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2912 904 : StorageTimeOperation::ImitateLogicalSize,
2913 904 : &tenant_id,
2914 904 : &shard_id,
2915 904 : &timeline_id,
2916 904 : );
2917 904 : let load_layer_map_histo = StorageTimeMetrics::new(
2918 904 : StorageTimeOperation::LoadLayerMap,
2919 904 : &tenant_id,
2920 904 : &shard_id,
2921 904 : &timeline_id,
2922 904 : );
2923 904 : let garbage_collect_histo = StorageTimeMetrics::new(
2924 904 : StorageTimeOperation::Gc,
2925 904 : &tenant_id,
2926 904 : &shard_id,
2927 904 : &timeline_id,
2928 904 : );
2929 904 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2930 904 : StorageTimeOperation::FindGcCutoffs,
2931 904 : &tenant_id,
2932 904 : &shard_id,
2933 904 : &timeline_id,
2934 904 : );
2935 904 : let last_record_lsn_gauge = LAST_RECORD_LSN
2936 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2937 904 : .unwrap();
2938 904 :
2939 904 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
2940 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2941 904 : .unwrap();
2942 904 :
2943 904 : let pitr_history_size = PITR_HISTORY_SIZE
2944 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2945 904 : .unwrap();
2946 904 :
2947 904 : let archival_size = TIMELINE_ARCHIVE_SIZE
2948 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2949 904 : .unwrap();
2950 904 :
2951 904 : let layers_per_read = LAYERS_PER_READ
2952 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2953 904 : .unwrap();
2954 904 :
2955 904 : let standby_horizon_gauge = STANDBY_HORIZON
2956 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2957 904 : .unwrap();
2958 904 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2959 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2960 904 : .unwrap();
2961 904 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
2962 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2963 904 : .unwrap();
2964 904 : // TODO: we shouldn't expose this metric
2965 904 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2966 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2967 904 : .unwrap();
2968 904 : let aux_file_size_gauge = AUX_FILE_SIZE
2969 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2970 904 : .unwrap();
2971 904 :         // TODO use impl Trait syntax here once we have the ability to use it: https://github.com/rust-lang/rust/issues/63065
2972 904 : let directory_entries_count_gauge_closure = {
2973 904 : let tenant_shard_id = *tenant_shard_id;
2974 904 : let timeline_id_raw = *timeline_id_raw;
2975 0 : move || {
2976 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2977 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2978 0 : let timeline_id = timeline_id_raw.to_string();
2979 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2980 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2981 0 : .unwrap();
2982 0 : gauge
2983 0 : }
2984 : };
2985 904 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2986 904 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2987 904 : let evictions = EVICTIONS
2988 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2989 904 : .unwrap();
2990 904 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2991 904 : .build(&tenant_id, &shard_id, &timeline_id);
2992 904 :
2993 904 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2994 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2995 904 : .unwrap();
2996 904 :
2997 904 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
2998 904 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2999 904 : .unwrap();
3000 904 :
3001 904 : let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
3002 904 :
3003 904 : TimelineMetrics {
3004 904 : tenant_id,
3005 904 : shard_id,
3006 904 : timeline_id,
3007 904 : flush_time_histo,
3008 904 : flush_delay_histo,
3009 904 : flush_wait_upload_time_gauge,
3010 904 : compact_time_histo,
3011 904 : create_images_time_histo,
3012 904 : logical_size_histo,
3013 904 : imitate_logical_size_histo,
3014 904 : garbage_collect_histo,
3015 904 : find_gc_cutoffs_histo,
3016 904 : load_layer_map_histo,
3017 904 : last_record_lsn_gauge,
3018 904 : disk_consistent_lsn_gauge,
3019 904 : pitr_history_size,
3020 904 : archival_size,
3021 904 : layers_per_read,
3022 904 : standby_horizon_gauge,
3023 904 : resident_physical_size_gauge,
3024 904 : visible_physical_size_gauge,
3025 904 : current_logical_size_gauge,
3026 904 : aux_file_size_gauge,
3027 904 : directory_entries_count_gauge,
3028 904 : evictions,
3029 904 : evictions_with_low_residence_duration: std::sync::RwLock::new(
3030 904 : evictions_with_low_residence_duration,
3031 904 : ),
3032 904 : storage_io_size,
3033 904 : valid_lsn_lease_count_gauge,
3034 904 : wal_records_received,
3035 904 : shutdown: std::sync::atomic::AtomicBool::default(),
3036 904 : }
3037 904 : }
3038 :
3039 3156 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
3040 3156 : self.resident_physical_size_add(sz);
3041 3156 : }
3042 :
3043 1076 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
3044 1076 : self.resident_physical_size_gauge.sub(sz);
3045 1076 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
3046 1076 : }
3047 :
3048 3428 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
3049 3428 : self.resident_physical_size_gauge.add(sz);
3050 3428 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
3051 3428 : }
3052 :
3053 20 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
3054 20 : self.resident_physical_size_gauge.get()
3055 20 : }
3056 :
3057 2356 : pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
3058 2356 : self.flush_wait_upload_time_gauge.add(duration);
3059 2356 : crate::metrics::FLUSH_WAIT_UPLOAD_TIME
3060 2356 : .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
3061 2356 : .unwrap()
3062 2356 : .add(duration);
3063 2356 : }
3064 :
3065 : /// Generates TIMELINE_LAYER labels for a persistent layer.
3066 5232 : fn make_layer_labels(&self, layer_desc: &PersistentLayerDesc) -> [&str; 5] {
3067 5232 : let level = match LayerMap::is_l0(&layer_desc.key_range, layer_desc.is_delta()) {
3068 2848 : true => LayerLevel::L0,
3069 2384 : false => LayerLevel::L1,
3070 : };
3071 5232 : let kind = match layer_desc.is_delta() {
3072 4357 : true => LayerKind::Delta,
3073 875 : false => LayerKind::Image,
3074 : };
3075 5232 : [
3076 5232 : &self.tenant_id,
3077 5232 : &self.shard_id,
3078 5232 : &self.timeline_id,
3079 5232 : level.into(),
3080 5232 : kind.into(),
3081 5232 : ]
3082 5232 : }
3083 :
3084 : /// Generates TIMELINE_LAYER labels for a frozen ephemeral layer.
3085 4712 : fn make_frozen_layer_labels(&self, _layer: &InMemoryLayer) -> [&str; 5] {
3086 4712 : [
3087 4712 : &self.tenant_id,
3088 4712 : &self.shard_id,
3089 4712 : &self.timeline_id,
3090 4712 : LayerLevel::Frozen.into(),
3091 4712 : LayerKind::Delta.into(), // by definition
3092 4712 : ]
3093 4712 : }
3094 :
3095 :     /// Removes a frozen ephemeral layer from TIMELINE_LAYER metrics.
3096 2356 : pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
3097 2356 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3098 2356 : let labels = self.make_frozen_layer_labels(layer);
3099 2356 : let size = layer.try_len().expect("frozen layer should have no writer");
3100 2356 : TIMELINE_LAYER_COUNT
3101 2356 : .get_metric_with_label_values(&labels)
3102 2356 : .unwrap()
3103 2356 : .dec();
3104 2356 : TIMELINE_LAYER_SIZE
3105 2356 : .get_metric_with_label_values(&labels)
3106 2356 : .unwrap()
3107 2356 : .sub(size);
3108 2356 : }
3109 :
3110 : /// Adds a frozen ephemeral layer to TIMELINE_LAYER metrics.
3111 2356 : pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
3112 2356 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3113 2356 : let labels = self.make_frozen_layer_labels(layer);
3114 2356 : let size = layer.try_len().expect("frozen layer should have no writer");
3115 2356 : TIMELINE_LAYER_COUNT
3116 2356 : .get_metric_with_label_values(&labels)
3117 2356 : .unwrap()
3118 2356 : .inc();
3119 2356 : TIMELINE_LAYER_SIZE
3120 2356 : .get_metric_with_label_values(&labels)
3121 2356 : .unwrap()
3122 2356 : .add(size);
3123 2356 : }
3124 :
3125 : /// Removes a persistent layer from TIMELINE_LAYER metrics.
3126 1372 : pub fn dec_layer(&self, layer_desc: &PersistentLayerDesc) {
3127 1372 : let labels = self.make_layer_labels(layer_desc);
3128 1372 : TIMELINE_LAYER_COUNT
3129 1372 : .get_metric_with_label_values(&labels)
3130 1372 : .unwrap()
3131 1372 : .dec();
3132 1372 : TIMELINE_LAYER_SIZE
3133 1372 : .get_metric_with_label_values(&labels)
3134 1372 : .unwrap()
3135 1372 : .sub(layer_desc.file_size);
3136 1372 : }
3137 :
3138 : /// Adds a persistent layer to TIMELINE_LAYER metrics.
3139 3860 : pub fn inc_layer(&self, layer_desc: &PersistentLayerDesc) {
3140 3860 : let labels = self.make_layer_labels(layer_desc);
3141 3860 : TIMELINE_LAYER_COUNT
3142 3860 : .get_metric_with_label_values(&labels)
3143 3860 : .unwrap()
3144 3860 : .inc();
3145 3860 : TIMELINE_LAYER_SIZE
3146 3860 : .get_metric_with_label_values(&labels)
3147 3860 : .unwrap()
3148 3860 : .add(layer_desc.file_size);
3149 3860 : }
3150 :
3151 20 : pub(crate) fn shutdown(&self) {
3152 20 : let was_shutdown = self
3153 20 : .shutdown
3154 20 : .swap(true, std::sync::atomic::Ordering::Relaxed);
3155 20 :
3156 20 : if was_shutdown {
3157 :             // This happens on tenant deletion: the tenant first shuts down its timelines, then
3158 :             // invokes timeline deletion, which shuts down the timeline a second time.
3159 :             // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080 is fixed.
3160 0 : return;
3161 20 : }
3162 20 :
3163 20 : let tenant_id = &self.tenant_id;
3164 20 : let timeline_id = &self.timeline_id;
3165 20 : let shard_id = &self.shard_id;
3166 20 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3167 20 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3168 20 : let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3169 20 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3170 20 : {
3171 20 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
3172 20 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3173 20 : }
3174 20 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3175 20 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3176 20 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
3177 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3178 20 : }
3179 :
3180 20 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3181 20 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3182 :
3183 80 : for ref level in LayerLevel::iter() {
3184 180 : for ref kind in LayerKind::iter() {
3185 120 : let labels: [&str; 5] =
3186 120 : [tenant_id, shard_id, timeline_id, level.into(), kind.into()];
3187 120 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&labels);
3188 120 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&labels);
3189 120 : }
3190 : }
3191 :
3192 20 : let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3193 20 :
3194 20 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3195 20 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3196 20 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3197 20 :
3198 20 : self.evictions_with_low_residence_duration
3199 20 : .write()
3200 20 : .unwrap()
3201 20 : .remove(tenant_id, shard_id, timeline_id);
3202 :
3203 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
3204 :         // removed at the end of it. The idea is that the metrics should outlive the
3205 :         // entity over whose lifetime they are observed; e.g., the smgr metrics should
3206 :         // outlive an individual smgr connection, but not the timeline.
3207 :
3208 200 : for op in StorageTimeOperation::VARIANTS {
3209 180 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
3210 180 : op,
3211 180 : tenant_id,
3212 180 : shard_id,
3213 180 : timeline_id,
3214 180 : ]);
3215 180 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
3216 180 : op,
3217 180 : tenant_id,
3218 180 : shard_id,
3219 180 : timeline_id,
3220 180 : ]);
3221 180 : }
3222 :
3223 60 : for op in StorageIoSizeOperation::VARIANTS {
3224 40 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
3225 40 : }
3226 :
3227 20 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
3228 20 : SmgrQueryType::GetPageAtLsn.into(),
3229 20 : tenant_id,
3230 20 : shard_id,
3231 20 : timeline_id,
3232 20 : ]);
3233 20 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
3234 20 : SmgrQueryType::GetPageAtLsn.into(),
3235 20 : tenant_id,
3236 20 : shard_id,
3237 20 : timeline_id,
3238 20 : ]);
3239 20 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
3240 20 : tenant_id,
3241 20 : shard_id,
3242 20 : timeline_id,
3243 20 : ]);
3244 20 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
3245 20 : tenant_id,
3246 20 : shard_id,
3247 20 : timeline_id,
3248 20 : ]);
3249 20 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
3250 20 : tenant_id,
3251 20 : shard_id,
3252 20 : timeline_id,
3253 20 : ]);
3254 20 : let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
3255 20 : tenant_id,
3256 20 : shard_id,
3257 20 : timeline_id,
3258 20 : ]);
3259 20 : }
3260 : }
3261 :
3262 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3263 12 : // Only shard zero deals in synthetic sizes
3264 12 : if tenant_shard_id.is_shard_zero() {
3265 12 : let tid = tenant_shard_id.tenant_id.to_string();
3266 12 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
3267 12 : }
3268 :
3269 12 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
3270 12 :
3271 12 : // we leave the BROKEN_TENANTS_SET entry if any
3272 12 : }
3273 :
3274 : /// Maintain a per timeline gauge in addition to the global gauge.
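     : ///
     : /// `set()` stores the new value on the per-timeline gauge and applies only the delta
     : /// since the previous `set()` to the shared `REMOTE_PHYSICAL_SIZE_GLOBAL` gauge;
     : /// `Drop` subtracts the last stored value, so the global gauge stays consistent as
     : /// timelines come and go. For example (values illustrative):
     : ///
     : /// ```ignore
     : /// // previously set to 100, then set(70)  => per-timeline gauge = 70,  global gauge -= 30
     : /// // previously set to 100, then set(130) => per-timeline gauge = 130, global gauge += 30
     : /// ```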
3275 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
3276 : last_set: AtomicU64,
3277 : gauge: UIntGauge,
3278 : }
3279 :
3280 : impl PerTimelineRemotePhysicalSizeGauge {
3281 924 : fn new(per_timeline_gauge: UIntGauge) -> Self {
3282 924 : Self {
3283 924 : last_set: AtomicU64::new(0),
3284 924 : gauge: per_timeline_gauge,
3285 924 : }
3286 924 : }
3287 3869 : pub(crate) fn set(&self, sz: u64) {
3288 3869 : self.gauge.set(sz);
3289 3869 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
3290 3869 : if sz < prev {
3291 69 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
3292 3800 : } else {
3293 3800 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
3294 3800 : };
3295 3869 : }
3296 4 : pub(crate) fn get(&self) -> u64 {
3297 4 : self.gauge.get()
3298 4 : }
3299 : }
3300 :
3301 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
3302 40 : fn drop(&mut self) {
3303 40 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
3304 40 : }
3305 : }
3306 :
3307 : pub(crate) struct RemoteTimelineClientMetrics {
3308 : tenant_id: String,
3309 : shard_id: String,
3310 : timeline_id: String,
3311 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
3312 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
3313 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3314 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3315 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
3316 : }
3317 :
3318 : impl RemoteTimelineClientMetrics {
3319 924 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
3320 924 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
3321 924 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
3322 924 : let timeline_id_str = timeline_id.to_string();
3323 924 :
3324 924 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
3325 924 : REMOTE_PHYSICAL_SIZE
3326 924 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3327 924 : .unwrap(),
3328 924 : );
3329 924 :
3330 924 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
3331 924 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3332 924 : .unwrap();
3333 924 :
3334 924 : RemoteTimelineClientMetrics {
3335 924 : tenant_id: tenant_id_str,
3336 924 : shard_id: shard_id_str,
3337 924 : timeline_id: timeline_id_str,
3338 924 : calls: Mutex::new(HashMap::default()),
3339 924 : bytes_started_counter: Mutex::new(HashMap::default()),
3340 924 : bytes_finished_counter: Mutex::new(HashMap::default()),
3341 924 : remote_physical_size_gauge,
3342 924 : projected_remote_consistent_lsn_gauge,
3343 924 : }
3344 924 : }
3345 :
3346 6103 : pub fn remote_operation_time(
3347 6103 : &self,
3348 6103 : file_kind: &RemoteOpFileKind,
3349 6103 : op_kind: &RemoteOpKind,
3350 6103 : status: &'static str,
3351 6103 : ) -> Histogram {
3352 6103 : let key = (file_kind.as_str(), op_kind.as_str(), status);
3353 6103 : REMOTE_OPERATION_TIME
3354 6103 : .get_metric_with_label_values(&[key.0, key.1, key.2])
3355 6103 : .unwrap()
3356 6103 : }
3357 :
3358 14325 : fn calls_counter_pair(
3359 14325 : &self,
3360 14325 : file_kind: &RemoteOpFileKind,
3361 14325 : op_kind: &RemoteOpKind,
3362 14325 : ) -> IntCounterPair {
3363 14325 : let mut guard = self.calls.lock().unwrap();
3364 14325 : let key = (file_kind.as_str(), op_kind.as_str());
3365 14325 : let metric = guard.entry(key).or_insert_with(move || {
3366 1661 : REMOTE_TIMELINE_CLIENT_CALLS
3367 1661 : .get_metric_with_label_values(&[
3368 1661 : &self.tenant_id,
3369 1661 : &self.shard_id,
3370 1661 : &self.timeline_id,
3371 1661 : key.0,
3372 1661 : key.1,
3373 1661 : ])
3374 1661 : .unwrap()
3375 14325 : });
3376 14325 : metric.clone()
3377 14325 : }
3378 :
3379 3484 : fn bytes_started_counter(
3380 3484 : &self,
3381 3484 : file_kind: &RemoteOpFileKind,
3382 3484 : op_kind: &RemoteOpKind,
3383 3484 : ) -> IntCounter {
3384 3484 : let mut guard = self.bytes_started_counter.lock().unwrap();
3385 3484 : let key = (file_kind.as_str(), op_kind.as_str());
3386 3484 : let metric = guard.entry(key).or_insert_with(move || {
3387 656 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3388 656 : .get_metric_with_label_values(&[
3389 656 : &self.tenant_id,
3390 656 : &self.shard_id,
3391 656 : &self.timeline_id,
3392 656 : key.0,
3393 656 : key.1,
3394 656 : ])
3395 656 : .unwrap()
3396 3484 : });
3397 3484 : metric.clone()
3398 3484 : }
3399 :
3400 6554 : fn bytes_finished_counter(
3401 6554 : &self,
3402 6554 : file_kind: &RemoteOpFileKind,
3403 6554 : op_kind: &RemoteOpKind,
3404 6554 : ) -> IntCounter {
3405 6554 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3406 6554 : let key = (file_kind.as_str(), op_kind.as_str());
3407 6554 : let metric = guard.entry(key).or_insert_with(move || {
3408 656 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3409 656 : .get_metric_with_label_values(&[
3410 656 : &self.tenant_id,
3411 656 : &self.shard_id,
3412 656 : &self.timeline_id,
3413 656 : key.0,
3414 656 : key.1,
3415 656 : ])
3416 656 : .unwrap()
3417 6554 : });
3418 6554 : metric.clone()
3419 6554 : }
3420 : }
3421 :
3422 : #[cfg(test)]
3423 : impl RemoteTimelineClientMetrics {
3424 12 : pub fn get_bytes_started_counter_value(
3425 12 : &self,
3426 12 : file_kind: &RemoteOpFileKind,
3427 12 : op_kind: &RemoteOpKind,
3428 12 : ) -> Option<u64> {
3429 12 : let guard = self.bytes_started_counter.lock().unwrap();
3430 12 : let key = (file_kind.as_str(), op_kind.as_str());
3431 12 : guard.get(&key).map(|counter| counter.get())
3432 12 : }
3433 :
3434 12 : pub fn get_bytes_finished_counter_value(
3435 12 : &self,
3436 12 : file_kind: &RemoteOpFileKind,
3437 12 : op_kind: &RemoteOpKind,
3438 12 : ) -> Option<u64> {
3439 12 : let guard = self.bytes_finished_counter.lock().unwrap();
3440 12 : let key = (file_kind.as_str(), op_kind.as_str());
3441 12 : guard.get(&key).map(|counter| counter.get())
3442 12 : }
3443 : }
3444 :
3445 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3446 : #[must_use]
3447 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3448 : /// Decremented on drop.
3449 : calls_counter_pair: Option<IntCounterPair>,
3450 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
3451 : bytes_finished: Option<(IntCounter, u64)>,
3452 : }
3453 :
3454 : impl RemoteTimelineClientCallMetricGuard {
3455 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
3456 : /// The caller vouches to do the metric updates manually.
3457 7588 : pub fn will_decrement_manually(mut self) {
3458 7588 : let RemoteTimelineClientCallMetricGuard {
3459 7588 : calls_counter_pair,
3460 7588 : bytes_finished,
3461 7588 : } = &mut self;
3462 7588 : calls_counter_pair.take();
3463 7588 : bytes_finished.take();
3464 7588 : }
3465 : }
3466 :
3467 : impl Drop for RemoteTimelineClientCallMetricGuard {
3468 7656 : fn drop(&mut self) {
3469 7656 : let RemoteTimelineClientCallMetricGuard {
3470 7656 : calls_counter_pair,
3471 7656 : bytes_finished,
3472 7656 : } = self;
3473 7656 : if let Some(guard) = calls_counter_pair.take() {
3474 68 : guard.dec();
3475 7588 : }
3476 7656 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3477 0 : bytes_finished_metric.inc_by(*value);
3478 7656 : }
3479 7656 : }
3480 : }
3481 :
3482 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
3483 : /// track the byte size of this call in applicable metric(s).
3484 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
3485 : /// Do not account for this call's byte size in any metrics.
3486 : /// The `reason` field is there to make the call sites self-documenting
3487 : /// about why they don't need the metric.
3488 : DontTrackSize { reason: &'static str },
3489 : /// Track the byte size of the call in applicable metric(s).
3490 : Bytes(u64),
3491 : }
3492 :
3493 : impl RemoteTimelineClientMetrics {
3494 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3495 : ///
3496 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
3497 :     /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
3498 : /// is more suitable.
3499 : /// Never do both.
3500 7656 : pub(crate) fn call_begin(
3501 7656 : &self,
3502 7656 : file_kind: &RemoteOpFileKind,
3503 7656 : op_kind: &RemoteOpKind,
3504 7656 : size: RemoteTimelineClientMetricsCallTrackSize,
3505 7656 : ) -> RemoteTimelineClientCallMetricGuard {
3506 7656 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3507 7656 : calls_counter_pair.inc();
3508 :
3509 7656 : let bytes_finished = match size {
3510 4172 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3511 4172 : // nothing to do
3512 4172 : None
3513 : }
3514 3484 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3515 3484 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3516 3484 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3517 3484 : Some((finished_counter, size))
3518 : }
3519 : };
3520 7656 : RemoteTimelineClientCallMetricGuard {
3521 7656 : calls_counter_pair: Some(calls_counter_pair),
3522 7656 : bytes_finished,
3523 7656 : }
3524 7656 : }
3525 :
3526 :     /// Manually update the metrics that track completions, instead of using the guard object.
3527 : /// Using the guard object is generally preferable.
3528 : /// See [`call_begin`](Self::call_begin) for more context.
3529 6669 : pub(crate) fn call_end(
3530 6669 : &self,
3531 6669 : file_kind: &RemoteOpFileKind,
3532 6669 : op_kind: &RemoteOpKind,
3533 6669 : size: RemoteTimelineClientMetricsCallTrackSize,
3534 6669 : ) {
3535 6669 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3536 6669 : calls_counter_pair.dec();
3537 6669 : match size {
3538 3599 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3539 3070 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3540 3070 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3541 3070 : }
3542 : }
3543 6669 : }
3544 : }
3545 :
3546 : impl Drop for RemoteTimelineClientMetrics {
3547 40 : fn drop(&mut self) {
3548 40 : let RemoteTimelineClientMetrics {
3549 40 : tenant_id,
3550 40 : shard_id,
3551 40 : timeline_id,
3552 40 : remote_physical_size_gauge,
3553 40 : calls,
3554 40 : bytes_started_counter,
3555 40 : bytes_finished_counter,
3556 40 : projected_remote_consistent_lsn_gauge,
3557 40 : } = self;
3558 48 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3559 48 : let mut res = [Ok(()), Ok(())];
3560 48 : REMOTE_TIMELINE_CLIENT_CALLS
3561 48 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3562 48 : // don't care about results
3563 48 : }
3564 40 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3565 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3566 12 : tenant_id,
3567 12 : shard_id,
3568 12 : timeline_id,
3569 12 : a,
3570 12 : b,
3571 12 : ]);
3572 12 : }
3573 40 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3574 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3575 12 : tenant_id,
3576 12 : shard_id,
3577 12 : timeline_id,
3578 12 : a,
3579 12 : b,
3580 12 : ]);
3581 12 : }
3582 40 : {
3583 40 :             let _ = remote_physical_size_gauge; // used to avoid an 'unused' warning in the destructuring above
3584 40 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3585 40 : }
3586 40 : {
3587 40 : let _ = projected_remote_consistent_lsn_gauge;
3588 40 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3589 40 : tenant_id,
3590 40 : shard_id,
3591 40 : timeline_id,
3592 40 : ]);
3593 40 : }
3594 40 : }
3595 : }
3596 :
3597 : /// Wrapper future that measures the time spent by a remote storage operation,
3598 : /// and records the time and success/failure as a prometheus metric.
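     : ///
     : /// A usage sketch; the future, kinds, and metrics handle are placeholders:
     : ///
     : /// ```ignore
     : /// let res = download_future
     : ///     .measure_remote_op(file_kind, op_kind, Arc::clone(&metrics))
     : ///     .await;
     : /// // When the wrapped future completes, the elapsed time is observed with a
     : /// // "success"/"failure" status label depending on whether the result was Ok or Err.
     : /// ```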
3599 : pub(crate) trait MeasureRemoteOp: Sized {
3600 6418 : fn measure_remote_op(
3601 6418 : self,
3602 6418 : file_kind: RemoteOpFileKind,
3603 6418 : op: RemoteOpKind,
3604 6418 : metrics: Arc<RemoteTimelineClientMetrics>,
3605 6418 : ) -> MeasuredRemoteOp<Self> {
3606 6418 : let start = Instant::now();
3607 6418 : MeasuredRemoteOp {
3608 6418 : inner: self,
3609 6418 : file_kind,
3610 6418 : op,
3611 6418 : start,
3612 6418 : metrics,
3613 6418 : }
3614 6418 : }
3615 : }
3616 :
3617 : impl<T: Sized> MeasureRemoteOp for T {}
3618 :
3619 : pin_project! {
3620 : pub(crate) struct MeasuredRemoteOp<F>
3621 : {
3622 : #[pin]
3623 : inner: F,
3624 : file_kind: RemoteOpFileKind,
3625 : op: RemoteOpKind,
3626 : start: Instant,
3627 : metrics: Arc<RemoteTimelineClientMetrics>,
3628 : }
3629 : }
3630 :
3631 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
3632 : type Output = Result<O, E>;
3633 :
3634 99115 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
3635 99115 : let this = self.project();
3636 99115 : let poll_result = this.inner.poll(cx);
3637 99115 : if let Poll::Ready(ref res) = poll_result {
3638 6103 : let duration = this.start.elapsed();
3639 6103 : let status = if res.is_ok() { &"success" } else { &"failure" };
3640 6103 : this.metrics
3641 6103 : .remote_operation_time(this.file_kind, this.op, status)
3642 6103 : .observe(duration.as_secs_f64());
3643 93012 : }
3644 99115 : poll_result
3645 99115 : }
3646 : }
3647 :
3648 : pub mod tokio_epoll_uring {
3649 : use std::collections::HashMap;
3650 : use std::sync::{Arc, Mutex};
3651 :
3652 : use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};
3653 : use once_cell::sync::Lazy;
3654 :
3655 : /// Shared storage for tokio-epoll-uring thread local metrics.
3656 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3657 238 : Lazy::new(|| {
3658 238 : let slots_submission_queue_depth = register_histogram!(
3659 238 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3660 238 : "The slots waiters queue depth of each tokio_epoll_uring system",
3661 238 : vec![
3662 238 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
3663 238 : ],
3664 238 : )
3665 238 : .expect("failed to define a metric");
3666 238 : ThreadLocalMetricsStorage {
3667 238 : observers: Mutex::new(HashMap::new()),
3668 238 : slots_submission_queue_depth,
3669 238 : }
3670 238 : });
3671 :
3672 : pub struct ThreadLocalMetricsStorage {
3673 :         /// Map of thread-local metrics observers, keyed by tokio-epoll-uring system id.
3674 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3675 : /// A histogram shared between all thread local systems
3676 : /// for collecting slots submission queue depth.
3677 : slots_submission_queue_depth: Histogram,
3678 : }
3679 :
3680 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3681 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3682 : ///
3683 : /// The System makes observations into [`Self`] and periodically, the collector
3684 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3685 : ///
3686 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3687 :     /// [`LocalHistogram`] is `!Send`, so we need to put it behind a [`Mutex`].
3688 :     /// But except for the periodic flush, the lock is uncontended, so there's no waiting
3689 :     /// for the cache coherence protocol to get an exclusive cache line.
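     :     ///
     :     /// A sketch of the intended lifecycle; `system_id` is a placeholder:
     :     ///
     :     /// ```ignore
     :     /// let per_system = THREAD_LOCAL_METRICS_STORAGE.register_system(system_id);
     :     /// // ... the system records observations via observe_slots_submission_queue_depth(..) ...
     :     /// THREAD_LOCAL_METRICS_STORAGE.flush_thread_local_metrics(); // done by the collector
     :     /// THREAD_LOCAL_METRICS_STORAGE.remove_system(system_id); // before the system is dropped
     :     /// ```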
3690 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3691 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3692 : }
3693 :
3694 : impl ThreadLocalMetricsStorage {
3695 : /// Registers a new thread local system. Returns a thread local metrics observer.
3696 1033 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3697 1033 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3698 1033 : self.slots_submission_queue_depth.local(),
3699 1033 : ));
3700 1033 : let mut g = self.observers.lock().unwrap();
3701 1033 : g.insert(id, Arc::clone(&per_system_metrics));
3702 1033 : per_system_metrics
3703 1033 : }
3704 :
3705 : /// Removes metrics observer for a thread local system.
3706 : /// This should be called before dropping a thread local system.
3707 238 : pub fn remove_system(&self, id: u64) {
3708 238 : let mut g = self.observers.lock().unwrap();
3709 238 : g.remove(&id);
3710 238 : }
3711 :
3712 : /// Flush all thread local metrics to the shared storage.
3713 0 : pub fn flush_thread_local_metrics(&self) {
3714 0 : let g = self.observers.lock().unwrap();
3715 0 : g.values().for_each(|local| {
3716 0 : local.flush();
3717 0 : });
3718 0 : }
3719 : }
3720 :
3721 : impl ThreadLocalMetrics {
3722 1033 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3723 1033 : ThreadLocalMetrics {
3724 1033 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3725 1033 : }
3726 1033 : }
3727 :
3728 : /// Flushes the thread local metrics to shared aggregator.
3729 0 : pub fn flush(&self) {
3730 0 : let Self {
3731 0 : slots_submission_queue_depth,
3732 0 : } = self;
3733 0 : slots_submission_queue_depth.lock().unwrap().flush();
3734 0 : }
3735 : }
3736 :
3737 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3738 1820292 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3739 1820292 : let Self {
3740 1820292 : slots_submission_queue_depth,
3741 1820292 : } = self;
3742 1820292 : slots_submission_queue_depth
3743 1820292 : .lock()
3744 1820292 : .unwrap()
3745 1820292 : .observe(queue_depth as f64);
3746 1820292 : }
3747 : }
3748 :
3749 : pub struct Collector {
3750 : descs: Vec<metrics::core::Desc>,
3751 : systems_created: UIntGauge,
3752 : systems_destroyed: UIntGauge,
3753 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
3754 : }
3755 :
3756 : impl metrics::core::Collector for Collector {
3757 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3758 0 : self.descs.iter().collect()
3759 0 : }
3760 :
3761 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3762 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3763 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3764 0 : systems_created,
3765 0 : systems_destroyed,
3766 0 : } = tokio_epoll_uring::metrics::global();
3767 0 : self.systems_created.set(systems_created);
3768 0 : mfs.extend(self.systems_created.collect());
3769 0 : self.systems_destroyed.set(systems_destroyed);
3770 0 : mfs.extend(self.systems_destroyed.collect());
3771 0 :
3772 0 : self.thread_local_metrics_storage
3773 0 : .flush_thread_local_metrics();
3774 0 :
3775 0 : mfs.extend(
3776 0 : self.thread_local_metrics_storage
3777 0 : .slots_submission_queue_depth
3778 0 : .collect(),
3779 0 : );
3780 0 : mfs
3781 0 : }
3782 : }
3783 :
3784 : impl Collector {
3785 : const NMETRICS: usize = 3;
3786 :
3787 : #[allow(clippy::new_without_default)]
3788 0 : pub fn new() -> Self {
3789 0 : let mut descs = Vec::new();
3790 0 :
3791 0 : let systems_created = UIntGauge::new(
3792 0 : "pageserver_tokio_epoll_uring_systems_created",
3793 0 : "counter of tokio-epoll-uring systems that were created",
3794 0 : )
3795 0 : .unwrap();
3796 0 : descs.extend(
3797 0 : metrics::core::Collector::desc(&systems_created)
3798 0 : .into_iter()
3799 0 : .cloned(),
3800 0 : );
3801 0 :
3802 0 : let systems_destroyed = UIntGauge::new(
3803 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
3804 0 : "counter of tokio-epoll-uring systems that were destroyed",
3805 0 : )
3806 0 : .unwrap();
3807 0 : descs.extend(
3808 0 : metrics::core::Collector::desc(&systems_destroyed)
3809 0 : .into_iter()
3810 0 : .cloned(),
3811 0 : );
3812 0 :
3813 0 : Self {
3814 0 : descs,
3815 0 : systems_created,
3816 0 : systems_destroyed,
3817 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
3818 0 : }
3819 0 : }
3820 : }
3821 :
3822 238 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3823 238 : register_int_counter!(
3824 238 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
3825 238 : "Number of times where thread_local_system creation spanned multiple executor threads",
3826 238 : )
3827 238 : .unwrap()
3828 238 : });
3829 :
3830 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3831 0 : register_int_counter!(
3832 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
3833 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
3834 0 : )
3835 0 : .unwrap()
3836 0 : });
3837 : }
3838 :
3839 : pub(crate) mod tenant_throttling {
3840 : use metrics::{IntCounter, register_int_counter_vec};
3841 : use once_cell::sync::Lazy;
3842 : use utils::shard::TenantShardId;
3843 :
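     :     /// Pair of counters that are always incremented together: a global, per-kind counter
     :     /// and the corresponding per-tenant counter (see [`Self::inc_by`]).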
3844 : pub(crate) struct GlobalAndPerTenantIntCounter {
3845 : global: IntCounter,
3846 : per_tenant: IntCounter,
3847 : }
3848 :
3849 : impl GlobalAndPerTenantIntCounter {
3850 : #[inline(always)]
3851 0 : pub(crate) fn inc(&self) {
3852 0 : self.inc_by(1)
3853 0 : }
3854 : #[inline(always)]
3855 0 : pub(crate) fn inc_by(&self, n: u64) {
3856 0 : self.global.inc_by(n);
3857 0 : self.per_tenant.inc_by(n);
3858 0 : }
3859 : }
3860 :
3861 : pub(crate) struct Metrics<const KIND: usize> {
3862 : pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
3863 : pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,
3864 : pub(super) wait_time: GlobalAndPerTenantIntCounter,
3865 : pub(super) count_throttled: GlobalAndPerTenantIntCounter,
3866 : }
3867 :
3868 416 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3869 416 : register_int_counter_vec!(
3870 416 : "pageserver_tenant_throttling_count_accounted_start_global",
3871 416 : "Count of tenant throttling starts, by kind of throttle.",
3872 416 : &["kind"]
3873 416 : )
3874 416 : .unwrap()
3875 416 : });
3876 416 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3877 416 : register_int_counter_vec!(
3878 416 : "pageserver_tenant_throttling_count_accounted_start",
3879 416 : "Count of tenant throttling starts, by kind of throttle.",
3880 416 : &["kind", "tenant_id", "shard_id"]
3881 416 : )
3882 416 : .unwrap()
3883 416 : });
3884 416 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3885 416 : register_int_counter_vec!(
3886 416 : "pageserver_tenant_throttling_count_accounted_finish_global",
3887 416 : "Count of tenant throttling finishes, by kind of throttle.",
3888 416 : &["kind"]
3889 416 : )
3890 416 : .unwrap()
3891 416 : });
3892 416 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3893 416 : register_int_counter_vec!(
3894 416 : "pageserver_tenant_throttling_count_accounted_finish",
3895 416 : "Count of tenant throttling finishes, by kind of throttle.",
3896 416 : &["kind", "tenant_id", "shard_id"]
3897 416 : )
3898 416 : .unwrap()
3899 416 : });
3900 416 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3901 416 : register_int_counter_vec!(
3902 416 : "pageserver_tenant_throttling_wait_usecs_sum_global",
3903 416 :             "Sum of microseconds spent waiting on the throttle, by kind of throttle.",
3904 416 : &["kind"]
3905 416 : )
3906 416 : .unwrap()
3907 416 : });
3908 416 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3909 416 : register_int_counter_vec!(
3910 416 : "pageserver_tenant_throttling_wait_usecs_sum",
3911 416 :             "Sum of microseconds spent waiting on the throttle, by kind of throttle.",
3912 416 : &["kind", "tenant_id", "shard_id"]
3913 416 : )
3914 416 : .unwrap()
3915 416 : });
3916 :
3917 416 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3918 416 : register_int_counter_vec!(
3919 416 : "pageserver_tenant_throttling_count_global",
3920 416 : "Count of tenant throttlings, by kind of throttle.",
3921 416 : &["kind"]
3922 416 : )
3923 416 : .unwrap()
3924 416 : });
3925 416 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3926 416 : register_int_counter_vec!(
3927 416 : "pageserver_tenant_throttling_count",
3928 416 : "Count of tenant throttlings, by kind of throttle.",
3929 416 : &["kind", "tenant_id", "shard_id"]
3930 416 : )
3931 416 : .unwrap()
3932 416 : });
3933 :
3934 : const KINDS: &[&str] = &["pagestream"];
3935 : pub type Pagestream = Metrics<0>;
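     :     // `KIND` is an index into `KINDS`: `Pagestream` labels its counters with "pagestream".
     :     // A hypothetical additional kind would follow the same pattern, e.g.:
     :     //
     :     //     const KINDS: &[&str] = &["pagestream", "other"];
     :     //     pub type Other = Metrics<1>;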
3936 :
3937 : impl<const KIND: usize> Metrics<KIND> {
3938 452 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
3939 452 : let per_tenant_label_values = &[
3940 452 : KINDS[KIND],
3941 452 : &tenant_shard_id.tenant_id.to_string(),
3942 452 : &tenant_shard_id.shard_slug().to_string(),
3943 452 : ];
3944 452 : Metrics {
3945 452 : count_accounted_start: {
3946 452 : GlobalAndPerTenantIntCounter {
3947 452 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
3948 452 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
3949 452 : .with_label_values(per_tenant_label_values),
3950 452 : }
3951 452 : },
3952 452 : count_accounted_finish: {
3953 452 : GlobalAndPerTenantIntCounter {
3954 452 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
3955 452 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
3956 452 : .with_label_values(per_tenant_label_values),
3957 452 : }
3958 452 : },
3959 452 : wait_time: {
3960 452 : GlobalAndPerTenantIntCounter {
3961 452 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
3962 452 : per_tenant: WAIT_USECS_PER_TENANT
3963 452 : .with_label_values(per_tenant_label_values),
3964 452 : }
3965 452 : },
3966 452 : count_throttled: {
3967 452 : GlobalAndPerTenantIntCounter {
3968 452 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
3969 452 : per_tenant: WAIT_COUNT_PER_TENANT
3970 452 : .with_label_values(per_tenant_label_values),
3971 452 : }
3972 452 : },
3973 452 : }
3974 452 : }
3975 : }
3976 :
3977 0 : pub(crate) fn preinitialize_global_metrics() {
3978 0 : Lazy::force(&COUNT_ACCOUNTED_START);
3979 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
3980 0 : Lazy::force(&WAIT_USECS);
3981 0 : Lazy::force(&WAIT_COUNT);
3982 0 : }
3983 :
3984 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3985 48 : for m in &[
3986 12 : &COUNT_ACCOUNTED_START_PER_TENANT,
3987 12 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
3988 12 : &WAIT_USECS_PER_TENANT,
3989 12 : &WAIT_COUNT_PER_TENANT,
3990 12 : ] {
3991 96 : for kind in KINDS {
3992 48 : let _ = m.remove_label_values(&[
3993 48 : kind,
3994 48 : &tenant_shard_id.tenant_id.to_string(),
3995 48 : &tenant_shard_id.shard_slug().to_string(),
3996 48 : ]);
3997 48 : }
3998 : }
3999 12 : }
4000 : }
4001 :
4002 : pub(crate) mod disk_usage_based_eviction {
4003 : use super::*;
4004 :
4005 : pub(crate) struct Metrics {
4006 : pub(crate) tenant_collection_time: Histogram,
4007 : pub(crate) tenant_layer_count: Histogram,
4008 : pub(crate) layers_collected: IntCounter,
4009 : pub(crate) layers_selected: IntCounter,
4010 : pub(crate) layers_evicted: IntCounter,
4011 : }
4012 :
4013 : impl Default for Metrics {
4014 0 : fn default() -> Self {
4015 0 : let tenant_collection_time = register_histogram!(
4016 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
4017 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
4018 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
4019 0 : )
4020 0 : .unwrap();
4021 0 :
4022 0 : let tenant_layer_count = register_histogram!(
4023 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
4024 0 : "Amount of layers gathered from a tenant",
4025 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
4026 0 : )
4027 0 : .unwrap();
4028 0 :
4029 0 : let layers_collected = register_int_counter!(
4030 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
4031 0 : "Amount of layers collected"
4032 0 : )
4033 0 : .unwrap();
4034 0 :
4035 0 : let layers_selected = register_int_counter!(
4036 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
4037 0 : "Amount of layers selected"
4038 0 : )
4039 0 : .unwrap();
4040 0 :
4041 0 : let layers_evicted = register_int_counter!(
4042 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
4043 0 : "Amount of layers successfully evicted"
4044 0 : )
4045 0 : .unwrap();
4046 0 :
4047 0 : Self {
4048 0 : tenant_collection_time,
4049 0 : tenant_layer_count,
4050 0 : layers_collected,
4051 0 : layers_selected,
4052 0 : layers_evicted,
4053 0 : }
4054 0 : }
4055 : }
4056 :
4057 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
4058 : }
4059 :
4060 404 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
4061 404 : register_uint_gauge_vec!(
4062 404 : "pageserver_tokio_executor_thread_configured_count",
4063 404 :         "Total number of configured tokio executor threads in the process.
4064 404 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
4065 404 : &["setup"],
4066 404 : )
4067 404 : .unwrap()
4068 404 : });
4069 :
4070 404 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
4071 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
4072 404 : let _guard = SERIALIZE.lock().unwrap();
4073 404 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
4074 404 : TOKIO_EXECUTOR_THREAD_COUNT
4075 404 : .get_metric_with_label_values(&[setup])
4076 404 : .unwrap()
4077 404 : .set(u64::try_from(num_threads.get()).unwrap());
4078 404 : }
4079 :
4080 0 : pub fn preinitialize_metrics(conf: &'static PageServerConf) {
4081 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
4082 0 :
4083 0 :     // Python tests need these, and we alert on some of them.
4084 0 : //
4085 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
4086 0 : // order:
4087 0 : // - global metrics reside in a Lazy<PageserverMetrics>
4088 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
4089 0 : // - could move the statics into TimelineMetrics::new()?
4090 0 :
4091 0 : // counters
4092 0 : [
4093 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
4094 0 : &WALRECEIVER_STARTED_CONNECTIONS,
4095 0 : &WALRECEIVER_BROKER_UPDATES,
4096 0 : &WALRECEIVER_CANDIDATES_ADDED,
4097 0 : &WALRECEIVER_CANDIDATES_REMOVED,
4098 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
4099 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
4100 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
4101 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
4102 0 : &CIRCUIT_BREAKERS_BROKEN,
4103 0 : &CIRCUIT_BREAKERS_UNBROKEN,
4104 0 : &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
4105 0 : ]
4106 0 : .into_iter()
4107 0 : .for_each(|c| {
4108 0 : Lazy::force(c);
4109 0 : });
4110 0 :
4111 0 : // Deletion queue stats
4112 0 : Lazy::force(&DELETION_QUEUE);
4113 0 :
4114 0 : // Tenant stats
4115 0 : Lazy::force(&TENANT);
4116 0 :
4117 0 : // Tenant manager stats
4118 0 : Lazy::force(&TENANT_MANAGER);
4119 0 :
4120 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
4121 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
4122 :
4123 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
4124 0 :         // Initialize the metric for all gauges; otherwise the time series might appear to show
4125 0 :         // values from the last restart.
4126 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
4127 0 : }
4128 :
4129 : // countervecs
4130 0 : [
4131 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
4132 0 : &SMGR_QUERY_STARTED_GLOBAL,
4133 0 : ]
4134 0 : .into_iter()
4135 0 : .for_each(|c| {
4136 0 : Lazy::force(c);
4137 0 : });
4138 0 :
4139 0 : // gauges
4140 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
4141 0 :
4142 0 : // histograms
4143 0 : [
4144 0 : &LAYERS_PER_READ_GLOBAL,
4145 0 : &LAYERS_PER_READ_BATCH_GLOBAL,
4146 0 : &LAYERS_PER_READ_AMORTIZED_GLOBAL,
4147 0 : &DELTAS_PER_READ_GLOBAL,
4148 0 : &WAIT_LSN_TIME,
4149 0 : &WAL_REDO_TIME,
4150 0 : &WAL_REDO_RECORDS_HISTOGRAM,
4151 0 : &WAL_REDO_BYTES_HISTOGRAM,
4152 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
4153 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
4154 0 : &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
4155 0 : ]
4156 0 : .into_iter()
4157 0 : .for_each(|h| {
4158 0 : Lazy::force(h);
4159 0 : });
4160 0 :
4161 0 : // Custom
4162 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
4163 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
4164 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
4165 0 :
4166 0 : tenant_throttling::preinitialize_global_metrics();
4167 0 : }
|