Line data Source code
1 : use std::collections::HashMap;
2 : use std::num::NonZeroUsize;
3 : use std::os::fd::RawFd;
4 : use std::sync::atomic::AtomicU64;
5 : use std::sync::{Arc, Mutex};
6 : use std::time::{Duration, Instant};
7 :
8 : use enum_map::{Enum as _, EnumMap};
9 : use futures::Future;
10 : use metrics::{
11 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
12 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
13 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
14 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
15 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
16 : };
17 : use once_cell::sync::Lazy;
18 : use pageserver_api::config::defaults::DEFAULT_MAX_GET_VECTORED_KEYS;
19 : use pageserver_api::config::{
20 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
21 : PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
22 : };
23 : use pageserver_api::models::InMemoryLayerInfo;
24 : use pageserver_api::shard::TenantShardId;
25 : use postgres_backend::{QueryError, is_expected_io_error};
26 : use pq_proto::framed::ConnectionError;
27 : use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
28 : use strum_macros::{IntoStaticStr, VariantNames};
29 : use utils::id::TimelineId;
30 :
31 : use crate::config;
32 : use crate::config::PageServerConf;
33 : use crate::context::{PageContentKind, RequestContext};
34 : use crate::pgdatadir_mapping::DatadirModificationStats;
35 : use crate::task_mgr::TaskKind;
36 : use crate::tenant::layer_map::LayerMap;
37 : use crate::tenant::mgr::TenantSlot;
38 : use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
39 : use crate::tenant::tasks::BackgroundLoopKind;
40 : use crate::tenant::throttle::ThrottleResult;
41 :
42 : /// Prometheus histogram buckets (in seconds) for operations in the critical
43 : /// path. In other words, operations that directly affect that latency of user
44 : /// queries.
45 : ///
46 : /// The buckets capture the majority of latencies in the microsecond and
47 : /// millisecond range but also extend far enough up to distinguish "bad" from
48 : /// "really bad".
49 : const CRITICAL_OP_BUCKETS: &[f64] = &[
50 : 0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
51 : 0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
52 : 1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
53 : ];
54 :
55 : // Metrics collected on operations on the storage repository.
56 : #[derive(Debug, VariantNames, IntoStaticStr)]
57 : #[strum(serialize_all = "kebab_case")]
58 : pub(crate) enum StorageTimeOperation {
59 : #[strum(serialize = "layer flush")]
60 : LayerFlush,
61 :
62 : #[strum(serialize = "layer flush delay")]
63 : LayerFlushDelay,
64 :
65 : #[strum(serialize = "compact")]
66 : Compact,
67 :
68 : #[strum(serialize = "create images")]
69 : CreateImages,
70 :
71 : #[strum(serialize = "logical size")]
72 : LogicalSize,
73 :
74 : #[strum(serialize = "imitate logical size")]
75 : ImitateLogicalSize,
76 :
77 : #[strum(serialize = "load layer map")]
78 : LoadLayerMap,
79 :
80 : #[strum(serialize = "gc")]
81 : Gc,
82 :
83 : #[strum(serialize = "find gc cutoffs")]
84 : FindGcCutoffs,
85 : }
86 :
87 108 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
88 108 : register_counter_vec!(
89 : "pageserver_storage_operations_seconds_sum",
90 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
91 108 : &["operation", "tenant_id", "shard_id", "timeline_id"],
92 : )
93 108 : .expect("failed to define a metric")
94 108 : });
95 :
96 108 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
97 108 : register_int_counter_vec!(
98 : "pageserver_storage_operations_seconds_count",
99 : "Count of storage operations with operation, tenant and timeline dimensions",
100 108 : &["operation", "tenant_id", "shard_id", "timeline_id"],
101 : )
102 108 : .expect("failed to define a metric")
103 108 : });
104 :
105 : // Buckets for background operation duration in seconds, like compaction, GC, size calculation.
106 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
107 :
108 108 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
109 108 : register_histogram_vec!(
110 : "pageserver_storage_operations_seconds_global",
111 : "Time spent on storage operations",
112 108 : &["operation"],
113 108 : STORAGE_OP_BUCKETS.into(),
114 : )
115 108 : .expect("failed to define a metric")
116 108 : });
117 :
118 : /// Measures layers visited per read (i.e. read amplification).
119 : ///
120 : /// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits
121 : /// are amortized across the batch, and some layers may not intersect with a given key, each visited
122 : /// layer contributes directly to the observed latency for every read in the batch, which is what we
123 : /// care about.
124 108 : pub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {
125 108 : register_histogram_vec!(
126 : "pageserver_layers_per_read",
127 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
128 108 : &["tenant_id", "shard_id", "timeline_id"],
129 : // Low resolution to reduce cardinality.
130 108 : vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
131 : )
132 108 : .expect("failed to define a metric")
133 108 : });
134 :
135 106 : pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
136 106 : register_histogram!(
137 : "pageserver_layers_per_read_global",
138 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
139 106 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
140 : )
141 106 : .expect("failed to define a metric")
142 106 : });
143 :
144 106 : pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
145 106 : register_histogram!(
146 : "pageserver_layers_per_read_batch_global",
147 : "Layers visited to serve a single read batch (read amplification), regardless of number of reads.",
148 106 : vec![
149 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
150 : ],
151 : )
152 106 : .expect("failed to define a metric")
153 106 : });
154 :
155 106 : pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
156 106 : register_histogram!(
157 : "pageserver_layers_per_read_amortized_global",
158 : "Layers visited to serve a single read (read amplification). Amortized across a batch: \
159 : all visited layers are divided by number of reads.",
160 106 : vec![
161 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
162 : ],
163 : )
164 106 : .expect("failed to define a metric")
165 106 : });
166 :
167 106 : pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
168 : // We expect this to be low because of Postgres checkpoints. Let's see if that holds.
169 106 : register_histogram!(
170 : "pageserver_deltas_per_read_global",
171 : "Number of delta pages applied to image page per read",
172 106 : vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
173 : )
174 106 : .expect("failed to define a metric")
175 106 : });
176 :
177 0 : pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
178 0 : register_uint_gauge!(
179 : "pageserver_concurrent_initdb",
180 : "Number of initdb processes running"
181 : )
182 0 : .expect("failed to define a metric")
183 0 : });
184 :
185 0 : pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
186 0 : register_histogram!(
187 : "pageserver_initdb_semaphore_seconds_global",
188 : "Time spent getting a permit from the global initdb semaphore",
189 0 : STORAGE_OP_BUCKETS.into()
190 : )
191 0 : .expect("failed to define metric")
192 0 : });
193 :
194 0 : pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
195 0 : register_histogram!(
196 : "pageserver_initdb_seconds_global",
197 : "Time spent performing initdb",
198 0 : STORAGE_OP_BUCKETS.into()
199 : )
200 0 : .expect("failed to define metric")
201 0 : });
202 :
203 : pub(crate) struct GetVectoredLatency {
204 : map: EnumMap<TaskKind, Option<Histogram>>,
205 : }
206 :
207 : #[allow(dead_code)]
208 : pub(crate) struct ScanLatency {
209 : map: EnumMap<TaskKind, Option<Histogram>>,
210 : }
211 :
212 : impl GetVectoredLatency {
213 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
214 : // cardinality of the metric.
215 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
216 :
217 10888 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
218 10888 : self.map[task_kind].as_ref()
219 10888 : }
220 : }
221 :
222 : impl ScanLatency {
223 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
224 : // cardinality of the metric.
225 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
226 :
227 8 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
228 8 : self.map[task_kind].as_ref()
229 8 : }
230 : }
231 :
232 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
233 : parent: &'a Histogram,
234 : start: std::time::Instant,
235 : }
236 :
237 : impl<'a> ScanLatencyOngoingRecording<'a> {
238 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
239 0 : let start = Instant::now();
240 0 : ScanLatencyOngoingRecording { parent, start }
241 0 : }
242 :
243 0 : pub(crate) fn observe(self) {
244 0 : let elapsed = self.start.elapsed();
245 0 : self.parent.observe(elapsed.as_secs_f64());
246 0 : }
247 : }
248 :
249 104 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
250 104 : let inner = register_histogram_vec!(
251 : "pageserver_get_vectored_seconds",
252 : "Time spent in get_vectored.",
253 104 : &["task_kind"],
254 104 : CRITICAL_OP_BUCKETS.into(),
255 : )
256 104 : .expect("failed to define a metric");
257 :
258 : GetVectoredLatency {
259 3328 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
260 3328 : let task_kind = TaskKind::from_usize(task_kind_idx);
261 :
262 3328 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
263 208 : let task_kind = task_kind.into();
264 208 : Some(inner.with_label_values(&[task_kind]))
265 : } else {
266 3120 : None
267 : }
268 3328 : })),
269 : }
270 104 : });
271 :
272 3 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
273 3 : let inner = register_histogram_vec!(
274 : "pageserver_scan_seconds",
275 : "Time spent in scan.",
276 3 : &["task_kind"],
277 3 : CRITICAL_OP_BUCKETS.into(),
278 : )
279 3 : .expect("failed to define a metric");
280 :
281 : ScanLatency {
282 96 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
283 96 : let task_kind = TaskKind::from_usize(task_kind_idx);
284 :
285 96 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
286 3 : let task_kind = task_kind.into();
287 3 : Some(inner.with_label_values(&[task_kind]))
288 : } else {
289 93 : None
290 : }
291 96 : })),
292 : }
293 3 : });
294 :
295 : pub(crate) struct PageCacheMetricsForTaskKind {
296 : pub read_accesses_immutable: IntCounter,
297 : pub read_hits_immutable: IntCounter,
298 : }
299 :
300 : pub(crate) struct PageCacheMetrics {
301 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
302 : }
303 :
304 50 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
305 50 : register_int_counter_vec!(
306 : "pageserver_page_cache_read_hits_total",
307 : "Number of read accesses to the page cache that hit",
308 50 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
309 : )
310 50 : .expect("failed to define a metric")
311 50 : });
312 :
313 50 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
314 50 : register_int_counter_vec!(
315 : "pageserver_page_cache_read_accesses_total",
316 : "Number of read accesses to the page cache",
317 50 : &["task_kind", "key_kind", "content_kind"]
318 : )
319 50 : .expect("failed to define a metric")
320 50 : });
321 :
322 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
323 1600 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
324 1600 : let task_kind = TaskKind::from_usize(task_kind);
325 1600 : let task_kind: &'static str = task_kind.into();
326 12800 : EnumMap::from_array(std::array::from_fn(|content_kind| {
327 12800 : let content_kind = PageContentKind::from_usize(content_kind);
328 12800 : let content_kind: &'static str = content_kind.into();
329 12800 : PageCacheMetricsForTaskKind {
330 12800 : read_accesses_immutable: {
331 12800 : PAGE_CACHE_READ_ACCESSES
332 12800 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
333 12800 : .unwrap()
334 12800 : },
335 12800 :
336 12800 : read_hits_immutable: {
337 12800 : PAGE_CACHE_READ_HITS
338 12800 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
339 12800 : .unwrap()
340 12800 : },
341 12800 : }
342 12800 : }))
343 1600 : })),
344 50 : });
345 :
346 : impl PageCacheMetrics {
347 586812 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
348 586812 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
349 586812 : }
350 : }
351 :
352 : pub(crate) struct PageCacheSizeMetrics {
353 : pub max_bytes: UIntGauge,
354 :
355 : pub current_bytes_immutable: UIntGauge,
356 : }
357 :
358 50 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
359 50 : register_uint_gauge_vec!(
360 : "pageserver_page_cache_size_current_bytes",
361 : "Current size of the page cache in bytes, by key kind",
362 50 : &["key_kind"]
363 : )
364 50 : .expect("failed to define a metric")
365 50 : });
366 :
367 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
368 : Lazy::new(|| PageCacheSizeMetrics {
369 : max_bytes: {
370 50 : register_uint_gauge!(
371 : "pageserver_page_cache_size_max_bytes",
372 : "Maximum size of the page cache in bytes"
373 : )
374 50 : .expect("failed to define a metric")
375 : },
376 : current_bytes_immutable: {
377 50 : PAGE_CACHE_SIZE_CURRENT_BYTES
378 50 : .get_metric_with_label_values(&["immutable"])
379 50 : .unwrap()
380 : },
381 50 : });
382 :
383 : pub(crate) mod page_cache_eviction_metrics {
384 : use std::num::NonZeroUsize;
385 :
386 : use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};
387 : use once_cell::sync::Lazy;
388 :
389 : #[derive(Clone, Copy)]
390 : pub(crate) enum Outcome {
391 : FoundSlotUnused { iters: NonZeroUsize },
392 : FoundSlotEvicted { iters: NonZeroUsize },
393 : ItersExceeded { iters: NonZeroUsize },
394 : }
395 :
396 50 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
397 50 : register_int_counter_vec!(
398 : "pageserver_page_cache_find_victim_iters_total",
399 : "Counter for the number of iterations in the find_victim loop",
400 50 : &["outcome"],
401 : )
402 50 : .expect("failed to define a metric")
403 50 : });
404 :
405 50 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
406 50 : register_int_counter_vec!(
407 : "pageserver_page_cache_find_victim_calls",
408 : "Incremented at the end of each find_victim() call.\
409 : Filter by outcome to get e.g., eviction rate.",
410 50 : &["outcome"]
411 : )
412 50 : .unwrap()
413 50 : });
414 :
415 15520 : pub(crate) fn observe(outcome: Outcome) {
416 : macro_rules! dry {
417 : ($label:literal, $iters:expr) => {{
418 : static LABEL: &'static str = $label;
419 : static ITERS_TOTAL: Lazy<IntCounter> =
420 59 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
421 : static CALLS: Lazy<IntCounter> =
422 59 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
423 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
424 : CALLS.inc();
425 : }};
426 : }
427 15520 : match outcome {
428 820 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
429 14700 : Outcome::FoundSlotEvicted { iters } => {
430 14700 : dry!("found_evicted", iters)
431 : }
432 0 : Outcome::ItersExceeded { iters } => {
433 0 : dry!("err_iters_exceeded", iters);
434 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
435 0 : }
436 : }
437 15520 : }
438 : }
439 :
440 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
441 0 : register_int_counter_vec!(
442 : "page_cache_errors_total",
443 : "Number of timeouts while acquiring a pinned slot in the page cache",
444 0 : &["error_kind"]
445 : )
446 0 : .expect("failed to define a metric")
447 0 : });
448 :
449 0 : pub(crate) static FEATURE_FLAG_EVALUATION: Lazy<CounterVec> = Lazy::new(|| {
450 0 : register_counter_vec!(
451 : "pageserver_feature_flag_evaluation",
452 : "Number of times a feature flag is evaluated",
453 0 : &["flag_key", "status", "value"],
454 : )
455 0 : .unwrap()
456 0 : });
457 :
458 : #[derive(IntoStaticStr)]
459 : #[strum(serialize_all = "kebab_case")]
460 : pub(crate) enum PageCacheErrorKind {
461 : AcquirePinnedSlotTimeout,
462 : EvictIterLimit,
463 : }
464 :
465 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
466 0 : PAGE_CACHE_ERRORS
467 0 : .get_metric_with_label_values(&[error_kind.into()])
468 0 : .unwrap()
469 0 : .inc();
470 0 : }
471 :
472 11 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
473 11 : register_histogram!(
474 : "pageserver_wait_lsn_seconds",
475 : "Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.",
476 11 : CRITICAL_OP_BUCKETS.into(),
477 : )
478 11 : .expect("failed to define a metric")
479 11 : });
480 :
481 108 : pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {
482 108 : register_int_counter_pair_vec!(
483 : "pageserver_wait_lsn_started_count",
484 : "Number of wait_lsn operations started.",
485 : "pageserver_wait_lsn_finished_count",
486 : "Number of wait_lsn operations finished.",
487 108 : &["tenant_id", "shard_id", "timeline_id"],
488 : )
489 108 : .expect("failed to define a metric")
490 108 : });
491 :
492 108 : pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
493 108 : register_int_counter_vec!(
494 : "pageserver_wait_lsn_in_progress_micros",
495 : "Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.",
496 108 : &["tenant_id", "shard_id", "timeline_id"],
497 : )
498 108 : .expect("failed to define a metric")
499 108 : });
500 :
501 108 : pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {
502 108 : register_int_counter!(
503 : "pageserver_wait_lsn_in_progress_micros_global",
504 : "Time spent waiting for WAL to arrive, globally. Updated periodically while waiting."
505 : )
506 108 : .expect("failed to define a metric")
507 108 : });
508 :
509 3 : pub(crate) static ONDEMAND_DOWNLOAD_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {
510 3 : register_int_counter_vec!(
511 : "pageserver_ondemand_download_bytes_total",
512 : "Total bytes of layers on-demand downloaded",
513 3 : &["task_kind"]
514 : )
515 3 : .expect("failed to define a metric")
516 3 : });
517 :
518 3 : pub(crate) static ONDEMAND_DOWNLOAD_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
519 3 : register_int_counter_vec!(
520 : "pageserver_ondemand_download_count",
521 : "Total count of layers on-demand downloaded",
522 3 : &["task_kind"]
523 : )
524 3 : .expect("failed to define a metric")
525 3 : });
526 :
527 : pub(crate) mod wait_ondemand_download_time {
528 : use super::*;
529 : const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
530 : 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, // 10 ms - 100ms
531 : 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, // 100ms to 1s
532 : 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, // 1s to 10s
533 : 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, // 10s to 1m
534 : ];
535 :
536 : /// The task kinds for which we want to track wait times for on-demand downloads.
537 : /// Other task kinds' wait times are accumulated in label value `unknown`.
538 : pub(crate) const WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS: [TaskKind; 2] = [
539 : TaskKind::PageRequestHandler,
540 : TaskKind::WalReceiverConnectionHandler,
541 : ];
542 :
543 0 : pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL: Lazy<Vec<Histogram>> = Lazy::new(|| {
544 0 : let histo = register_histogram_vec!(
545 : "pageserver_wait_ondemand_download_seconds_global",
546 : "Observations are individual tasks' wait times for on-demand downloads. \
547 : If N tasks coalesce on an on-demand download, and it takes 10s, than we observe N * 10s.",
548 0 : &["task_kind"],
549 0 : WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS.into(),
550 : )
551 0 : .expect("failed to define a metric");
552 0 : WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
553 0 : .iter()
554 0 : .map(|task_kind| histo.with_label_values(&[task_kind.into()]))
555 0 : .collect::<Vec<_>>()
556 0 : });
557 :
558 108 : pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_SUM: Lazy<CounterVec> = Lazy::new(|| {
559 108 : register_counter_vec!(
560 : // use a name that _could_ be evolved into a per-timeline histogram later
561 : "pageserver_wait_ondemand_download_seconds_sum",
562 : "Like `pageserver_wait_ondemand_download_seconds_global` but per timeline",
563 108 : &["tenant_id", "shard_id", "timeline_id", "task_kind"],
564 : )
565 108 : .unwrap()
566 108 : });
567 :
568 : pub struct WaitOndemandDownloadTimeSum {
569 : counters: [Counter; WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS.len()],
570 : }
571 :
572 : impl WaitOndemandDownloadTimeSum {
573 234 : pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
574 234 : let counters = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
575 234 : .iter()
576 468 : .map(|task_kind| {
577 468 : WAIT_ONDEMAND_DOWNLOAD_TIME_SUM
578 468 : .get_metric_with_label_values(&[
579 468 : tenant_id,
580 468 : shard_id,
581 468 : timeline_id,
582 468 : task_kind.into(),
583 468 : ])
584 468 : .unwrap()
585 468 : })
586 234 : .collect::<Vec<_>>();
587 234 : Self {
588 234 : counters: counters.try_into().unwrap(),
589 234 : }
590 234 : }
591 12 : pub(crate) fn observe(&self, task_kind: TaskKind, duration: Duration) {
592 12 : let maybe = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
593 12 : .iter()
594 12 : .enumerate()
595 24 : .find(|(_, kind)| **kind == task_kind);
596 12 : let Some((idx, _)) = maybe else {
597 12 : return;
598 : };
599 0 : WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL[idx].observe(duration.as_secs_f64());
600 0 : let counter = &self.counters[idx];
601 0 : counter.inc_by(duration.as_secs_f64());
602 12 : }
603 : }
604 :
605 5 : pub(crate) fn shutdown_timeline(tenant_id: &str, shard_id: &str, timeline_id: &str) {
606 15 : for task_kind in WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS {
607 10 : let _ = WAIT_ONDEMAND_DOWNLOAD_TIME_SUM.remove_label_values(&[
608 10 : tenant_id,
609 10 : shard_id,
610 10 : timeline_id,
611 10 : task_kind.into(),
612 10 : ]);
613 10 : }
614 5 : }
615 :
616 0 : pub(crate) fn preinitialize_global_metrics() {
617 0 : Lazy::force(&WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL);
618 0 : }
619 : }
620 :
621 108 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
622 108 : register_int_gauge_vec!(
623 : "pageserver_last_record_lsn",
624 : "Last record LSN grouped by timeline",
625 108 : &["tenant_id", "shard_id", "timeline_id"]
626 : )
627 108 : .expect("failed to define a metric")
628 108 : });
629 :
630 108 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
631 108 : register_int_gauge_vec!(
632 : "pageserver_disk_consistent_lsn",
633 : "Disk consistent LSN grouped by timeline",
634 108 : &["tenant_id", "shard_id", "timeline_id"]
635 : )
636 108 : .expect("failed to define a metric")
637 108 : });
638 :
639 108 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
640 108 : register_uint_gauge_vec!(
641 : "pageserver_projected_remote_consistent_lsn",
642 : "Projected remote consistent LSN grouped by timeline",
643 108 : &["tenant_id", "shard_id", "timeline_id"]
644 : )
645 108 : .expect("failed to define a metric")
646 108 : });
647 :
648 108 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
649 108 : register_uint_gauge_vec!(
650 : "pageserver_pitr_history_size",
651 : "Data written since PITR cutoff on this timeline",
652 108 : &["tenant_id", "shard_id", "timeline_id"]
653 : )
654 108 : .expect("failed to define a metric")
655 108 : });
656 :
657 : #[derive(
658 : strum_macros::EnumIter,
659 : strum_macros::EnumString,
660 : strum_macros::Display,
661 : strum_macros::IntoStaticStr,
662 : )]
663 : #[strum(serialize_all = "kebab_case")]
664 : pub(crate) enum LayerKind {
665 : Delta,
666 : Image,
667 : }
668 :
669 : #[derive(
670 : strum_macros::EnumIter,
671 : strum_macros::EnumString,
672 : strum_macros::Display,
673 : strum_macros::IntoStaticStr,
674 : )]
675 : #[strum(serialize_all = "kebab_case")]
676 : pub(crate) enum LayerLevel {
677 : // We don't track the currently open ephemeral layer, since there's always exactly 1 and its
678 : // size changes. See `TIMELINE_EPHEMERAL_BYTES`.
679 : Frozen,
680 : L0,
681 : L1,
682 : }
683 :
684 106 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
685 106 : register_uint_gauge_vec!(
686 : "pageserver_layer_bytes",
687 : "Sum of frozen, L0, and L1 layer physical sizes in bytes (excluding the open ephemeral layer)",
688 106 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
689 : )
690 106 : .expect("failed to define a metric")
691 106 : });
692 :
693 106 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
694 106 : register_uint_gauge_vec!(
695 : "pageserver_layer_count",
696 : "Number of frozen, L0, and L1 layers (excluding the open ephemeral layer)",
697 106 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
698 : )
699 106 : .expect("failed to define a metric")
700 106 : });
701 :
702 108 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
703 108 : register_uint_gauge_vec!(
704 : "pageserver_archive_size",
705 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
706 108 : &["tenant_id", "shard_id", "timeline_id"]
707 : )
708 108 : .expect("failed to define a metric")
709 108 : });
710 :
711 108 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
712 108 : register_int_gauge_vec!(
713 : "pageserver_standby_horizon",
714 : "Standby apply LSN for which GC is hold off, by timeline.",
715 108 : &["tenant_id", "shard_id", "timeline_id"]
716 : )
717 108 : .expect("failed to define a metric")
718 108 : });
719 :
720 108 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
721 108 : register_uint_gauge_vec!(
722 : "pageserver_resident_physical_size",
723 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
724 108 : &["tenant_id", "shard_id", "timeline_id"]
725 : )
726 108 : .expect("failed to define a metric")
727 108 : });
728 :
729 108 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
730 108 : register_uint_gauge_vec!(
731 : "pageserver_visible_physical_size",
732 : "The size of the layer files present in the pageserver's filesystem.",
733 108 : &["tenant_id", "shard_id", "timeline_id"]
734 : )
735 108 : .expect("failed to define a metric")
736 108 : });
737 :
738 106 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
739 106 : register_uint_gauge!(
740 : "pageserver_resident_physical_size_global",
741 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
742 : )
743 106 : .expect("failed to define a metric")
744 106 : });
745 :
746 108 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
747 108 : register_uint_gauge_vec!(
748 : "pageserver_remote_physical_size",
749 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
750 : // Corollary: If any files are missing from the index part, they won't be included here.
751 108 : &["tenant_id", "shard_id", "timeline_id"]
752 : )
753 108 : .expect("failed to define a metric")
754 108 : });
755 :
756 108 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
757 108 : register_uint_gauge!(
758 : "pageserver_remote_physical_size_global",
759 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
760 : )
761 108 : .expect("failed to define a metric")
762 108 : });
763 :
764 3 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
765 3 : register_int_counter!(
766 : "pageserver_remote_ondemand_downloaded_layers_total",
767 : "Total on-demand downloaded layers"
768 : )
769 3 : .unwrap()
770 3 : });
771 :
772 3 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
773 3 : register_int_counter!(
774 : "pageserver_remote_ondemand_downloaded_bytes_total",
775 : "Total bytes of layers on-demand downloaded",
776 : )
777 3 : .unwrap()
778 3 : });
779 :
780 108 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
781 108 : register_uint_gauge_vec!(
782 : "pageserver_current_logical_size",
783 : "Current logical size grouped by timeline",
784 108 : &["tenant_id", "shard_id", "timeline_id"]
785 : )
786 108 : .expect("failed to define current logical size metric")
787 108 : });
788 :
789 108 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
790 108 : register_int_gauge_vec!(
791 : "pageserver_aux_file_estimated_size",
792 : "The size of all aux files for a timeline in aux file v2 store.",
793 108 : &["tenant_id", "shard_id", "timeline_id"]
794 : )
795 108 : .expect("failed to define a metric")
796 108 : });
797 :
798 108 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
799 108 : register_uint_gauge_vec!(
800 : "pageserver_valid_lsn_lease_count",
801 : "The number of valid leases after refreshing gc info.",
802 108 : &["tenant_id", "shard_id", "timeline_id"],
803 : )
804 108 : .expect("failed to define a metric")
805 108 : });
806 :
807 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
808 0 : register_int_counter!(
809 : "pageserver_circuit_breaker_broken",
810 : "How many times a circuit breaker has broken"
811 : )
812 0 : .expect("failed to define a metric")
813 0 : });
814 :
815 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
816 0 : register_int_counter!(
817 : "pageserver_circuit_breaker_unbroken",
818 : "How many times a circuit breaker has been un-broken (recovered)"
819 : )
820 0 : .expect("failed to define a metric")
821 0 : });
822 :
823 104 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
824 104 : register_int_counter!(
825 : "pageserver_compression_image_in_bytes_total",
826 : "Size of data written into image layers before compression"
827 : )
828 104 : .expect("failed to define a metric")
829 104 : });
830 :
831 104 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
832 104 : register_int_counter!(
833 : "pageserver_compression_image_in_bytes_considered",
834 : "Size of potentially compressible data written into image layers before compression"
835 : )
836 104 : .expect("failed to define a metric")
837 104 : });
838 :
839 104 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
840 104 : register_int_counter!(
841 : "pageserver_compression_image_in_bytes_chosen",
842 : "Size of data whose compressed form was written into image layers"
843 : )
844 104 : .expect("failed to define a metric")
845 104 : });
846 :
847 104 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
848 104 : register_int_counter!(
849 : "pageserver_compression_image_out_bytes_total",
850 : "Size of compressed image layer written"
851 : )
852 104 : .expect("failed to define a metric")
853 104 : });
854 :
855 5 : pub(crate) static RELSIZE_LATEST_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
856 5 : register_uint_gauge!(
857 : "pageserver_relsize_latest_cache_entries",
858 : "Number of entries in the latest relation size cache",
859 : )
860 5 : .expect("failed to define a metric")
861 5 : });
862 :
863 5 : pub(crate) static RELSIZE_LATEST_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
864 5 : register_int_counter!(
865 : "pageserver_relsize_latest_cache_hits",
866 : "Latest relation size cache hits",
867 : )
868 5 : .expect("failed to define a metric")
869 5 : });
870 :
871 4 : pub(crate) static RELSIZE_LATEST_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
872 4 : register_int_counter!(
873 : "pageserver_relsize_latest_cache_misses",
874 : "Relation size latest cache misses",
875 : )
876 4 : .expect("failed to define a metric")
877 4 : });
878 :
879 2 : pub(crate) static RELSIZE_SNAPSHOT_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
880 2 : register_uint_gauge!(
881 : "pageserver_relsize_snapshot_cache_entries",
882 : "Number of entries in the pitr relation size cache",
883 : )
884 2 : .expect("failed to define a metric")
885 2 : });
886 :
887 2 : pub(crate) static RELSIZE_SNAPSHOT_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
888 2 : register_int_counter!(
889 : "pageserver_relsize_snapshot_cache_hits",
890 : "Pitr relation size cache hits",
891 : )
892 2 : .expect("failed to define a metric")
893 2 : });
894 :
895 3 : pub(crate) static RELSIZE_SNAPSHOT_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
896 3 : register_int_counter!(
897 : "pageserver_relsize_snapshot_cache_misses",
898 : "Relation size snapshot cache misses",
899 : )
900 3 : .expect("failed to define a metric")
901 3 : });
902 :
903 2 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
904 2 : register_int_counter!(
905 : "pageserver_relsize_cache_misses_old",
906 : "Relation size cache misses where the lookup LSN is older than the last relation update"
907 : )
908 2 : .expect("failed to define a metric")
909 2 : });
910 :
911 : pub(crate) mod initial_logical_size {
912 : use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
913 : use once_cell::sync::Lazy;
914 :
915 : pub(crate) struct StartCalculation(IntCounterVec);
916 108 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
917 108 : StartCalculation(
918 108 : register_int_counter_vec!(
919 108 : "pageserver_initial_logical_size_start_calculation",
920 108 : "Incremented each time we start an initial logical size calculation attempt. \
921 108 : The `circumstances` label provides some additional details.",
922 108 : &["attempt", "circumstances"]
923 108 : )
924 108 : .unwrap(),
925 108 : )
926 108 : });
927 :
928 : struct DropCalculation {
929 : first: IntCounter,
930 : retry: IntCounter,
931 : }
932 :
933 108 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
934 108 : let vec = register_int_counter_vec!(
935 : "pageserver_initial_logical_size_drop_calculation",
936 : "Incremented each time we abort a started size calculation attmpt.",
937 108 : &["attempt"]
938 : )
939 108 : .unwrap();
940 108 : DropCalculation {
941 108 : first: vec.with_label_values(&["first"]),
942 108 : retry: vec.with_label_values(&["retry"]),
943 108 : }
944 108 : });
945 :
946 : pub(crate) struct Calculated {
947 : pub(crate) births: IntCounter,
948 : pub(crate) deaths: IntCounter,
949 : }
950 :
951 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
952 108 : births: register_int_counter!(
953 : "pageserver_initial_logical_size_finish_calculation",
954 : "Incremented every time we finish calculation of initial logical size.\
955 : If everything is working well, this should happen at most once per Timeline object."
956 : )
957 108 : .unwrap(),
958 108 : deaths: register_int_counter!(
959 : "pageserver_initial_logical_size_drop_finished_calculation",
960 : "Incremented when we drop a finished initial logical size calculation result.\
961 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
962 : )
963 108 : .unwrap(),
964 108 : });
965 :
966 : pub(crate) struct OngoingCalculationGuard {
967 : inc_drop_calculation: Option<IntCounter>,
968 : }
969 :
970 : #[derive(strum_macros::IntoStaticStr)]
971 : pub(crate) enum StartCircumstances {
972 : EmptyInitial,
973 : SkippedConcurrencyLimiter,
974 : AfterBackgroundTasksRateLimit,
975 : }
976 :
977 : impl StartCalculation {
978 114 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
979 114 : let circumstances_label: &'static str = circumstances.into();
980 114 : self.0
981 114 : .with_label_values(&["first", circumstances_label])
982 114 : .inc();
983 114 : OngoingCalculationGuard {
984 114 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
985 114 : }
986 114 : }
987 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
988 0 : let circumstances_label: &'static str = circumstances.into();
989 0 : self.0
990 0 : .with_label_values(&["retry", circumstances_label])
991 0 : .inc();
992 0 : OngoingCalculationGuard {
993 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
994 0 : }
995 0 : }
996 : }
997 :
998 : impl Drop for OngoingCalculationGuard {
999 114 : fn drop(&mut self) {
1000 114 : if let Some(counter) = self.inc_drop_calculation.take() {
1001 0 : counter.inc();
1002 114 : }
1003 114 : }
1004 : }
1005 :
1006 : impl OngoingCalculationGuard {
1007 114 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
1008 114 : drop(self.inc_drop_calculation.take());
1009 114 : CALCULATED.births.inc();
1010 114 : FinishedCalculationGuard {
1011 114 : inc_on_drop: CALCULATED.deaths.clone(),
1012 114 : }
1013 114 : }
1014 : }
1015 :
1016 : pub(crate) struct FinishedCalculationGuard {
1017 : inc_on_drop: IntCounter,
1018 : }
1019 :
1020 : impl Drop for FinishedCalculationGuard {
1021 3 : fn drop(&mut self) {
1022 3 : self.inc_on_drop.inc();
1023 3 : }
1024 : }
1025 :
1026 : // context: https://github.com/neondatabase/neon/issues/5963
1027 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
1028 0 : Lazy::new(|| {
1029 0 : register_int_counter!(
1030 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
1031 : "Counter for the following event: walreceiver calls\
1032 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
1033 : )
1034 0 : .unwrap()
1035 0 : });
1036 : }
1037 :
1038 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
1039 0 : register_uint_gauge_vec!(
1040 : "pageserver_directory_entries_count",
1041 : "Sum of the entries in pageserver-stored directory listings",
1042 0 : &["tenant_id", "shard_id", "timeline_id"]
1043 : )
1044 0 : .expect("failed to define a metric")
1045 0 : });
1046 :
1047 109 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
1048 109 : register_uint_gauge_vec!(
1049 : "pageserver_tenant_states_count",
1050 : "Count of tenants per state",
1051 109 : &["state"]
1052 : )
1053 109 : .expect("Failed to register pageserver_tenant_states_count metric")
1054 109 : });
1055 :
1056 108 : pub(crate) static TIMELINE_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
1057 108 : register_uint_gauge_vec!(
1058 : "pageserver_timeline_states_count",
1059 : "Count of timelines per state",
1060 108 : &["state"]
1061 : )
1062 108 : .expect("Failed to register pageserver_timeline_states_count metric")
1063 108 : });
1064 :
1065 : /// A set of broken tenants.
1066 : ///
1067 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
1068 : /// tenant.
1069 5 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
1070 5 : register_uint_gauge_vec!(
1071 : "pageserver_broken_tenants_count",
1072 : "Set of broken tenants",
1073 5 : &["tenant_id", "shard_id"]
1074 : )
1075 5 : .expect("Failed to register pageserver_tenant_states_count metric")
1076 5 : });
1077 :
1078 3 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
1079 3 : register_uint_gauge_vec!(
1080 : "pageserver_tenant_synthetic_cached_size_bytes",
1081 : "Synthetic size of each tenant in bytes",
1082 3 : &["tenant_id"]
1083 : )
1084 3 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
1085 3 : });
1086 :
1087 109 : pub(crate) static TENANT_OFFLOADED_TIMELINES: Lazy<UIntGaugeVec> = Lazy::new(|| {
1088 109 : register_uint_gauge_vec!(
1089 : "pageserver_tenant_offloaded_timelines",
1090 : "Number of offloaded timelines of a tenant",
1091 109 : &["tenant_id", "shard_id"]
1092 : )
1093 109 : .expect("Failed to register pageserver_tenant_offloaded_timelines metric")
1094 109 : });
1095 :
1096 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
1097 0 : register_histogram_vec!(
1098 : "pageserver_eviction_iteration_duration_seconds_global",
1099 : "Time spent on a single eviction iteration",
1100 0 : &["period_secs", "threshold_secs"],
1101 0 : STORAGE_OP_BUCKETS.into(),
1102 : )
1103 0 : .expect("failed to define a metric")
1104 0 : });
1105 :
1106 108 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
1107 108 : register_int_counter_vec!(
1108 : "pageserver_evictions",
1109 : "Number of layers evicted from the pageserver",
1110 108 : &["tenant_id", "shard_id", "timeline_id"]
1111 : )
1112 108 : .expect("failed to define a metric")
1113 108 : });
1114 :
1115 108 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
1116 108 : register_int_counter_vec!(
1117 : "pageserver_evictions_with_low_residence_duration",
1118 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
1119 : Residence duration is determined using the `residence_duration_data_source`.",
1120 108 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
1121 : )
1122 108 : .expect("failed to define a metric")
1123 108 : });
1124 :
1125 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
1126 0 : register_int_counter!(
1127 : "pageserver_unexpected_ondemand_downloads_count",
1128 : "Number of unexpected on-demand downloads. \
1129 : We log more context for each increment, so, forgo any labels in this metric.",
1130 : )
1131 0 : .expect("failed to define a metric")
1132 0 : });
1133 :
1134 : /// How long did we take to start up? Broken down by labels to describe
1135 : /// different phases of startup.
1136 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
1137 0 : register_gauge_vec!(
1138 : "pageserver_startup_duration_seconds",
1139 : "Time taken by phases of pageserver startup, in seconds",
1140 0 : &["phase"]
1141 : )
1142 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
1143 0 : });
1144 :
1145 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
1146 0 : register_uint_gauge!(
1147 : "pageserver_startup_is_loading",
1148 : "1 while in initial startup load of tenants, 0 at other times"
1149 : )
1150 0 : .expect("Failed to register pageserver_startup_is_loading")
1151 0 : });
1152 :
1153 110 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
1154 110 : register_uint_gauge!(
1155 : "pageserver_timeline_ephemeral_bytes",
1156 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
1157 : )
1158 110 : .expect("Failed to register metric")
1159 110 : });
1160 :
1161 : /// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
1162 : /// like how long it took to load.
1163 : ///
1164 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
1165 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
1166 : /// at a timeline level than tenant level.
1167 : pub(crate) struct TenantMetrics {
1168 : /// How long did tenants take to go from construction to active state?
1169 : pub(crate) activation: Histogram,
1170 : pub(crate) preload: Histogram,
1171 : pub(crate) attach: Histogram,
1172 :
1173 : /// How many tenants are included in the initial startup of the pagesrever?
1174 : pub(crate) startup_scheduled: IntCounter,
1175 : pub(crate) startup_complete: IntCounter,
1176 : }
1177 :
1178 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
1179 0 : TenantMetrics {
1180 0 : activation: register_histogram!(
1181 0 : "pageserver_tenant_activation_seconds",
1182 0 : "Time taken by tenants to activate, in seconds",
1183 0 : CRITICAL_OP_BUCKETS.into()
1184 0 : )
1185 0 : .expect("Failed to register metric"),
1186 0 : preload: register_histogram!(
1187 0 : "pageserver_tenant_preload_seconds",
1188 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
1189 0 : CRITICAL_OP_BUCKETS.into()
1190 0 : )
1191 0 : .expect("Failed to register metric"),
1192 0 : attach: register_histogram!(
1193 0 : "pageserver_tenant_attach_seconds",
1194 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
1195 0 : CRITICAL_OP_BUCKETS.into()
1196 0 : )
1197 0 : .expect("Failed to register metric"),
1198 0 : startup_scheduled: register_int_counter!(
1199 0 : "pageserver_tenant_startup_scheduled",
1200 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
1201 0 : ).expect("Failed to register metric"),
1202 0 : startup_complete: register_int_counter!(
1203 0 : "pageserver_tenant_startup_complete",
1204 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
1205 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
1206 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
1207 0 : ).expect("Failed to register metric"),
1208 0 : }
1209 0 : });
1210 :
1211 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
1212 : #[derive(Debug)]
1213 : pub(crate) struct EvictionsWithLowResidenceDuration {
1214 : data_source: &'static str,
1215 : threshold: Duration,
1216 : counter: Option<IntCounter>,
1217 : }
1218 :
1219 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
1220 : data_source: &'static str,
1221 : threshold: Duration,
1222 : }
1223 :
1224 : impl EvictionsWithLowResidenceDurationBuilder {
1225 234 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
1226 234 : Self {
1227 234 : data_source,
1228 234 : threshold,
1229 234 : }
1230 234 : }
1231 :
1232 234 : fn build(
1233 234 : &self,
1234 234 : tenant_id: &str,
1235 234 : shard_id: &str,
1236 234 : timeline_id: &str,
1237 234 : ) -> EvictionsWithLowResidenceDuration {
1238 234 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
1239 234 : .get_metric_with_label_values(&[
1240 234 : tenant_id,
1241 234 : shard_id,
1242 234 : timeline_id,
1243 234 : self.data_source,
1244 234 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
1245 234 : ])
1246 234 : .unwrap();
1247 234 : EvictionsWithLowResidenceDuration {
1248 234 : data_source: self.data_source,
1249 234 : threshold: self.threshold,
1250 234 : counter: Some(counter),
1251 234 : }
1252 234 : }
1253 : }
1254 :
1255 : impl EvictionsWithLowResidenceDuration {
1256 239 : fn threshold_label_value(threshold: Duration) -> String {
1257 239 : format!("{}", threshold.as_secs())
1258 239 : }
1259 :
1260 2 : pub fn observe(&self, observed_value: Duration) {
1261 2 : if observed_value < self.threshold {
1262 2 : self.counter
1263 2 : .as_ref()
1264 2 : .expect("nobody calls this function after `remove_from_vec`")
1265 2 : .inc();
1266 2 : }
1267 2 : }
1268 :
1269 0 : pub fn change_threshold(
1270 0 : &mut self,
1271 0 : tenant_id: &str,
1272 0 : shard_id: &str,
1273 0 : timeline_id: &str,
1274 0 : new_threshold: Duration,
1275 0 : ) {
1276 0 : if new_threshold == self.threshold {
1277 0 : return;
1278 0 : }
1279 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1280 0 : self.data_source,
1281 0 : new_threshold,
1282 0 : )
1283 0 : .build(tenant_id, shard_id, timeline_id);
1284 0 : std::mem::swap(self, &mut with_new);
1285 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1286 0 : }
1287 :
1288 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
1289 5 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1290 5 : let Some(_counter) = self.counter.take() else {
1291 0 : return;
1292 : };
1293 :
1294 5 : let threshold = Self::threshold_label_value(self.threshold);
1295 :
1296 5 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1297 5 : tenant_id,
1298 5 : shard_id,
1299 5 : timeline_id,
1300 5 : self.data_source,
1301 5 : &threshold,
1302 5 : ]);
1303 :
1304 5 : match removed {
1305 0 : Err(e) => {
1306 : // this has been hit in staging as
1307 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1308 : // because we can be in the drop path already, don't risk:
1309 : // - "double-panic => illegal instruction" or
1310 : // - future "drop panick => abort"
1311 : //
1312 : // so just nag: (the error has the labels)
1313 0 : tracing::warn!(
1314 0 : "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"
1315 : );
1316 : }
1317 : Ok(()) => {
1318 : // to help identify cases where we double-remove the same values, let's log all
1319 : // deletions?
1320 5 : tracing::info!(
1321 0 : "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}",
1322 : self.data_source
1323 : );
1324 : }
1325 : }
1326 5 : }
1327 : }
1328 :
1329 : // Metrics collected on disk IO operations
1330 : //
1331 : // Roughly logarithmic scale.
1332 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1333 : 0.00005, // 50us
1334 : 0.00006, // 60us
1335 : 0.00007, // 70us
1336 : 0.00008, // 80us
1337 : 0.00009, // 90us
1338 : 0.0001, // 100us
1339 : 0.000110, // 110us
1340 : 0.000120, // 120us
1341 : 0.000130, // 130us
1342 : 0.000140, // 140us
1343 : 0.000150, // 150us
1344 : 0.000160, // 160us
1345 : 0.000170, // 170us
1346 : 0.000180, // 180us
1347 : 0.000190, // 190us
1348 : 0.000200, // 200us
1349 : 0.000210, // 210us
1350 : 0.000220, // 220us
1351 : 0.000230, // 230us
1352 : 0.000240, // 240us
1353 : 0.000250, // 250us
1354 : 0.000300, // 300us
1355 : 0.000350, // 350us
1356 : 0.000400, // 400us
1357 : 0.000450, // 450us
1358 : 0.000500, // 500us
1359 : 0.000600, // 600us
1360 : 0.000700, // 700us
1361 : 0.000800, // 800us
1362 : 0.000900, // 900us
1363 : 0.001000, // 1ms
1364 : 0.002000, // 2ms
1365 : 0.003000, // 3ms
1366 : 0.004000, // 4ms
1367 : 0.005000, // 5ms
1368 : 0.01000, // 10ms
1369 : 0.02000, // 20ms
1370 : 0.05000, // 50ms
1371 : ];
1372 :
1373 : /// VirtualFile fs operation variants.
1374 : ///
1375 : /// Operations:
1376 : /// - open ([`std::fs::OpenOptions::open`])
1377 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1378 : /// - close-by-replace (close by replacement algorithm)
1379 : /// - read (`read_at`)
1380 : /// - write (`write_at`)
1381 : /// - seek (modify internal position or file length query)
1382 : /// - fsync ([`std::fs::File::sync_all`])
1383 : /// - metadata ([`std::fs::File::metadata`])
1384 : #[derive(
1385 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1386 : )]
1387 : pub(crate) enum StorageIoOperation {
1388 : Open,
1389 : OpenAfterReplace,
1390 : Close,
1391 : CloseByReplace,
1392 : Read,
1393 : Write,
1394 : Seek,
1395 : Fsync,
1396 : Metadata,
1397 : SetLen,
1398 : }
1399 :
1400 : impl StorageIoOperation {
1401 1210 : pub fn as_str(&self) -> &'static str {
1402 1210 : match self {
1403 121 : StorageIoOperation::Open => "open",
1404 121 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1405 121 : StorageIoOperation::Close => "close",
1406 121 : StorageIoOperation::CloseByReplace => "close-by-replace",
1407 121 : StorageIoOperation::Read => "read",
1408 121 : StorageIoOperation::Write => "write",
1409 121 : StorageIoOperation::Seek => "seek",
1410 121 : StorageIoOperation::Fsync => "fsync",
1411 121 : StorageIoOperation::Metadata => "metadata",
1412 121 : StorageIoOperation::SetLen => "set_len",
1413 : }
1414 1210 : }
1415 : }
1416 :
1417 : /// Tracks time taken by fs operations near VirtualFile.
1418 : #[derive(Debug)]
1419 : pub(crate) struct StorageIoTime {
1420 : metrics: [Histogram; StorageIoOperation::COUNT],
1421 : }
1422 :
1423 : impl StorageIoTime {
1424 121 : fn new() -> Self {
1425 121 : let storage_io_histogram_vec = register_histogram_vec!(
1426 : "pageserver_io_operations_seconds",
1427 : "Time spent in IO operations",
1428 121 : &["operation"],
1429 121 : STORAGE_IO_TIME_BUCKETS.into()
1430 : )
1431 121 : .expect("failed to define a metric");
1432 1210 : let metrics = std::array::from_fn(|i| {
1433 1210 : let op = StorageIoOperation::from_repr(i).unwrap();
1434 1210 : storage_io_histogram_vec
1435 1210 : .get_metric_with_label_values(&[op.as_str()])
1436 1210 : .unwrap()
1437 1210 : });
1438 121 : Self { metrics }
1439 121 : }
1440 :
1441 495204 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1442 495204 : &self.metrics[op as usize]
1443 495204 : }
1444 : }
1445 :
1446 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1447 :
1448 : #[derive(Clone, Copy)]
1449 : #[repr(usize)]
1450 : pub(crate) enum StorageIoSizeOperation {
1451 : Read,
1452 : Write,
1453 : }
1454 :
1455 : impl StorageIoSizeOperation {
1456 : pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
1457 :
1458 752 : fn as_str(&self) -> &'static str {
1459 752 : Self::VARIANTS[*self as usize]
1460 752 : }
1461 : }
1462 :
1463 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1464 142 : pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1465 142 : register_uint_gauge_vec!(
1466 : "pageserver_io_operations_bytes_total",
1467 : "Total amount of bytes read/written in IO operations",
1468 142 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1469 : )
1470 142 : .expect("failed to define a metric")
1471 142 : });
1472 :
1473 : #[derive(Clone, Debug)]
1474 : pub(crate) struct StorageIoSizeMetrics {
1475 : pub read: UIntGauge,
1476 : pub write: UIntGauge,
1477 : }
1478 :
1479 : impl StorageIoSizeMetrics {
1480 376 : pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
1481 376 : let read = STORAGE_IO_SIZE
1482 376 : .get_metric_with_label_values(&[
1483 376 : StorageIoSizeOperation::Read.as_str(),
1484 376 : tenant_id,
1485 376 : shard_id,
1486 376 : timeline_id,
1487 376 : ])
1488 376 : .unwrap();
1489 376 : let write = STORAGE_IO_SIZE
1490 376 : .get_metric_with_label_values(&[
1491 376 : StorageIoSizeOperation::Write.as_str(),
1492 376 : tenant_id,
1493 376 : shard_id,
1494 376 : timeline_id,
1495 376 : ])
1496 376 : .unwrap();
1497 376 : Self { read, write }
1498 376 : }
1499 : }
1500 :
1501 : #[cfg(not(test))]
1502 : pub(crate) mod virtual_file_descriptor_cache {
1503 : use super::*;
1504 :
1505 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1506 0 : register_uint_gauge!(
1507 : "pageserver_virtual_file_descriptor_cache_size_max",
1508 : "Maximum number of open file descriptors in the cache."
1509 : )
1510 0 : .unwrap()
1511 0 : });
1512 :
1513 : // SIZE_CURRENT: derive it like so:
1514 : // ```
1515 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
1516 : // -ignoring(operation)
1517 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
1518 : // ```
1519 : }
1520 :
1521 : #[cfg(not(test))]
1522 : pub(crate) mod virtual_file_io_engine {
1523 : use super::*;
1524 :
1525 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1526 0 : register_uint_gauge_vec!(
1527 : "pageserver_virtual_file_io_engine_kind",
1528 : "The configured io engine for VirtualFile",
1529 0 : &["kind"],
1530 : )
1531 0 : .unwrap()
1532 0 : });
1533 : }
1534 :
1535 : pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
1536 : pub(crate) struct SmgrOpTimerInner {
1537 : global_execution_latency_histo: Histogram,
1538 : per_timeline_execution_latency_histo: Option<Histogram>,
1539 :
1540 : global_batch_wait_time: Histogram,
1541 : per_timeline_batch_wait_time: Histogram,
1542 :
1543 : global_flush_in_progress_micros: IntCounter,
1544 : per_timeline_flush_in_progress_micros: IntCounter,
1545 :
1546 : throttling: Arc<tenant_throttling::Pagestream>,
1547 :
1548 : timings: SmgrOpTimerState,
1549 : }
1550 :
1551 : /// The stages of request processing are represented by the enum variants.
1552 : /// Used as part of [`SmgrOpTimerInner::timings`].
1553 : ///
1554 : /// Request processing calls into the `SmgrOpTimer::observe_*` methods at the
1555 : /// transition points.
1556 : /// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]
1557 : /// to the next state.
1558 : ///
1559 : /// Each request goes through every stage, in all configurations.
1560 : ///
1561 : #[derive(Debug)]
1562 : enum SmgrOpTimerState {
1563 : Received {
1564 : // In the future, we may want to track the full time the request spent
1565 : // inside pageserver process (time spent in kernel buffers can't be tracked).
1566 : // `received_at` would be used for that.
1567 : #[allow(dead_code)]
1568 : received_at: Instant,
1569 : },
1570 : Throttling {
1571 : throttle_started_at: Instant,
1572 : },
1573 : Batching {
1574 : throttle_done_at: Instant,
1575 : },
1576 : Executing {
1577 : execution_started_at: Instant,
1578 : },
1579 : Flushing,
1580 : // NB: when adding observation points, remember to update the Drop impl.
1581 : }
1582 :
1583 : // NB: when adding observation points, remember to update the Drop impl.
1584 : impl SmgrOpTimer {
1585 : /// See [`SmgrOpTimerState`] for more context.
1586 0 : pub(crate) fn observe_throttle_start(&mut self, at: Instant) {
1587 0 : let Some(inner) = self.0.as_mut() else {
1588 0 : return;
1589 : };
1590 0 : let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {
1591 0 : return;
1592 : };
1593 0 : inner.throttling.count_accounted_start.inc();
1594 0 : inner.timings = SmgrOpTimerState::Throttling {
1595 0 : throttle_started_at: at,
1596 0 : };
1597 0 : }
1598 :
1599 : /// See [`SmgrOpTimerState`] for more context.
1600 0 : pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {
1601 0 : let Some(inner) = self.0.as_mut() else {
1602 0 : return;
1603 : };
1604 : let SmgrOpTimerState::Throttling {
1605 0 : throttle_started_at,
1606 0 : } = &inner.timings
1607 : else {
1608 0 : return;
1609 : };
1610 0 : inner.throttling.count_accounted_finish.inc();
1611 0 : match throttle {
1612 0 : ThrottleResult::NotThrottled { end } => {
1613 0 : inner.timings = SmgrOpTimerState::Batching {
1614 0 : throttle_done_at: end,
1615 0 : };
1616 0 : }
1617 0 : ThrottleResult::Throttled { end } => {
1618 0 : // update metrics
1619 0 : inner.throttling.count_throttled.inc();
1620 0 : inner
1621 0 : .throttling
1622 0 : .wait_time
1623 0 : .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());
1624 0 : // state transition
1625 0 : inner.timings = SmgrOpTimerState::Batching {
1626 0 : throttle_done_at: end,
1627 0 : };
1628 0 : }
1629 : }
1630 0 : }
1631 :
1632 : /// See [`SmgrOpTimerState`] for more context.
1633 0 : pub(crate) fn observe_execution_start(&mut self, at: Instant) {
1634 0 : let Some(inner) = self.0.as_mut() else {
1635 0 : return;
1636 : };
1637 0 : let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {
1638 0 : return;
1639 : };
1640 : // update metrics
1641 0 : let batch = at - *throttle_done_at;
1642 0 : inner.global_batch_wait_time.observe(batch.as_secs_f64());
1643 0 : inner
1644 0 : .per_timeline_batch_wait_time
1645 0 : .observe(batch.as_secs_f64());
1646 : // state transition
1647 0 : inner.timings = SmgrOpTimerState::Executing {
1648 0 : execution_started_at: at,
1649 0 : }
1650 0 : }
1651 :
1652 : /// For all but the first caller, this is a no-op.
1653 : /// The first callers receives Some, subsequent ones None.
1654 : ///
1655 : /// See [`SmgrOpTimerState`] for more context.
1656 0 : pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option<SmgrOpFlushInProgress> {
1657 : // NB: unlike the other observe_* methods, this one take()s.
1658 : #[allow(clippy::question_mark)] // maintain similar code pattern.
1659 0 : let Some(mut inner) = self.0.take() else {
1660 0 : return None;
1661 : };
1662 : let SmgrOpTimerState::Executing {
1663 0 : execution_started_at,
1664 0 : } = &inner.timings
1665 : else {
1666 0 : return None;
1667 : };
1668 : // update metrics
1669 0 : let execution = at - *execution_started_at;
1670 0 : inner
1671 0 : .global_execution_latency_histo
1672 0 : .observe(execution.as_secs_f64());
1673 0 : if let Some(per_timeline_execution_latency_histo) =
1674 0 : &inner.per_timeline_execution_latency_histo
1675 0 : {
1676 0 : per_timeline_execution_latency_histo.observe(execution.as_secs_f64());
1677 0 : }
1678 :
1679 : // state transition
1680 0 : inner.timings = SmgrOpTimerState::Flushing;
1681 :
1682 : // return the flush in progress object which
1683 : // will do the remaining metrics updates
1684 : let SmgrOpTimerInner {
1685 0 : global_flush_in_progress_micros,
1686 0 : per_timeline_flush_in_progress_micros,
1687 : ..
1688 0 : } = inner;
1689 0 : Some(SmgrOpFlushInProgress {
1690 0 : global_micros: global_flush_in_progress_micros,
1691 0 : per_timeline_micros: per_timeline_flush_in_progress_micros,
1692 0 : })
1693 0 : }
1694 : }
1695 :
1696 : /// The last stage of request processing is serializing and flushing the request
1697 : /// into the TCP connection. We want to make slow flushes observable
1698 : /// _while they are occuring_, so this struct provides a wrapper method [`Self::measure`]
1699 : /// to periodically bump the metric.
1700 : ///
1701 : /// If in the future we decide that we're not interested in live updates, we can
1702 : /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
1703 : /// and remove this struct from the code base.
1704 : pub(crate) struct SmgrOpFlushInProgress {
1705 : global_micros: IntCounter,
1706 : per_timeline_micros: IntCounter,
1707 : }
1708 :
1709 : impl Drop for SmgrOpTimer {
1710 0 : fn drop(&mut self) {
1711 : // In case of early drop, update any of the remaining metrics with
1712 : // observations so that (started,finished) counter pairs balance out
1713 : // and all counters on the latency path have the the same number of
1714 : // observations.
1715 : // It's technically lying and it would be better if each metric had
1716 : // a separate label or similar for cancelled requests.
1717 : // But we don't have that right now and counter pairs balancing
1718 : // out is useful when using the metrics in panels and whatnot.
1719 0 : let now = Instant::now();
1720 0 : self.observe_throttle_start(now);
1721 0 : self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
1722 0 : self.observe_execution_start(now);
1723 0 : let maybe_flush_timer = self.observe_execution_end(now);
1724 0 : drop(maybe_flush_timer);
1725 0 : }
1726 : }
1727 :
1728 : impl SmgrOpFlushInProgress {
1729 : /// The caller must guarantee that `socket_fd`` outlives this function.
1730 0 : pub(crate) async fn measure<Fut, O>(self, started_at: Instant, fut: Fut, socket_fd: RawFd) -> O
1731 0 : where
1732 0 : Fut: std::future::Future<Output = O>,
1733 0 : {
1734 0 : let mut fut = std::pin::pin!(fut);
1735 :
1736 0 : let mut logged = false;
1737 0 : let mut last_counter_increment_at = started_at;
1738 0 : let mut observe_guard = scopeguard::guard(
1739 0 : |is_timeout| {
1740 0 : let now = Instant::now();
1741 :
1742 : // Increment counter
1743 0 : {
1744 0 : let elapsed_since_last_observe = now - last_counter_increment_at;
1745 0 : self.global_micros
1746 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1747 0 : self.per_timeline_micros
1748 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1749 0 : last_counter_increment_at = now;
1750 0 : }
1751 :
1752 : // Log something on every timeout, and on completion but only if we hit a timeout.
1753 0 : if is_timeout || logged {
1754 0 : logged = true;
1755 0 : let elapsed_total = now - started_at;
1756 0 : let msg = if is_timeout {
1757 0 : "slow flush ongoing"
1758 : } else {
1759 0 : "slow flush completed or cancelled"
1760 : };
1761 :
1762 0 : let (inq, outq) = {
1763 0 : // SAFETY: caller guarantees that `socket_fd` outlives this function.
1764 0 : #[cfg(target_os = "linux")]
1765 0 : unsafe {
1766 0 : (
1767 0 : utils::linux_socket_ioctl::inq(socket_fd).unwrap_or(-2),
1768 0 : utils::linux_socket_ioctl::outq(socket_fd).unwrap_or(-2),
1769 0 : )
1770 0 : }
1771 0 : #[cfg(not(target_os = "linux"))]
1772 0 : {
1773 0 : _ = socket_fd; // appease unused lint on macOS
1774 0 : (-1, -1)
1775 0 : }
1776 0 : };
1777 :
1778 0 : let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64());
1779 0 : tracing::info!(elapsed_total_secs, inq, outq, msg);
1780 0 : }
1781 0 : },
1782 0 : |mut observe| {
1783 0 : observe(false);
1784 0 : },
1785 : );
1786 :
1787 : loop {
1788 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1789 0 : Ok(v) => return v,
1790 0 : Err(_timeout) => {
1791 0 : (*observe_guard)(true);
1792 0 : }
1793 : }
1794 : }
1795 0 : }
1796 : }
1797 :
1798 : #[derive(
1799 : Debug,
1800 : Clone,
1801 : Copy,
1802 : IntoStaticStr,
1803 : strum_macros::EnumCount,
1804 : strum_macros::EnumIter,
1805 : strum_macros::FromRepr,
1806 : enum_map::Enum,
1807 : )]
1808 : #[strum(serialize_all = "snake_case")]
1809 : pub enum SmgrQueryType {
1810 : GetRelExists,
1811 : GetRelSize,
1812 : GetPageAtLsn,
1813 : GetDbSize,
1814 : GetSlruSegment,
1815 : #[cfg(feature = "testing")]
1816 : Test,
1817 : }
1818 :
1819 : #[derive(
1820 : Debug,
1821 : Clone,
1822 : Copy,
1823 : IntoStaticStr,
1824 : strum_macros::EnumCount,
1825 : strum_macros::EnumIter,
1826 : strum_macros::FromRepr,
1827 : enum_map::Enum,
1828 : )]
1829 : #[strum(serialize_all = "snake_case")]
1830 : pub enum GetPageBatchBreakReason {
1831 : BatchFull,
1832 : NonBatchableRequest,
1833 : NonUniformLsn,
1834 : SamePageAtDifferentLsn,
1835 : NonUniformTimeline,
1836 : ExecutorSteal,
1837 : #[cfg(feature = "testing")]
1838 : NonUniformKey,
1839 : }
1840 :
1841 : pub(crate) struct SmgrQueryTimePerTimeline {
1842 : global_started: [IntCounter; SmgrQueryType::COUNT],
1843 : global_latency: [Histogram; SmgrQueryType::COUNT],
1844 : per_timeline_getpage_started: IntCounter,
1845 : per_timeline_getpage_latency: Histogram,
1846 : global_batch_size: Histogram,
1847 : per_timeline_batch_size: Histogram,
1848 : global_flush_in_progress_micros: IntCounter,
1849 : per_timeline_flush_in_progress_micros: IntCounter,
1850 : global_batch_wait_time: Histogram,
1851 : per_timeline_batch_wait_time: Histogram,
1852 : global_batch_break_reason: [IntCounter; GetPageBatchBreakReason::COUNT],
1853 : per_timeline_batch_break_reason: GetPageBatchBreakReasonTimelineMetrics,
1854 : throttling: Arc<tenant_throttling::Pagestream>,
1855 : }
1856 :
1857 108 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1858 108 : register_int_counter_vec!(
1859 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1860 : "pageserver_smgr_query_started_global_count",
1861 : "Number of smgr queries started, aggregated by query type.",
1862 108 : &["smgr_query_type"],
1863 : )
1864 108 : .expect("failed to define a metric")
1865 108 : });
1866 :
1867 108 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1868 108 : register_int_counter_vec!(
1869 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1870 : "pageserver_smgr_query_started_count",
1871 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1872 108 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1873 : )
1874 108 : .expect("failed to define a metric")
1875 108 : });
1876 :
1877 : /// Per-timeline smgr histogram buckets should be the same as the compute buckets, such that the
1878 : /// metrics are comparable across compute and Pageserver. See also:
1879 : /// <https://github.com/neondatabase/neon/blob/1a87975d956a8ad17ec8b85da32a137ec4893fcc/pgxn/neon/neon_perf_counters.h#L18-L27>
1880 : /// <https://github.com/neondatabase/flux-fleet/blob/556182a939edda87ff1d85a6b02e5cec901e0e9e/apps/base/compute-metrics/scrape-compute-sql-exporter.yaml#L29-L35>
1881 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] =
1882 : &[0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.1, 1.0, 3.0];
1883 :
1884 108 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1885 108 : register_histogram_vec!(
1886 : "pageserver_smgr_query_seconds",
1887 : "Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
1888 108 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1889 108 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1890 : )
1891 108 : .expect("failed to define a metric")
1892 108 : });
1893 :
1894 108 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1895 108 : [
1896 108 : 1,
1897 108 : 10,
1898 108 : 20,
1899 108 : 40,
1900 108 : 60,
1901 108 : 80,
1902 108 : 100,
1903 108 : 200,
1904 108 : 300,
1905 108 : 400,
1906 108 : 500,
1907 108 : 600,
1908 108 : 700,
1909 108 : 800,
1910 108 : 900,
1911 108 : 1_000, // 1ms
1912 108 : 2_000,
1913 108 : 4_000,
1914 108 : 6_000,
1915 108 : 8_000,
1916 108 : 10_000, // 10ms
1917 108 : 20_000,
1918 108 : 40_000,
1919 108 : 60_000,
1920 108 : 80_000,
1921 108 : 100_000,
1922 108 : 200_000,
1923 108 : 400_000,
1924 108 : 600_000,
1925 108 : 800_000,
1926 108 : 1_000_000, // 1s
1927 108 : 2_000_000,
1928 108 : 4_000_000,
1929 108 : 6_000_000,
1930 108 : 8_000_000,
1931 108 : 10_000_000, // 10s
1932 108 : 20_000_000,
1933 108 : 50_000_000,
1934 108 : 100_000_000,
1935 108 : 200_000_000,
1936 108 : 1_000_000_000, // 1000s
1937 108 : ]
1938 108 : .into_iter()
1939 108 : .map(Duration::from_micros)
1940 4428 : .map(|d| d.as_secs_f64())
1941 108 : .collect()
1942 108 : });
1943 :
1944 108 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1945 108 : register_histogram_vec!(
1946 : "pageserver_smgr_query_seconds_global",
1947 : "Like pageserver_smgr_query_seconds, but aggregated to instance level.",
1948 108 : &["smgr_query_type"],
1949 108 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1950 : )
1951 108 : .expect("failed to define a metric")
1952 108 : });
1953 :
1954 108 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1955 108 : (1..=u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap())
1956 3456 : .map(|v| v.into())
1957 108 : .collect()
1958 108 : });
1959 :
1960 108 : static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1961 108 : register_histogram!(
1962 : "pageserver_page_service_batch_size_global",
1963 : "Batch size of pageserver page service requests",
1964 108 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
1965 : )
1966 108 : .expect("failed to define a metric")
1967 108 : });
1968 :
1969 108 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1970 108 : let mut buckets = Vec::new();
1971 756 : for i in 0.. {
1972 756 : let bucket = 1 << i;
1973 756 : if bucket > u32::try_from(DEFAULT_MAX_GET_VECTORED_KEYS).unwrap() {
1974 108 : break;
1975 648 : }
1976 648 : buckets.push(bucket.into());
1977 : }
1978 108 : buckets
1979 108 : });
1980 :
1981 108 : static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1982 108 : register_histogram_vec!(
1983 : "pageserver_page_service_batch_size",
1984 : "Batch size of pageserver page service requests",
1985 108 : &["tenant_id", "shard_id", "timeline_id"],
1986 108 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
1987 : )
1988 108 : .expect("failed to define a metric")
1989 108 : });
1990 :
1991 108 : static PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1992 108 : register_int_counter_vec!(
1993 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1994 : "pageserver_page_service_batch_break_reason_global",
1995 : "Reason for breaking batches of get page requests",
1996 108 : &["reason"],
1997 : )
1998 108 : .expect("failed to define a metric")
1999 108 : });
2000 :
2001 : struct GetPageBatchBreakReasonTimelineMetrics {
2002 : map: EnumMap<GetPageBatchBreakReason, IntCounter>,
2003 : }
2004 :
2005 : impl GetPageBatchBreakReasonTimelineMetrics {
2006 234 : fn new(tenant_id: &str, shard_slug: &str, timeline_id: &str) -> Self {
2007 : GetPageBatchBreakReasonTimelineMetrics {
2008 1638 : map: EnumMap::from_array(std::array::from_fn(|reason_idx| {
2009 1638 : let reason = GetPageBatchBreakReason::from_usize(reason_idx);
2010 1638 : PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.with_label_values(&[
2011 1638 : tenant_id,
2012 1638 : shard_slug,
2013 1638 : timeline_id,
2014 1638 : reason.into(),
2015 1638 : ])
2016 1638 : })),
2017 : }
2018 234 : }
2019 :
2020 0 : fn inc(&self, reason: GetPageBatchBreakReason) {
2021 0 : self.map[reason].inc()
2022 0 : }
2023 : }
2024 :
2025 108 : static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
2026 108 : register_int_counter_vec!(
2027 : "pageserver_page_service_batch_break_reason",
2028 : "Reason for breaking batches of get page requests",
2029 108 : &["tenant_id", "shard_id", "timeline_id", "reason"],
2030 : )
2031 108 : .expect("failed to define a metric")
2032 108 : });
2033 :
2034 0 : pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
2035 0 : register_int_gauge_vec!(
2036 : "pageserver_page_service_config_max_batch_size",
2037 : "Configured maximum batch size for the server-side batching functionality of page_service. \
2038 : Labels expose more of the configuration parameters.",
2039 0 : &["mode", "execution", "batching"]
2040 : )
2041 0 : .expect("failed to define a metric")
2042 0 : });
2043 :
2044 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
2045 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
2046 0 : let (label_values, value) = match conf {
2047 0 : PageServicePipeliningConfig::Serial => (["serial", "-", "-"], 1),
2048 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
2049 0 : max_batch_size,
2050 0 : execution,
2051 0 : batching,
2052 : }) => {
2053 0 : let mode = "pipelined";
2054 0 : let execution = match execution {
2055 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
2056 0 : "concurrent-futures"
2057 : }
2058 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
2059 : };
2060 0 : let batching = match batching {
2061 0 : PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => "uniform-lsn",
2062 0 : PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => "scattered-lsn",
2063 : };
2064 :
2065 0 : ([mode, execution, batching], max_batch_size.get())
2066 : }
2067 : };
2068 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
2069 0 : .with_label_values(&label_values)
2070 0 : .set(value.try_into().unwrap());
2071 0 : }
2072 :
2073 108 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
2074 108 : register_int_counter_vec!(
2075 : "pageserver_page_service_pagestream_flush_in_progress_micros",
2076 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
2077 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
2078 : easily discoverable in monitoring. \
2079 : Hence, this is NOT a completion latency historgram.",
2080 108 : &["tenant_id", "shard_id", "timeline_id"],
2081 : )
2082 108 : .expect("failed to define a metric")
2083 108 : });
2084 :
2085 108 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
2086 108 : register_int_counter!(
2087 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
2088 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
2089 : )
2090 108 : .expect("failed to define a metric")
2091 108 : });
2092 :
2093 108 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
2094 108 : register_histogram_vec!(
2095 : "pageserver_page_service_pagestream_batch_wait_time_seconds",
2096 : "Time a request spent waiting in its batch until the batch moved to throttle&execution.",
2097 108 : &["tenant_id", "shard_id", "timeline_id"],
2098 108 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
2099 : )
2100 108 : .expect("failed to define a metric")
2101 108 : });
2102 :
2103 108 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
2104 108 : register_histogram!(
2105 : "pageserver_page_service_pagestream_batch_wait_time_seconds_global",
2106 : "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
2107 108 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
2108 : )
2109 108 : .expect("failed to define a metric")
2110 108 : });
2111 :
2112 : impl SmgrQueryTimePerTimeline {
2113 234 : pub(crate) fn new(
2114 234 : tenant_shard_id: &TenantShardId,
2115 234 : timeline_id: &TimelineId,
2116 234 : pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,
2117 234 : ) -> Self {
2118 234 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2119 234 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
2120 234 : let timeline_id = timeline_id.to_string();
2121 1404 : let global_started = std::array::from_fn(|i| {
2122 1404 : let op = SmgrQueryType::from_repr(i).unwrap();
2123 1404 : SMGR_QUERY_STARTED_GLOBAL
2124 1404 : .get_metric_with_label_values(&[op.into()])
2125 1404 : .unwrap()
2126 1404 : });
2127 1404 : let global_latency = std::array::from_fn(|i| {
2128 1404 : let op = SmgrQueryType::from_repr(i).unwrap();
2129 1404 : SMGR_QUERY_TIME_GLOBAL
2130 1404 : .get_metric_with_label_values(&[op.into()])
2131 1404 : .unwrap()
2132 1404 : });
2133 :
2134 234 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
2135 234 : .get_metric_with_label_values(&[
2136 234 : SmgrQueryType::GetPageAtLsn.into(),
2137 234 : &tenant_id,
2138 234 : &shard_slug,
2139 234 : &timeline_id,
2140 234 : ])
2141 234 : .unwrap();
2142 234 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
2143 234 : .get_metric_with_label_values(&[
2144 234 : SmgrQueryType::GetPageAtLsn.into(),
2145 234 : &tenant_id,
2146 234 : &shard_slug,
2147 234 : &timeline_id,
2148 234 : ])
2149 234 : .unwrap();
2150 :
2151 234 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
2152 234 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
2153 234 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2154 234 : .unwrap();
2155 :
2156 234 : let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
2157 234 : let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
2158 234 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2159 234 : .unwrap();
2160 :
2161 1638 : let global_batch_break_reason = std::array::from_fn(|i| {
2162 1638 : let reason = GetPageBatchBreakReason::from_usize(i);
2163 1638 : PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL
2164 1638 : .get_metric_with_label_values(&[reason.into()])
2165 1638 : .unwrap()
2166 1638 : });
2167 234 : let per_timeline_batch_break_reason =
2168 234 : GetPageBatchBreakReasonTimelineMetrics::new(&tenant_id, &shard_slug, &timeline_id);
2169 :
2170 234 : let global_flush_in_progress_micros =
2171 234 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
2172 234 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
2173 234 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2174 234 : .unwrap();
2175 :
2176 234 : Self {
2177 234 : global_started,
2178 234 : global_latency,
2179 234 : per_timeline_getpage_latency,
2180 234 : per_timeline_getpage_started,
2181 234 : global_batch_size,
2182 234 : per_timeline_batch_size,
2183 234 : global_flush_in_progress_micros,
2184 234 : per_timeline_flush_in_progress_micros,
2185 234 : global_batch_wait_time,
2186 234 : per_timeline_batch_wait_time,
2187 234 : global_batch_break_reason,
2188 234 : per_timeline_batch_break_reason,
2189 234 : throttling: pagestream_throttle_metrics,
2190 234 : }
2191 234 : }
2192 0 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
2193 0 : self.global_started[op as usize].inc();
2194 :
2195 0 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
2196 0 : self.per_timeline_getpage_started.inc();
2197 0 : Some(self.per_timeline_getpage_latency.clone())
2198 : } else {
2199 0 : None
2200 : };
2201 :
2202 0 : SmgrOpTimer(Some(SmgrOpTimerInner {
2203 0 : global_execution_latency_histo: self.global_latency[op as usize].clone(),
2204 0 : per_timeline_execution_latency_histo: per_timeline_latency_histo,
2205 0 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
2206 0 : per_timeline_flush_in_progress_micros: self
2207 0 : .per_timeline_flush_in_progress_micros
2208 0 : .clone(),
2209 0 : global_batch_wait_time: self.global_batch_wait_time.clone(),
2210 0 : per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
2211 0 : throttling: self.throttling.clone(),
2212 0 : timings: SmgrOpTimerState::Received { received_at },
2213 0 : }))
2214 0 : }
2215 :
2216 : /// TODO: do something about this? seems odd, we have a similar call on SmgrOpTimer
2217 0 : pub(crate) fn observe_getpage_batch_start(
2218 0 : &self,
2219 0 : batch_size: usize,
2220 0 : break_reason: GetPageBatchBreakReason,
2221 0 : ) {
2222 0 : self.global_batch_size.observe(batch_size as f64);
2223 0 : self.per_timeline_batch_size.observe(batch_size as f64);
2224 :
2225 0 : self.global_batch_break_reason[break_reason.into_usize()].inc();
2226 0 : self.per_timeline_batch_break_reason.inc(break_reason);
2227 0 : }
2228 : }
2229 :
2230 : // keep in sync with control plane Go code so that we can validate
2231 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
2232 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
2233 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
2234 0 : [
2235 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
2236 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
2237 0 : ]
2238 0 : .map(|ms| (ms as f64) / 1000.0)
2239 0 : });
2240 :
2241 : pub(crate) struct BasebackupQueryTime {
2242 : ok: Histogram,
2243 : error: Histogram,
2244 : client_error: Histogram,
2245 : }
2246 :
2247 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
2248 0 : let vec = register_histogram_vec!(
2249 : "pageserver_basebackup_query_seconds",
2250 : "Histogram of basebackup queries durations, by result type",
2251 0 : &["result"],
2252 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
2253 : )
2254 0 : .expect("failed to define a metric");
2255 0 : BasebackupQueryTime {
2256 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
2257 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
2258 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
2259 0 : }
2260 0 : });
2261 :
2262 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
2263 : parent: &'a BasebackupQueryTime,
2264 : start: std::time::Instant,
2265 : }
2266 :
2267 : impl BasebackupQueryTime {
2268 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
2269 0 : let start = Instant::now();
2270 0 : BasebackupQueryTimeOngoingRecording {
2271 0 : parent: self,
2272 0 : start,
2273 0 : }
2274 0 : }
2275 : }
2276 :
2277 : impl BasebackupQueryTimeOngoingRecording<'_> {
2278 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
2279 0 : let elapsed = self.start.elapsed().as_secs_f64();
2280 : // If you want to change categorize of a specific error, also change it in `log_query_error`.
2281 0 : let metric = match res {
2282 0 : Ok(_) => &self.parent.ok,
2283 : Err(QueryError::Shutdown) | Err(QueryError::Reconnect) => {
2284 : // Do not observe ok/err for shutdown/reconnect.
2285 : // Reconnect error might be raised when the operation is waiting for LSN and the tenant shutdown interrupts
2286 : // the operation. A reconnect error will be issued and the client will retry.
2287 0 : return;
2288 : }
2289 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
2290 0 : if is_expected_io_error(io_error) =>
2291 : {
2292 0 : &self.parent.client_error
2293 : }
2294 0 : Err(_) => &self.parent.error,
2295 : };
2296 0 : metric.observe(elapsed);
2297 0 : }
2298 : }
2299 :
2300 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
2301 0 : register_int_counter_pair_vec!(
2302 : "pageserver_live_connections_started",
2303 : "Number of network connections that we started handling",
2304 : "pageserver_live_connections_finished",
2305 : "Number of network connections that we finished handling",
2306 0 : &["pageserver_connection_kind"]
2307 : )
2308 0 : .expect("failed to define a metric")
2309 0 : });
2310 :
2311 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
2312 : pub(crate) enum ComputeCommandKind {
2313 : PageStreamV3,
2314 : PageStreamV2,
2315 : Basebackup,
2316 : Fullbackup,
2317 : LeaseLsn,
2318 : }
2319 :
2320 : pub(crate) struct ComputeCommandCounters {
2321 : map: EnumMap<ComputeCommandKind, IntCounter>,
2322 : }
2323 :
2324 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
2325 0 : let inner = register_int_counter_vec!(
2326 : "pageserver_compute_commands",
2327 : "Number of compute -> pageserver commands processed",
2328 0 : &["command"]
2329 : )
2330 0 : .expect("failed to define a metric");
2331 :
2332 : ComputeCommandCounters {
2333 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
2334 0 : let command = ComputeCommandKind::from_usize(i);
2335 0 : let command_str: &'static str = command.into();
2336 0 : inner.with_label_values(&[command_str])
2337 0 : })),
2338 : }
2339 0 : });
2340 :
2341 : impl ComputeCommandCounters {
2342 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
2343 0 : &self.map[command]
2344 0 : }
2345 : }
2346 :
2347 : // remote storage metrics
2348 :
2349 106 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
2350 106 : register_int_counter_pair_vec!(
2351 : "pageserver_remote_timeline_client_calls_started",
2352 : "Number of started calls to remote timeline client.",
2353 : "pageserver_remote_timeline_client_calls_finished",
2354 : "Number of finshed calls to remote timeline client.",
2355 106 : &[
2356 106 : "tenant_id",
2357 106 : "shard_id",
2358 106 : "timeline_id",
2359 106 : "file_kind",
2360 106 : "op_kind"
2361 106 : ],
2362 : )
2363 106 : .unwrap()
2364 106 : });
2365 :
2366 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
2367 105 : Lazy::new(|| {
2368 105 : register_int_counter_vec!(
2369 : "pageserver_remote_timeline_client_bytes_started",
2370 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2371 : The increment happens when the operation is scheduled.",
2372 105 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2373 : )
2374 105 : .expect("failed to define a metric")
2375 105 : });
2376 :
2377 105 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
2378 105 : register_int_counter_vec!(
2379 : "pageserver_remote_timeline_client_bytes_finished",
2380 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2381 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
2382 105 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2383 : )
2384 105 : .expect("failed to define a metric")
2385 105 : });
2386 :
2387 : pub(crate) struct TenantManagerMetrics {
2388 : tenant_slots_attached: UIntGauge,
2389 : tenant_slots_secondary: UIntGauge,
2390 : tenant_slots_inprogress: UIntGauge,
2391 : pub(crate) tenant_slot_writes: IntCounter,
2392 : pub(crate) unexpected_errors: IntCounter,
2393 : }
2394 :
2395 : impl TenantManagerMetrics {
2396 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
2397 : /// exactly: they track the lifetime of the slots _in the tenant map_.
2398 1 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
2399 1 : match slot {
2400 0 : TenantSlot::Attached(_) => {
2401 0 : self.tenant_slots_attached.inc();
2402 0 : }
2403 0 : TenantSlot::Secondary(_) => {
2404 0 : self.tenant_slots_secondary.inc();
2405 0 : }
2406 1 : TenantSlot::InProgress(_) => {
2407 1 : self.tenant_slots_inprogress.inc();
2408 1 : }
2409 : }
2410 1 : }
2411 :
2412 1 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
2413 1 : match slot {
2414 1 : TenantSlot::Attached(_) => {
2415 1 : self.tenant_slots_attached.dec();
2416 1 : }
2417 0 : TenantSlot::Secondary(_) => {
2418 0 : self.tenant_slots_secondary.dec();
2419 0 : }
2420 0 : TenantSlot::InProgress(_) => {
2421 0 : self.tenant_slots_inprogress.dec();
2422 0 : }
2423 : }
2424 1 : }
2425 :
2426 : #[cfg(all(debug_assertions, not(test)))]
2427 0 : pub(crate) fn slots_total(&self) -> u64 {
2428 0 : self.tenant_slots_attached.get()
2429 0 : + self.tenant_slots_secondary.get()
2430 0 : + self.tenant_slots_inprogress.get()
2431 0 : }
2432 : }
2433 :
2434 1 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
2435 1 : let tenant_slots = register_uint_gauge_vec!(
2436 : "pageserver_tenant_manager_slots",
2437 : "How many slots currently exist, including all attached, secondary and in-progress operations",
2438 1 : &["mode"]
2439 : )
2440 1 : .expect("failed to define a metric");
2441 1 : TenantManagerMetrics {
2442 1 : tenant_slots_attached: tenant_slots
2443 1 : .get_metric_with_label_values(&["attached"])
2444 1 : .unwrap(),
2445 1 : tenant_slots_secondary: tenant_slots
2446 1 : .get_metric_with_label_values(&["secondary"])
2447 1 : .unwrap(),
2448 1 : tenant_slots_inprogress: tenant_slots
2449 1 : .get_metric_with_label_values(&["inprogress"])
2450 1 : .unwrap(),
2451 1 : tenant_slot_writes: register_int_counter!(
2452 1 : "pageserver_tenant_manager_slot_writes",
2453 1 : "Writes to a tenant slot, including all of create/attach/detach/delete"
2454 1 : )
2455 1 : .expect("failed to define a metric"),
2456 1 : unexpected_errors: register_int_counter!(
2457 1 : "pageserver_tenant_manager_unexpected_errors_total",
2458 1 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
2459 1 : )
2460 1 : .expect("failed to define a metric"),
2461 1 : }
2462 1 : });
2463 :
2464 : pub(crate) struct DeletionQueueMetrics {
2465 : pub(crate) keys_submitted: IntCounter,
2466 : pub(crate) keys_dropped: IntCounter,
2467 : pub(crate) keys_executed: IntCounter,
2468 : pub(crate) keys_validated: IntCounter,
2469 : pub(crate) dropped_lsn_updates: IntCounter,
2470 : pub(crate) unexpected_errors: IntCounter,
2471 : pub(crate) remote_errors: IntCounterVec,
2472 : }
2473 18 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
2474 18 : DeletionQueueMetrics{
2475 18 :
2476 18 : keys_submitted: register_int_counter!(
2477 18 : "pageserver_deletion_queue_submitted_total",
2478 18 : "Number of objects submitted for deletion"
2479 18 : )
2480 18 : .expect("failed to define a metric"),
2481 18 :
2482 18 : keys_dropped: register_int_counter!(
2483 18 : "pageserver_deletion_queue_dropped_total",
2484 18 : "Number of object deletions dropped due to stale generation."
2485 18 : )
2486 18 : .expect("failed to define a metric"),
2487 18 :
2488 18 : keys_executed: register_int_counter!(
2489 18 : "pageserver_deletion_queue_executed_total",
2490 18 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
2491 18 : )
2492 18 : .expect("failed to define a metric"),
2493 18 :
2494 18 : keys_validated: register_int_counter!(
2495 18 : "pageserver_deletion_queue_validated_total",
2496 18 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
2497 18 : )
2498 18 : .expect("failed to define a metric"),
2499 18 :
2500 18 : dropped_lsn_updates: register_int_counter!(
2501 18 : "pageserver_deletion_queue_dropped_lsn_updates_total",
2502 18 : "Updates to remote_consistent_lsn dropped due to stale generation number."
2503 18 : )
2504 18 : .expect("failed to define a metric"),
2505 18 : unexpected_errors: register_int_counter!(
2506 18 : "pageserver_deletion_queue_unexpected_errors_total",
2507 18 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
2508 18 : )
2509 18 : .expect("failed to define a metric"),
2510 18 : remote_errors: register_int_counter_vec!(
2511 18 : "pageserver_deletion_queue_remote_errors_total",
2512 18 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
2513 18 : &["op_kind"],
2514 18 : )
2515 18 : .expect("failed to define a metric")
2516 18 : }
2517 18 : });
2518 :
2519 : pub(crate) struct SecondaryModeMetrics {
2520 : pub(crate) upload_heatmap: IntCounter,
2521 : pub(crate) upload_heatmap_errors: IntCounter,
2522 : pub(crate) upload_heatmap_duration: Histogram,
2523 : pub(crate) download_heatmap: IntCounter,
2524 : pub(crate) download_layer: IntCounter,
2525 : }
2526 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2527 0 : SecondaryModeMetrics {
2528 0 : upload_heatmap: register_int_counter!(
2529 0 : "pageserver_secondary_upload_heatmap",
2530 0 : "Number of heatmaps written to remote storage by attached tenants"
2531 0 : )
2532 0 : .expect("failed to define a metric"),
2533 0 : upload_heatmap_errors: register_int_counter!(
2534 0 : "pageserver_secondary_upload_heatmap_errors",
2535 0 : "Failures writing heatmap to remote storage"
2536 0 : )
2537 0 : .expect("failed to define a metric"),
2538 0 : upload_heatmap_duration: register_histogram!(
2539 0 : "pageserver_secondary_upload_heatmap_duration",
2540 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2541 0 : )
2542 0 : .expect("failed to define a metric"),
2543 0 : download_heatmap: register_int_counter!(
2544 0 : "pageserver_secondary_download_heatmap",
2545 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2546 0 : )
2547 0 : .expect("failed to define a metric"),
2548 0 : download_layer: register_int_counter!(
2549 0 : "pageserver_secondary_download_layer",
2550 0 : "Number of downloads of layers by secondary mode locations"
2551 0 : )
2552 0 : .expect("failed to define a metric"),
2553 0 : }
2554 0 : });
2555 :
2556 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2557 0 : register_uint_gauge_vec!(
2558 : "pageserver_secondary_resident_physical_size",
2559 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2560 0 : &["tenant_id", "shard_id"]
2561 : )
2562 0 : .expect("failed to define a metric")
2563 0 : });
2564 :
2565 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2566 0 : register_uint_gauge!(
2567 : "pageserver_utilization_score",
2568 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2569 : )
2570 0 : .expect("failed to define a metric")
2571 0 : });
2572 :
2573 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2574 0 : register_uint_gauge_vec!(
2575 : "pageserver_secondary_heatmap_total_size",
2576 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2577 0 : &["tenant_id", "shard_id"]
2578 : )
2579 0 : .expect("failed to define a metric")
2580 0 : });
2581 :
2582 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2583 : pub enum RemoteOpKind {
2584 : Upload,
2585 : Download,
2586 : Delete,
2587 : }
2588 : impl RemoteOpKind {
2589 7962 : pub fn as_str(&self) -> &'static str {
2590 7962 : match self {
2591 7521 : Self::Upload => "upload",
2592 34 : Self::Download => "download",
2593 407 : Self::Delete => "delete",
2594 : }
2595 7962 : }
2596 : }
2597 :
2598 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
2599 : pub enum RemoteOpFileKind {
2600 : Layer,
2601 : Index,
2602 : }
2603 : impl RemoteOpFileKind {
2604 7962 : pub fn as_str(&self) -> &'static str {
2605 7962 : match self {
2606 5608 : Self::Layer => "layer",
2607 2354 : Self::Index => "index",
2608 : }
2609 7962 : }
2610 : }
2611 :
2612 104 : pub(crate) static REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
2613 104 : register_histogram_vec!(
2614 : "pageserver_remote_timeline_client_seconds_global",
2615 : "Time spent on remote timeline client operations. \
2616 : Grouped by task_kind, file_kind, operation_kind and status. \
2617 : The task_kind is \
2618 : - for layer downloads, populated from RequestContext (primary objective of having the label) \
2619 : - for index downloads, set to 'unknown' \
2620 : - for any upload operation, set to 'RemoteUploadTask' \
2621 : This keeps dimensionality at bay. \
2622 : Does not account for time spent waiting in remote timeline client's queues.",
2623 104 : &["task_kind", "file_kind", "op_kind", "status"]
2624 : )
2625 104 : .expect("failed to define a metric")
2626 104 : });
2627 :
2628 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2629 0 : register_int_counter_vec!(
2630 : "pageserver_tenant_task_events",
2631 : "Number of task start/stop/fail events.",
2632 0 : &["event"],
2633 : )
2634 0 : .expect("Failed to register tenant_task_events metric")
2635 0 : });
2636 :
2637 : pub struct BackgroundLoopSemaphoreMetrics {
2638 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
2639 : durations: EnumMap<BackgroundLoopKind, Histogram>,
2640 : waiting_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2641 : running_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2642 : }
2643 :
2644 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> =
2645 10 : Lazy::new(|| {
2646 10 : let counters = register_int_counter_pair_vec!(
2647 : "pageserver_background_loop_semaphore_wait_start_count",
2648 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2649 : "pageserver_background_loop_semaphore_wait_finish_count",
2650 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2651 10 : &["task"],
2652 : )
2653 10 : .unwrap();
2654 :
2655 10 : let durations = register_histogram_vec!(
2656 : "pageserver_background_loop_semaphore_wait_seconds",
2657 : "Seconds spent waiting on background loop semaphore acquisition",
2658 10 : &["task"],
2659 10 : vec![0.01, 1.0, 5.0, 10.0, 30.0, 60.0, 180.0, 300.0, 600.0],
2660 : )
2661 10 : .unwrap();
2662 :
2663 10 : let waiting_tasks = register_int_gauge_vec!(
2664 : "pageserver_background_loop_semaphore_waiting_tasks",
2665 : "Number of background loop tasks waiting for semaphore",
2666 10 : &["task"],
2667 : )
2668 10 : .unwrap();
2669 :
2670 10 : let running_tasks = register_int_gauge_vec!(
2671 : "pageserver_background_loop_semaphore_running_tasks",
2672 : "Number of background loop tasks running concurrently",
2673 10 : &["task"],
2674 : )
2675 10 : .unwrap();
2676 :
2677 : BackgroundLoopSemaphoreMetrics {
2678 100 : counters: EnumMap::from_array(std::array::from_fn(|i| {
2679 100 : let kind = BackgroundLoopKind::from_usize(i);
2680 100 : counters.with_label_values(&[kind.into()])
2681 100 : })),
2682 100 : durations: EnumMap::from_array(std::array::from_fn(|i| {
2683 100 : let kind = BackgroundLoopKind::from_usize(i);
2684 100 : durations.with_label_values(&[kind.into()])
2685 100 : })),
2686 100 : waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2687 100 : let kind = BackgroundLoopKind::from_usize(i);
2688 100 : waiting_tasks.with_label_values(&[kind.into()])
2689 100 : })),
2690 100 : running_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2691 100 : let kind = BackgroundLoopKind::from_usize(i);
2692 100 : running_tasks.with_label_values(&[kind.into()])
2693 100 : })),
2694 : }
2695 10 : });
2696 :
2697 : impl BackgroundLoopSemaphoreMetrics {
2698 : /// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the
2699 : /// semaphore is acquired, and drop it when the task completes or is cancelled.
2700 192 : pub(crate) fn record(
2701 192 : &self,
2702 192 : task: BackgroundLoopKind,
2703 192 : ) -> BackgroundLoopSemaphoreMetricsRecorder {
2704 192 : BackgroundLoopSemaphoreMetricsRecorder::start(self, task)
2705 192 : }
2706 : }
2707 :
2708 : /// Records metrics for a background task.
2709 : pub struct BackgroundLoopSemaphoreMetricsRecorder<'a> {
2710 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2711 : task: BackgroundLoopKind,
2712 : start: Instant,
2713 : wait_counter_guard: Option<metrics::IntCounterPairGuard>,
2714 : }
2715 :
2716 : impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> {
2717 : /// Starts recording semaphore metrics, by recording wait time and incrementing
2718 : /// `wait_start_count` and `waiting_tasks`.
2719 192 : fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self {
2720 192 : metrics.waiting_tasks[task].inc();
2721 192 : Self {
2722 192 : metrics,
2723 192 : task,
2724 192 : start: Instant::now(),
2725 192 : wait_counter_guard: Some(metrics.counters[task].guard()),
2726 192 : }
2727 192 : }
2728 :
2729 : /// Signals that the semaphore has been acquired, and updates relevant metrics.
2730 192 : pub fn acquired(&mut self) -> Duration {
2731 192 : let waited = self.start.elapsed();
2732 192 : self.wait_counter_guard.take().expect("already acquired");
2733 192 : self.metrics.durations[self.task].observe(waited.as_secs_f64());
2734 192 : self.metrics.waiting_tasks[self.task].dec();
2735 192 : self.metrics.running_tasks[self.task].inc();
2736 192 : waited
2737 192 : }
2738 : }
2739 :
2740 : impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> {
2741 : /// The task either completed or was cancelled.
2742 192 : fn drop(&mut self) {
2743 192 : if self.wait_counter_guard.take().is_some() {
2744 0 : // Waiting.
2745 0 : self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64());
2746 0 : self.metrics.waiting_tasks[self.task].dec();
2747 192 : } else {
2748 192 : // Running.
2749 192 : self.metrics.running_tasks[self.task].dec();
2750 192 : }
2751 192 : }
2752 : }
2753 :
2754 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2755 0 : register_int_counter_vec!(
2756 : "pageserver_background_loop_period_overrun_count",
2757 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2758 0 : &["task", "period"],
2759 : )
2760 0 : .expect("failed to define a metric")
2761 0 : });
2762 :
2763 : // walreceiver metrics
2764 :
2765 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2766 0 : register_int_counter!(
2767 : "pageserver_walreceiver_started_connections_total",
2768 : "Number of started walreceiver connections"
2769 : )
2770 0 : .expect("failed to define a metric")
2771 0 : });
2772 :
2773 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2774 0 : register_int_gauge!(
2775 : "pageserver_walreceiver_active_managers",
2776 : "Number of active walreceiver managers"
2777 : )
2778 0 : .expect("failed to define a metric")
2779 0 : });
2780 :
2781 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2782 0 : register_int_counter_vec!(
2783 : "pageserver_walreceiver_switches_total",
2784 : "Number of walreceiver manager change_connection calls",
2785 0 : &["reason"]
2786 : )
2787 0 : .expect("failed to define a metric")
2788 0 : });
2789 :
2790 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2791 0 : register_int_counter!(
2792 : "pageserver_walreceiver_broker_updates_total",
2793 : "Number of received broker updates in walreceiver"
2794 : )
2795 0 : .expect("failed to define a metric")
2796 0 : });
2797 :
2798 1 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2799 1 : register_int_counter_vec!(
2800 : "pageserver_walreceiver_candidates_events_total",
2801 : "Number of walreceiver candidate events",
2802 1 : &["event"]
2803 : )
2804 1 : .expect("failed to define a metric")
2805 1 : });
2806 :
2807 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2808 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2809 :
2810 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2811 1 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2812 :
2813 : // Metrics collected on WAL redo operations
2814 : //
2815 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2816 : // for access to the postgres process ('wait') since there is only one for
2817 : // each tenant.
2818 :
2819 : /// Time buckets are small because we want to be able to measure the
2820 : /// smallest redo processing times. These buckets allow us to measure down
2821 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
2822 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
2823 : ///
2824 : /// Values up to 1s are recorded because metrics show that we have redo
2825 : /// durations and lock times larger than 0.250s.
2826 : macro_rules! redo_histogram_time_buckets {
2827 : () => {
2828 : vec![
2829 : 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
2830 : 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
2831 : 1.000_000,
2832 : ]
2833 : };
2834 : }
2835 :
2836 : /// While we're at it, also measure the amount of records replayed in each
2837 : /// operation. We have a global 'total replayed' counter, but that's not
2838 : /// as useful as 'what is the skew for how many records we replay in one
2839 : /// operation'.
2840 : macro_rules! redo_histogram_count_buckets {
2841 : () => {
2842 : vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
2843 : };
2844 : }
2845 :
2846 : macro_rules! redo_bytes_histogram_count_buckets {
2847 : () => {
2848 : // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
2849 : // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
2850 : vec![
2851 : 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2852 : 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
2853 : ]
2854 : };
2855 : }
2856 :
2857 : pub(crate) struct WalIngestMetrics {
2858 : pub(crate) bytes_received: IntCounter,
2859 : pub(crate) records_received: IntCounter,
2860 : pub(crate) records_observed: IntCounter,
2861 : pub(crate) records_committed: IntCounter,
2862 : pub(crate) values_committed_metadata_images: IntCounter,
2863 : pub(crate) values_committed_metadata_deltas: IntCounter,
2864 : pub(crate) values_committed_data_images: IntCounter,
2865 : pub(crate) values_committed_data_deltas: IntCounter,
2866 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2867 : }
2868 :
2869 : impl WalIngestMetrics {
2870 0 : pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {
2871 0 : if stats.metadata_images > 0 {
2872 0 : self.values_committed_metadata_images
2873 0 : .inc_by(stats.metadata_images);
2874 0 : }
2875 0 : if stats.metadata_deltas > 0 {
2876 0 : self.values_committed_metadata_deltas
2877 0 : .inc_by(stats.metadata_deltas);
2878 0 : }
2879 0 : if stats.data_images > 0 {
2880 0 : self.values_committed_data_images.inc_by(stats.data_images);
2881 0 : }
2882 0 : if stats.data_deltas > 0 {
2883 0 : self.values_committed_data_deltas.inc_by(stats.data_deltas);
2884 0 : }
2885 0 : }
2886 : }
2887 :
2888 5 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
2889 5 : let values_committed = register_int_counter_vec!(
2890 : "pageserver_wal_ingest_values_committed",
2891 : "Number of values committed to pageserver storage from WAL records",
2892 5 : &["class", "kind"],
2893 : )
2894 5 : .expect("failed to define a metric");
2895 :
2896 5 : WalIngestMetrics {
2897 5 : bytes_received: register_int_counter!(
2898 5 : "pageserver_wal_ingest_bytes_received",
2899 5 : "Bytes of WAL ingested from safekeepers",
2900 5 : )
2901 5 : .unwrap(),
2902 5 : records_received: register_int_counter!(
2903 5 : "pageserver_wal_ingest_records_received",
2904 5 : "Number of WAL records received from safekeepers"
2905 5 : )
2906 5 : .expect("failed to define a metric"),
2907 5 : records_observed: register_int_counter!(
2908 5 : "pageserver_wal_ingest_records_observed",
2909 5 : "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
2910 5 : )
2911 5 : .expect("failed to define a metric"),
2912 5 : records_committed: register_int_counter!(
2913 5 : "pageserver_wal_ingest_records_committed",
2914 5 : "Number of WAL records which resulted in writes to pageserver storage"
2915 5 : )
2916 5 : .expect("failed to define a metric"),
2917 5 : values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]),
2918 5 : values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]),
2919 5 : values_committed_data_images: values_committed.with_label_values(&["data", "image"]),
2920 5 : values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]),
2921 5 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2922 5 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2923 5 : "Total number of zero gap blocks written on relation extends"
2924 5 : )
2925 5 : .expect("failed to define a metric"),
2926 5 : }
2927 5 : });
2928 :
2929 108 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2930 108 : register_int_counter_vec!(
2931 : "pageserver_timeline_wal_records_received",
2932 : "Number of WAL records received per shard",
2933 108 : &["tenant_id", "shard_id", "timeline_id"]
2934 : )
2935 108 : .expect("failed to define a metric")
2936 108 : });
2937 :
2938 3 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2939 3 : register_histogram!(
2940 : "pageserver_wal_redo_seconds",
2941 : "Time spent on WAL redo",
2942 3 : redo_histogram_time_buckets!()
2943 : )
2944 3 : .expect("failed to define a metric")
2945 3 : });
2946 :
2947 3 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2948 3 : register_histogram!(
2949 : "pageserver_wal_redo_records_histogram",
2950 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2951 3 : redo_histogram_count_buckets!(),
2952 : )
2953 3 : .expect("failed to define a metric")
2954 3 : });
2955 :
2956 3 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2957 3 : register_histogram!(
2958 : "pageserver_wal_redo_bytes_histogram",
2959 : "Histogram of number of records replayed per redo sent to Postgres",
2960 3 : redo_bytes_histogram_count_buckets!(),
2961 : )
2962 3 : .expect("failed to define a metric")
2963 3 : });
2964 :
2965 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2966 3 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2967 3 : register_int_counter!(
2968 : "pageserver_replayed_wal_records_total",
2969 : "Number of WAL records replayed in WAL redo process"
2970 : )
2971 3 : .unwrap()
2972 3 : });
2973 :
2974 : #[rustfmt::skip]
2975 4 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2976 4 : register_histogram!(
2977 : "pageserver_wal_redo_process_launch_duration",
2978 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2979 4 : vec![
2980 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2981 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2982 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2983 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2984 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2985 : ],
2986 : )
2987 4 : .expect("failed to define a metric")
2988 4 : });
2989 :
2990 : pub(crate) struct WalRedoProcessCounters {
2991 : pub(crate) started: IntCounter,
2992 : pub(crate) killed_by_cause: EnumMap<WalRedoKillCause, IntCounter>,
2993 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2994 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2995 : }
2996 :
2997 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2998 : pub(crate) enum WalRedoKillCause {
2999 : WalRedoProcessDrop,
3000 : NoLeakChildDrop,
3001 : Startup,
3002 : }
3003 :
3004 : impl Default for WalRedoProcessCounters {
3005 4 : fn default() -> Self {
3006 4 : let started = register_int_counter!(
3007 : "pageserver_wal_redo_process_started_total",
3008 : "Number of WAL redo processes started",
3009 : )
3010 4 : .unwrap();
3011 :
3012 4 : let killed = register_int_counter_vec!(
3013 : "pageserver_wal_redo_process_stopped_total",
3014 : "Number of WAL redo processes stopped",
3015 4 : &["cause"],
3016 : )
3017 4 : .unwrap();
3018 :
3019 4 : let active_stderr_logger_tasks_started = register_int_counter!(
3020 : "pageserver_walredo_stderr_logger_tasks_started_total",
3021 : "Number of active walredo stderr logger tasks that have started",
3022 : )
3023 4 : .unwrap();
3024 :
3025 4 : let active_stderr_logger_tasks_finished = register_int_counter!(
3026 : "pageserver_walredo_stderr_logger_tasks_finished_total",
3027 : "Number of active walredo stderr logger tasks that have finished",
3028 : )
3029 4 : .unwrap();
3030 :
3031 : Self {
3032 4 : started,
3033 12 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
3034 12 : let cause = WalRedoKillCause::from_usize(i);
3035 12 : let cause_str: &'static str = cause.into();
3036 12 : killed.with_label_values(&[cause_str])
3037 12 : })),
3038 4 : active_stderr_logger_tasks_started,
3039 4 : active_stderr_logger_tasks_finished,
3040 : }
3041 4 : }
3042 : }
3043 :
3044 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
3045 : Lazy::new(WalRedoProcessCounters::default);
3046 :
3047 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
3048 : pub(crate) struct StorageTimeMetricsTimer {
3049 : metrics: StorageTimeMetrics,
3050 : start: Instant,
3051 : }
3052 :
3053 : impl StorageTimeMetricsTimer {
3054 1750 : fn new(metrics: StorageTimeMetrics) -> Self {
3055 1750 : Self {
3056 1750 : metrics,
3057 1750 : start: Instant::now(),
3058 1750 : }
3059 1750 : }
3060 :
3061 : /// Returns the elapsed duration of the timer.
3062 1749 : pub fn elapsed(&self) -> Duration {
3063 1749 : self.start.elapsed()
3064 1749 : }
3065 :
3066 : /// Record the time from creation to now and return it.
3067 1749 : pub fn stop_and_record(self) -> Duration {
3068 1749 : let duration = self.elapsed();
3069 1749 : let seconds = duration.as_secs_f64();
3070 1749 : self.metrics.timeline_sum.inc_by(seconds);
3071 1749 : self.metrics.timeline_count.inc();
3072 1749 : self.metrics.global_histogram.observe(seconds);
3073 1749 : duration
3074 1749 : }
3075 :
3076 : /// Turns this timer into a timer, which will always record -- usually this means recording
3077 : /// regardless an early `?` path was taken in a function.
3078 385 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
3079 385 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
3080 385 : }
3081 : }
3082 :
3083 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
3084 :
3085 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
3086 385 : fn drop(&mut self) {
3087 385 : if let Some(inner) = self.0.take() {
3088 385 : inner.stop_and_record();
3089 385 : }
3090 385 : }
3091 : }
3092 :
3093 : impl AlwaysRecordingStorageTimeMetricsTimer {
3094 : /// Returns the elapsed duration of the timer.
3095 0 : pub fn elapsed(&self) -> Duration {
3096 0 : self.0.as_ref().expect("not dropped yet").elapsed()
3097 0 : }
3098 : }
3099 :
3100 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
3101 : /// timeline total sum and count.
3102 : #[derive(Clone, Debug)]
3103 : pub(crate) struct StorageTimeMetrics {
3104 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
3105 : timeline_sum: Counter,
3106 : /// Number of oeprations, per operation, tenant_id and timeline_id
3107 : timeline_count: IntCounter,
3108 : /// Global histogram having only the "operation" label.
3109 : global_histogram: Histogram,
3110 : }
3111 :
3112 : impl StorageTimeMetrics {
3113 2106 : pub fn new(
3114 2106 : operation: StorageTimeOperation,
3115 2106 : tenant_id: &str,
3116 2106 : shard_id: &str,
3117 2106 : timeline_id: &str,
3118 2106 : ) -> Self {
3119 2106 : let operation: &'static str = operation.into();
3120 :
3121 2106 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
3122 2106 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
3123 2106 : .unwrap();
3124 2106 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
3125 2106 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
3126 2106 : .unwrap();
3127 2106 : let global_histogram = STORAGE_TIME_GLOBAL
3128 2106 : .get_metric_with_label_values(&[operation])
3129 2106 : .unwrap();
3130 :
3131 2106 : StorageTimeMetrics {
3132 2106 : timeline_sum,
3133 2106 : timeline_count,
3134 2106 : global_histogram,
3135 2106 : }
3136 2106 : }
3137 :
3138 : /// Starts timing a new operation.
3139 : ///
3140 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
3141 1750 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
3142 1750 : StorageTimeMetricsTimer::new(self.clone())
3143 1750 : }
3144 : }
3145 :
3146 : pub(crate) struct TimelineMetrics {
3147 : tenant_id: String,
3148 : shard_id: String,
3149 : timeline_id: String,
3150 : pub flush_time_histo: StorageTimeMetrics,
3151 : pub flush_delay_histo: StorageTimeMetrics,
3152 : pub compact_time_histo: StorageTimeMetrics,
3153 : pub create_images_time_histo: StorageTimeMetrics,
3154 : pub logical_size_histo: StorageTimeMetrics,
3155 : pub imitate_logical_size_histo: StorageTimeMetrics,
3156 : pub load_layer_map_histo: StorageTimeMetrics,
3157 : pub garbage_collect_histo: StorageTimeMetrics,
3158 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
3159 : pub last_record_lsn_gauge: IntGauge,
3160 : pub disk_consistent_lsn_gauge: IntGauge,
3161 : pub pitr_history_size: UIntGauge,
3162 : pub archival_size: UIntGauge,
3163 : pub layers_per_read: Histogram,
3164 : pub standby_horizon_gauge: IntGauge,
3165 : pub resident_physical_size_gauge: UIntGauge,
3166 : pub visible_physical_size_gauge: UIntGauge,
3167 : /// copy of LayeredTimeline.current_logical_size
3168 : pub current_logical_size_gauge: UIntGauge,
3169 : pub aux_file_size_gauge: IntGauge,
3170 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
3171 : pub evictions: IntCounter,
3172 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
3173 : /// Number of valid LSN leases.
3174 : pub valid_lsn_lease_count_gauge: UIntGauge,
3175 : pub wal_records_received: IntCounter,
3176 : pub storage_io_size: StorageIoSizeMetrics,
3177 : pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,
3178 : pub wait_lsn_start_finish_counterpair: IntCounterPair,
3179 : pub wait_ondemand_download_time: wait_ondemand_download_time::WaitOndemandDownloadTimeSum,
3180 : shutdown: std::sync::atomic::AtomicBool,
3181 : }
3182 :
3183 : impl TimelineMetrics {
3184 234 : pub fn new(
3185 234 : tenant_shard_id: &TenantShardId,
3186 234 : timeline_id_raw: &TimelineId,
3187 234 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
3188 234 : ) -> Self {
3189 234 : let tenant_id = tenant_shard_id.tenant_id.to_string();
3190 234 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
3191 234 : let timeline_id = timeline_id_raw.to_string();
3192 234 : let flush_time_histo = StorageTimeMetrics::new(
3193 234 : StorageTimeOperation::LayerFlush,
3194 234 : &tenant_id,
3195 234 : &shard_id,
3196 234 : &timeline_id,
3197 : );
3198 234 : let flush_delay_histo = StorageTimeMetrics::new(
3199 234 : StorageTimeOperation::LayerFlushDelay,
3200 234 : &tenant_id,
3201 234 : &shard_id,
3202 234 : &timeline_id,
3203 : );
3204 234 : let compact_time_histo = StorageTimeMetrics::new(
3205 234 : StorageTimeOperation::Compact,
3206 234 : &tenant_id,
3207 234 : &shard_id,
3208 234 : &timeline_id,
3209 : );
3210 234 : let create_images_time_histo = StorageTimeMetrics::new(
3211 234 : StorageTimeOperation::CreateImages,
3212 234 : &tenant_id,
3213 234 : &shard_id,
3214 234 : &timeline_id,
3215 : );
3216 234 : let logical_size_histo = StorageTimeMetrics::new(
3217 234 : StorageTimeOperation::LogicalSize,
3218 234 : &tenant_id,
3219 234 : &shard_id,
3220 234 : &timeline_id,
3221 : );
3222 234 : let imitate_logical_size_histo = StorageTimeMetrics::new(
3223 234 : StorageTimeOperation::ImitateLogicalSize,
3224 234 : &tenant_id,
3225 234 : &shard_id,
3226 234 : &timeline_id,
3227 : );
3228 234 : let load_layer_map_histo = StorageTimeMetrics::new(
3229 234 : StorageTimeOperation::LoadLayerMap,
3230 234 : &tenant_id,
3231 234 : &shard_id,
3232 234 : &timeline_id,
3233 : );
3234 234 : let garbage_collect_histo = StorageTimeMetrics::new(
3235 234 : StorageTimeOperation::Gc,
3236 234 : &tenant_id,
3237 234 : &shard_id,
3238 234 : &timeline_id,
3239 : );
3240 234 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
3241 234 : StorageTimeOperation::FindGcCutoffs,
3242 234 : &tenant_id,
3243 234 : &shard_id,
3244 234 : &timeline_id,
3245 : );
3246 234 : let last_record_lsn_gauge = LAST_RECORD_LSN
3247 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3248 234 : .unwrap();
3249 :
3250 234 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
3251 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3252 234 : .unwrap();
3253 :
3254 234 : let pitr_history_size = PITR_HISTORY_SIZE
3255 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3256 234 : .unwrap();
3257 :
3258 234 : let archival_size = TIMELINE_ARCHIVE_SIZE
3259 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3260 234 : .unwrap();
3261 :
3262 234 : let layers_per_read = LAYERS_PER_READ
3263 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3264 234 : .unwrap();
3265 :
3266 234 : let standby_horizon_gauge = STANDBY_HORIZON
3267 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3268 234 : .unwrap();
3269 234 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
3270 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3271 234 : .unwrap();
3272 234 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
3273 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3274 234 : .unwrap();
3275 : // TODO: we shouldn't expose this metric
3276 234 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
3277 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3278 234 : .unwrap();
3279 234 : let aux_file_size_gauge = AUX_FILE_SIZE
3280 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3281 234 : .unwrap();
3282 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
3283 234 : let directory_entries_count_gauge_closure = {
3284 234 : let tenant_shard_id = *tenant_shard_id;
3285 234 : let timeline_id_raw = *timeline_id_raw;
3286 0 : move || {
3287 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
3288 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
3289 0 : let timeline_id = timeline_id_raw.to_string();
3290 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
3291 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3292 0 : .unwrap();
3293 0 : gauge
3294 0 : }
3295 : };
3296 234 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
3297 234 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
3298 234 : let evictions = EVICTIONS
3299 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3300 234 : .unwrap();
3301 234 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
3302 234 : .build(&tenant_id, &shard_id, &timeline_id);
3303 :
3304 234 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
3305 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3306 234 : .unwrap();
3307 :
3308 234 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
3309 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3310 234 : .unwrap();
3311 :
3312 234 : let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
3313 :
3314 234 : let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {
3315 234 : global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),
3316 234 : per_tenant: WAIT_LSN_IN_PROGRESS_MICROS
3317 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3318 234 : .unwrap(),
3319 234 : };
3320 :
3321 234 : let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR
3322 234 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3323 234 : .unwrap();
3324 :
3325 234 : let wait_ondemand_download_time =
3326 234 : wait_ondemand_download_time::WaitOndemandDownloadTimeSum::new(
3327 234 : &tenant_id,
3328 234 : &shard_id,
3329 234 : &timeline_id,
3330 : );
3331 :
3332 234 : TIMELINE_STATE_METRIC.with_label_values(&["active"]).inc();
3333 :
3334 234 : TimelineMetrics {
3335 234 : tenant_id,
3336 234 : shard_id,
3337 234 : timeline_id,
3338 234 : flush_time_histo,
3339 234 : flush_delay_histo,
3340 234 : compact_time_histo,
3341 234 : create_images_time_histo,
3342 234 : logical_size_histo,
3343 234 : imitate_logical_size_histo,
3344 234 : garbage_collect_histo,
3345 234 : find_gc_cutoffs_histo,
3346 234 : load_layer_map_histo,
3347 234 : last_record_lsn_gauge,
3348 234 : disk_consistent_lsn_gauge,
3349 234 : pitr_history_size,
3350 234 : archival_size,
3351 234 : layers_per_read,
3352 234 : standby_horizon_gauge,
3353 234 : resident_physical_size_gauge,
3354 234 : visible_physical_size_gauge,
3355 234 : current_logical_size_gauge,
3356 234 : aux_file_size_gauge,
3357 234 : directory_entries_count_gauge,
3358 234 : evictions,
3359 234 : evictions_with_low_residence_duration: std::sync::RwLock::new(
3360 234 : evictions_with_low_residence_duration,
3361 234 : ),
3362 234 : storage_io_size,
3363 234 : valid_lsn_lease_count_gauge,
3364 234 : wal_records_received,
3365 234 : wait_lsn_in_progress_micros,
3366 234 : wait_lsn_start_finish_counterpair,
3367 234 : wait_ondemand_download_time,
3368 234 : shutdown: std::sync::atomic::AtomicBool::default(),
3369 234 : }
3370 234 : }
3371 :
3372 804 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
3373 804 : self.resident_physical_size_add(sz);
3374 804 : }
3375 :
3376 277 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
3377 277 : self.resident_physical_size_gauge.sub(sz);
3378 277 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
3379 277 : }
3380 :
3381 872 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
3382 872 : self.resident_physical_size_gauge.add(sz);
3383 872 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
3384 872 : }
3385 :
3386 5 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
3387 5 : self.resident_physical_size_gauge.get()
3388 5 : }
3389 :
3390 : /// Generates TIMELINE_LAYER labels for a persistent layer.
3391 1341 : fn make_layer_labels(&self, layer_desc: &PersistentLayerDesc) -> [&str; 5] {
3392 1341 : let level = match LayerMap::is_l0(&layer_desc.key_range, layer_desc.is_delta()) {
3393 715 : true => LayerLevel::L0,
3394 626 : false => LayerLevel::L1,
3395 : };
3396 1341 : let kind = match layer_desc.is_delta() {
3397 1107 : true => LayerKind::Delta,
3398 234 : false => LayerKind::Image,
3399 : };
3400 1341 : [
3401 1341 : &self.tenant_id,
3402 1341 : &self.shard_id,
3403 1341 : &self.timeline_id,
3404 1341 : level.into(),
3405 1341 : kind.into(),
3406 1341 : ]
3407 1341 : }
3408 :
3409 : /// Generates TIMELINE_LAYER labels for a frozen ephemeral layer.
3410 1191 : fn make_frozen_layer_labels(&self, _layer: &InMemoryLayer) -> [&str; 5] {
3411 1191 : [
3412 1191 : &self.tenant_id,
3413 1191 : &self.shard_id,
3414 1191 : &self.timeline_id,
3415 1191 : LayerLevel::Frozen.into(),
3416 1191 : LayerKind::Delta.into(), // by definition
3417 1191 : ]
3418 1191 : }
3419 :
3420 : /// Removes a frozen ephemeral layer to TIMELINE_LAYER metrics.
3421 595 : pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
3422 595 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3423 595 : let labels = self.make_frozen_layer_labels(layer);
3424 595 : let size = layer.len();
3425 595 : TIMELINE_LAYER_COUNT
3426 595 : .get_metric_with_label_values(&labels)
3427 595 : .unwrap()
3428 595 : .dec();
3429 595 : TIMELINE_LAYER_SIZE
3430 595 : .get_metric_with_label_values(&labels)
3431 595 : .unwrap()
3432 595 : .sub(size);
3433 595 : }
3434 :
3435 : /// Adds a frozen ephemeral layer to TIMELINE_LAYER metrics.
3436 596 : pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
3437 596 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3438 596 : let labels = self.make_frozen_layer_labels(layer);
3439 596 : let size = layer.len();
3440 596 : TIMELINE_LAYER_COUNT
3441 596 : .get_metric_with_label_values(&labels)
3442 596 : .unwrap()
3443 596 : .inc();
3444 596 : TIMELINE_LAYER_SIZE
3445 596 : .get_metric_with_label_values(&labels)
3446 596 : .unwrap()
3447 596 : .add(size);
3448 596 : }
3449 :
3450 : /// Removes a persistent layer from TIMELINE_LAYER metrics.
3451 352 : pub fn dec_layer(&self, layer_desc: &PersistentLayerDesc) {
3452 352 : let labels = self.make_layer_labels(layer_desc);
3453 352 : TIMELINE_LAYER_COUNT
3454 352 : .get_metric_with_label_values(&labels)
3455 352 : .unwrap()
3456 352 : .dec();
3457 352 : TIMELINE_LAYER_SIZE
3458 352 : .get_metric_with_label_values(&labels)
3459 352 : .unwrap()
3460 352 : .sub(layer_desc.file_size);
3461 352 : }
3462 :
3463 : /// Adds a persistent layer to TIMELINE_LAYER metrics.
3464 989 : pub fn inc_layer(&self, layer_desc: &PersistentLayerDesc) {
3465 989 : let labels = self.make_layer_labels(layer_desc);
3466 989 : TIMELINE_LAYER_COUNT
3467 989 : .get_metric_with_label_values(&labels)
3468 989 : .unwrap()
3469 989 : .inc();
3470 989 : TIMELINE_LAYER_SIZE
3471 989 : .get_metric_with_label_values(&labels)
3472 989 : .unwrap()
3473 989 : .add(layer_desc.file_size);
3474 989 : }
3475 :
3476 5 : pub(crate) fn shutdown(&self) {
3477 5 : let was_shutdown = self
3478 5 : .shutdown
3479 5 : .swap(true, std::sync::atomic::Ordering::Relaxed);
3480 :
3481 5 : if was_shutdown {
3482 : // this happens on tenant deletion because tenant first shuts down timelines, then
3483 : // invokes timeline deletion which first shuts down the timeline again.
3484 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
3485 0 : return;
3486 5 : }
3487 :
3488 5 : TIMELINE_STATE_METRIC.with_label_values(&["active"]).dec();
3489 :
3490 5 : let tenant_id = &self.tenant_id;
3491 5 : let timeline_id = &self.timeline_id;
3492 5 : let shard_id = &self.shard_id;
3493 5 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3494 5 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3495 5 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3496 5 : {
3497 5 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
3498 5 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3499 5 : }
3500 5 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3501 5 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3502 5 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
3503 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3504 5 : }
3505 :
3506 5 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3507 5 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3508 :
3509 15 : for ref level in LayerLevel::iter() {
3510 30 : for ref kind in LayerKind::iter() {
3511 30 : let labels: [&str; 5] =
3512 30 : [tenant_id, shard_id, timeline_id, level.into(), kind.into()];
3513 30 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&labels);
3514 30 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&labels);
3515 30 : }
3516 : }
3517 :
3518 5 : let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3519 :
3520 5 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3521 5 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3522 5 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3523 :
3524 5 : self.evictions_with_low_residence_duration
3525 5 : .write()
3526 5 : .unwrap()
3527 5 : .remove(tenant_id, shard_id, timeline_id);
3528 :
3529 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
3530 : // removed at the end of it. The idea is to have the metrics outlive the
3531 : // entity during which they're observed, e.g., the smgr metrics shall
3532 : // outlive an individual smgr connection, but not the timeline.
3533 :
3534 50 : for op in StorageTimeOperation::VARIANTS {
3535 45 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
3536 45 : op,
3537 45 : tenant_id,
3538 45 : shard_id,
3539 45 : timeline_id,
3540 45 : ]);
3541 45 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
3542 45 : op,
3543 45 : tenant_id,
3544 45 : shard_id,
3545 45 : timeline_id,
3546 45 : ]);
3547 45 : }
3548 :
3549 15 : for op in StorageIoSizeOperation::VARIANTS {
3550 10 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
3551 10 : }
3552 :
3553 : let _ =
3554 5 : WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3555 :
3556 5 : {
3557 5 : let mut res = [Ok(()), Ok(())];
3558 5 : WAIT_LSN_START_FINISH_COUNTERPAIR
3559 5 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);
3560 5 : }
3561 :
3562 5 : wait_ondemand_download_time::shutdown_timeline(tenant_id, shard_id, timeline_id);
3563 :
3564 5 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
3565 5 : SmgrQueryType::GetPageAtLsn.into(),
3566 5 : tenant_id,
3567 5 : shard_id,
3568 5 : timeline_id,
3569 5 : ]);
3570 5 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
3571 5 : SmgrQueryType::GetPageAtLsn.into(),
3572 5 : tenant_id,
3573 5 : shard_id,
3574 5 : timeline_id,
3575 5 : ]);
3576 5 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
3577 5 : tenant_id,
3578 5 : shard_id,
3579 5 : timeline_id,
3580 5 : ]);
3581 5 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
3582 5 : tenant_id,
3583 5 : shard_id,
3584 5 : timeline_id,
3585 5 : ]);
3586 5 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
3587 5 : tenant_id,
3588 5 : shard_id,
3589 5 : timeline_id,
3590 5 : ]);
3591 5 : let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
3592 5 : tenant_id,
3593 5 : shard_id,
3594 5 : timeline_id,
3595 5 : ]);
3596 :
3597 35 : for reason in GetPageBatchBreakReason::iter() {
3598 35 : let _ = PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.remove_label_values(&[
3599 35 : tenant_id,
3600 35 : shard_id,
3601 35 : timeline_id,
3602 35 : reason.into(),
3603 35 : ]);
3604 35 : }
3605 5 : }
3606 : }
3607 :
3608 3 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3609 3 : let tid = tenant_shard_id.tenant_id.to_string();
3610 3 : let shard_id = tenant_shard_id.shard_slug().to_string();
3611 :
3612 : // Only shard zero deals in synthetic sizes
3613 3 : if tenant_shard_id.is_shard_zero() {
3614 3 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
3615 3 : }
3616 3 : let _ = TENANT_OFFLOADED_TIMELINES.remove_label_values(&[&tid, &shard_id]);
3617 :
3618 3 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
3619 :
3620 : // we leave the BROKEN_TENANTS_SET entry if any
3621 3 : }
3622 :
3623 : /// Maintain a per timeline gauge in addition to the global gauge.
3624 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
3625 : last_set: AtomicU64,
3626 : gauge: UIntGauge,
3627 : }
3628 :
3629 : impl PerTimelineRemotePhysicalSizeGauge {
3630 239 : fn new(per_timeline_gauge: UIntGauge) -> Self {
3631 239 : Self {
3632 239 : last_set: AtomicU64::new(0),
3633 239 : gauge: per_timeline_gauge,
3634 239 : }
3635 239 : }
3636 1000 : pub(crate) fn set(&self, sz: u64) {
3637 1000 : self.gauge.set(sz);
3638 1000 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
3639 1000 : if sz < prev {
3640 20 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
3641 980 : } else {
3642 980 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
3643 980 : };
3644 1000 : }
3645 1 : pub(crate) fn get(&self) -> u64 {
3646 1 : self.gauge.get()
3647 1 : }
3648 : }
3649 :
3650 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
3651 10 : fn drop(&mut self) {
3652 10 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
3653 10 : }
3654 : }
3655 :
3656 : pub(crate) struct RemoteTimelineClientMetrics {
3657 : tenant_id: String,
3658 : shard_id: String,
3659 : timeline_id: String,
3660 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
3661 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
3662 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3663 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3664 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
3665 : }
3666 :
3667 : impl RemoteTimelineClientMetrics {
3668 239 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
3669 239 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
3670 239 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
3671 239 : let timeline_id_str = timeline_id.to_string();
3672 :
3673 239 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
3674 239 : REMOTE_PHYSICAL_SIZE
3675 239 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3676 239 : .unwrap(),
3677 : );
3678 :
3679 239 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
3680 239 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3681 239 : .unwrap();
3682 :
3683 239 : RemoteTimelineClientMetrics {
3684 239 : tenant_id: tenant_id_str,
3685 239 : shard_id: shard_id_str,
3686 239 : timeline_id: timeline_id_str,
3687 239 : calls: Mutex::new(HashMap::default()),
3688 239 : bytes_started_counter: Mutex::new(HashMap::default()),
3689 239 : bytes_finished_counter: Mutex::new(HashMap::default()),
3690 239 : remote_physical_size_gauge,
3691 239 : projected_remote_consistent_lsn_gauge,
3692 239 : }
3693 239 : }
3694 :
3695 1616 : pub fn remote_operation_time(
3696 1616 : &self,
3697 1616 : task_kind: Option<TaskKind>,
3698 1616 : file_kind: &RemoteOpFileKind,
3699 1616 : op_kind: &RemoteOpKind,
3700 1616 : status: &'static str,
3701 1616 : ) -> Histogram {
3702 1616 : REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY
3703 1616 : .get_metric_with_label_values(&[
3704 1616 : task_kind.as_ref().map(|tk| tk.into()).unwrap_or("unknown"),
3705 1616 : file_kind.as_str(),
3706 1616 : op_kind.as_str(),
3707 1616 : status,
3708 : ])
3709 1616 : .unwrap()
3710 1616 : }
3711 :
3712 3719 : fn calls_counter_pair(
3713 3719 : &self,
3714 3719 : file_kind: &RemoteOpFileKind,
3715 3719 : op_kind: &RemoteOpKind,
3716 3719 : ) -> IntCounterPair {
3717 3719 : let mut guard = self.calls.lock().unwrap();
3718 3719 : let key = (file_kind.as_str(), op_kind.as_str());
3719 3719 : let metric = guard.entry(key).or_insert_with(move || {
3720 429 : REMOTE_TIMELINE_CLIENT_CALLS
3721 429 : .get_metric_with_label_values(&[
3722 429 : &self.tenant_id,
3723 429 : &self.shard_id,
3724 429 : &self.timeline_id,
3725 429 : key.0,
3726 429 : key.1,
3727 429 : ])
3728 429 : .unwrap()
3729 429 : });
3730 3719 : metric.clone()
3731 3719 : }
3732 :
3733 894 : fn bytes_started_counter(
3734 894 : &self,
3735 894 : file_kind: &RemoteOpFileKind,
3736 894 : op_kind: &RemoteOpKind,
3737 894 : ) -> IntCounter {
3738 894 : let mut guard = self.bytes_started_counter.lock().unwrap();
3739 894 : let key = (file_kind.as_str(), op_kind.as_str());
3740 894 : let metric = guard.entry(key).or_insert_with(move || {
3741 169 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3742 169 : .get_metric_with_label_values(&[
3743 169 : &self.tenant_id,
3744 169 : &self.shard_id,
3745 169 : &self.timeline_id,
3746 169 : key.0,
3747 169 : key.1,
3748 169 : ])
3749 169 : .unwrap()
3750 169 : });
3751 894 : metric.clone()
3752 894 : }
3753 :
3754 1727 : fn bytes_finished_counter(
3755 1727 : &self,
3756 1727 : file_kind: &RemoteOpFileKind,
3757 1727 : op_kind: &RemoteOpKind,
3758 1727 : ) -> IntCounter {
3759 1727 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3760 1727 : let key = (file_kind.as_str(), op_kind.as_str());
3761 1727 : let metric = guard.entry(key).or_insert_with(move || {
3762 169 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3763 169 : .get_metric_with_label_values(&[
3764 169 : &self.tenant_id,
3765 169 : &self.shard_id,
3766 169 : &self.timeline_id,
3767 169 : key.0,
3768 169 : key.1,
3769 169 : ])
3770 169 : .unwrap()
3771 169 : });
3772 1727 : metric.clone()
3773 1727 : }
3774 : }
3775 :
3776 : #[cfg(test)]
3777 : impl RemoteTimelineClientMetrics {
3778 3 : pub fn get_bytes_started_counter_value(
3779 3 : &self,
3780 3 : file_kind: &RemoteOpFileKind,
3781 3 : op_kind: &RemoteOpKind,
3782 3 : ) -> Option<u64> {
3783 3 : let guard = self.bytes_started_counter.lock().unwrap();
3784 3 : let key = (file_kind.as_str(), op_kind.as_str());
3785 3 : guard.get(&key).map(|counter| counter.get())
3786 3 : }
3787 :
3788 3 : pub fn get_bytes_finished_counter_value(
3789 3 : &self,
3790 3 : file_kind: &RemoteOpFileKind,
3791 3 : op_kind: &RemoteOpKind,
3792 3 : ) -> Option<u64> {
3793 3 : let guard = self.bytes_finished_counter.lock().unwrap();
3794 3 : let key = (file_kind.as_str(), op_kind.as_str());
3795 3 : guard.get(&key).map(|counter| counter.get())
3796 3 : }
3797 : }
3798 :
3799 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3800 : #[must_use]
3801 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3802 : /// Decremented on drop.
3803 : calls_counter_pair: Option<IntCounterPair>,
3804 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
3805 : bytes_finished: Option<(IntCounter, u64)>,
3806 : }
3807 :
3808 : impl RemoteTimelineClientCallMetricGuard {
3809 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
3810 : /// The caller vouches to do the metric updates manually.
3811 1951 : pub fn will_decrement_manually(mut self) {
3812 : let RemoteTimelineClientCallMetricGuard {
3813 1951 : calls_counter_pair,
3814 1951 : bytes_finished,
3815 1951 : } = &mut self;
3816 1951 : calls_counter_pair.take();
3817 1951 : bytes_finished.take();
3818 1951 : }
3819 : }
3820 :
3821 : impl Drop for RemoteTimelineClientCallMetricGuard {
3822 1968 : fn drop(&mut self) {
3823 : let RemoteTimelineClientCallMetricGuard {
3824 1968 : calls_counter_pair,
3825 1968 : bytes_finished,
3826 1968 : } = self;
3827 1968 : if let Some(guard) = calls_counter_pair.take() {
3828 17 : guard.dec();
3829 1951 : }
3830 1968 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3831 0 : bytes_finished_metric.inc_by(*value);
3832 1968 : }
3833 1968 : }
3834 : }
3835 :
3836 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
3837 : /// track the byte size of this call in applicable metric(s).
3838 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
3839 : /// Do not account for this call's byte size in any metrics.
3840 : /// The `reason` field is there to make the call sites self-documenting
3841 : /// about why they don't need the metric.
3842 : DontTrackSize { reason: &'static str },
3843 : /// Track the byte size of the call in applicable metric(s).
3844 : Bytes(u64),
3845 : }
3846 :
3847 : impl RemoteTimelineClientMetrics {
3848 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3849 : ///
3850 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
3851 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
3852 : /// is more suitable.
3853 : /// Never do both.
3854 1968 : pub(crate) fn call_begin(
3855 1968 : &self,
3856 1968 : file_kind: &RemoteOpFileKind,
3857 1968 : op_kind: &RemoteOpKind,
3858 1968 : size: RemoteTimelineClientMetricsCallTrackSize,
3859 1968 : ) -> RemoteTimelineClientCallMetricGuard {
3860 1968 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3861 1968 : calls_counter_pair.inc();
3862 :
3863 1968 : let bytes_finished = match size {
3864 1074 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3865 : // nothing to do
3866 1074 : None
3867 : }
3868 894 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3869 894 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3870 894 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3871 894 : Some((finished_counter, size))
3872 : }
3873 : };
3874 1968 : RemoteTimelineClientCallMetricGuard {
3875 1968 : calls_counter_pair: Some(calls_counter_pair),
3876 1968 : bytes_finished,
3877 1968 : }
3878 1968 : }
3879 :
3880 : /// Manually udpate the metrics that track completions, instead of using the guard object.
3881 : /// Using the guard object is generally preferable.
3882 : /// See [`call_begin`](Self::call_begin) for more context.
3883 1751 : pub(crate) fn call_end(
3884 1751 : &self,
3885 1751 : file_kind: &RemoteOpFileKind,
3886 1751 : op_kind: &RemoteOpKind,
3887 1751 : size: RemoteTimelineClientMetricsCallTrackSize,
3888 1751 : ) {
3889 1751 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3890 1751 : calls_counter_pair.dec();
3891 1751 : match size {
3892 918 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3893 833 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3894 833 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3895 833 : }
3896 : }
3897 1751 : }
3898 : }
3899 :
3900 : impl Drop for RemoteTimelineClientMetrics {
3901 10 : fn drop(&mut self) {
3902 : let RemoteTimelineClientMetrics {
3903 10 : tenant_id,
3904 10 : shard_id,
3905 10 : timeline_id,
3906 10 : remote_physical_size_gauge,
3907 10 : calls,
3908 10 : bytes_started_counter,
3909 10 : bytes_finished_counter,
3910 10 : projected_remote_consistent_lsn_gauge,
3911 10 : } = self;
3912 12 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3913 12 : let mut res = [Ok(()), Ok(())];
3914 12 : REMOTE_TIMELINE_CLIENT_CALLS
3915 12 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3916 12 : // don't care about results
3917 12 : }
3918 10 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3919 3 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3920 3 : tenant_id,
3921 3 : shard_id,
3922 3 : timeline_id,
3923 3 : a,
3924 3 : b,
3925 3 : ]);
3926 3 : }
3927 10 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3928 3 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3929 3 : tenant_id,
3930 3 : shard_id,
3931 3 : timeline_id,
3932 3 : a,
3933 3 : b,
3934 3 : ]);
3935 3 : }
3936 10 : {
3937 10 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
3938 10 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3939 10 : }
3940 10 : {
3941 10 : let _ = projected_remote_consistent_lsn_gauge;
3942 10 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3943 10 : tenant_id,
3944 10 : shard_id,
3945 10 : timeline_id,
3946 10 : ]);
3947 10 : }
3948 10 : }
3949 : }
3950 :
3951 : /// Wrapper future that measures the time spent by a remote storage operation,
3952 : /// and records the time and success/failure as a prometheus metric.
3953 : pub(crate) trait MeasureRemoteOp<O, E>: Sized + Future<Output = Result<O, E>> {
3954 1676 : async fn measure_remote_op(
3955 1676 : self,
3956 1676 : task_kind: Option<TaskKind>, // not all caller contexts have a RequestContext / TaskKind handy
3957 1676 : file_kind: RemoteOpFileKind,
3958 1676 : op: RemoteOpKind,
3959 1676 : metrics: Arc<RemoteTimelineClientMetrics>,
3960 1676 : ) -> Result<O, E> {
3961 1676 : let start = Instant::now();
3962 1676 : let res = self.await;
3963 1616 : let duration = start.elapsed();
3964 1616 : let status = if res.is_ok() { &"success" } else { &"failure" };
3965 1616 : metrics
3966 1616 : .remote_operation_time(task_kind, &file_kind, &op, status)
3967 1616 : .observe(duration.as_secs_f64());
3968 1616 : res
3969 1616 : }
3970 : }
3971 :
3972 : impl<Fut, O, E> MeasureRemoteOp<O, E> for Fut where Fut: Sized + Future<Output = Result<O, E>> {}
3973 :
3974 : pub mod tokio_epoll_uring {
3975 : use std::collections::HashMap;
3976 : use std::sync::{Arc, Mutex};
3977 :
3978 : use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};
3979 : use once_cell::sync::Lazy;
3980 :
3981 : /// Shared storage for tokio-epoll-uring thread local metrics.
3982 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3983 121 : Lazy::new(|| {
3984 121 : let slots_submission_queue_depth = register_histogram!(
3985 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3986 : "The slots waiters queue depth of each tokio_epoll_uring system",
3987 121 : vec![
3988 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
3989 : ],
3990 : )
3991 121 : .expect("failed to define a metric");
3992 121 : ThreadLocalMetricsStorage {
3993 121 : observers: Mutex::new(HashMap::new()),
3994 121 : slots_submission_queue_depth,
3995 121 : }
3996 121 : });
3997 :
3998 : pub struct ThreadLocalMetricsStorage {
3999 : /// List of thread local metrics observers.
4000 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
4001 : /// A histogram shared between all thread local systems
4002 : /// for collecting slots submission queue depth.
4003 : slots_submission_queue_depth: Histogram,
4004 : }
4005 :
4006 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
4007 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
4008 : ///
4009 : /// The System makes observations into [`Self`] and periodically, the collector
4010 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
4011 : ///
4012 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
4013 : /// But except for the periodic flush, the lock is uncontended so there's no waiting
4014 : /// for cache coherence protocol to get an exclusive cache line.
4015 : pub struct ThreadLocalMetrics {
4016 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
4017 : slots_submission_queue_depth: Mutex<LocalHistogram>,
4018 : }
4019 :
4020 : impl ThreadLocalMetricsStorage {
4021 : /// Registers a new thread local system. Returns a thread local metrics observer.
4022 578 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
4023 578 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
4024 578 : self.slots_submission_queue_depth.local(),
4025 : ));
4026 578 : let mut g = self.observers.lock().unwrap();
4027 578 : g.insert(id, Arc::clone(&per_system_metrics));
4028 578 : per_system_metrics
4029 578 : }
4030 :
4031 : /// Removes metrics observer for a thread local system.
4032 : /// This should be called before dropping a thread local system.
4033 121 : pub fn remove_system(&self, id: u64) {
4034 121 : let mut g = self.observers.lock().unwrap();
4035 121 : g.remove(&id);
4036 121 : }
4037 :
4038 : /// Flush all thread local metrics to the shared storage.
4039 0 : pub fn flush_thread_local_metrics(&self) {
4040 0 : let g = self.observers.lock().unwrap();
4041 0 : g.values().for_each(|local| {
4042 0 : local.flush();
4043 0 : });
4044 0 : }
4045 : }
4046 :
4047 : impl ThreadLocalMetrics {
4048 578 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
4049 578 : ThreadLocalMetrics {
4050 578 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
4051 578 : }
4052 578 : }
4053 :
4054 : /// Flushes the thread local metrics to shared aggregator.
4055 0 : pub fn flush(&self) {
4056 : let Self {
4057 0 : slots_submission_queue_depth,
4058 0 : } = self;
4059 0 : slots_submission_queue_depth.lock().unwrap().flush();
4060 0 : }
4061 : }
4062 :
4063 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
4064 397253 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
4065 : let Self {
4066 397253 : slots_submission_queue_depth,
4067 397253 : } = self;
4068 397253 : slots_submission_queue_depth
4069 397253 : .lock()
4070 397253 : .unwrap()
4071 397253 : .observe(queue_depth as f64);
4072 397253 : }
4073 : }
4074 :
4075 : pub struct Collector {
4076 : descs: Vec<metrics::core::Desc>,
4077 : systems_created: UIntGauge,
4078 : systems_destroyed: UIntGauge,
4079 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
4080 : }
4081 :
4082 : impl metrics::core::Collector for Collector {
4083 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
4084 0 : self.descs.iter().collect()
4085 0 : }
4086 :
4087 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
4088 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
4089 : let tokio_epoll_uring::metrics::GlobalMetrics {
4090 0 : systems_created,
4091 0 : systems_destroyed,
4092 0 : } = tokio_epoll_uring::metrics::global();
4093 0 : self.systems_created.set(systems_created);
4094 0 : mfs.extend(self.systems_created.collect());
4095 0 : self.systems_destroyed.set(systems_destroyed);
4096 0 : mfs.extend(self.systems_destroyed.collect());
4097 :
4098 0 : self.thread_local_metrics_storage
4099 0 : .flush_thread_local_metrics();
4100 :
4101 0 : mfs.extend(
4102 0 : self.thread_local_metrics_storage
4103 0 : .slots_submission_queue_depth
4104 0 : .collect(),
4105 : );
4106 0 : mfs
4107 0 : }
4108 : }
4109 :
4110 : impl Collector {
4111 : const NMETRICS: usize = 3;
4112 :
4113 : #[allow(clippy::new_without_default)]
4114 0 : pub fn new() -> Self {
4115 0 : let mut descs = Vec::new();
4116 :
4117 0 : let systems_created = UIntGauge::new(
4118 : "pageserver_tokio_epoll_uring_systems_created",
4119 : "counter of tokio-epoll-uring systems that were created",
4120 : )
4121 0 : .unwrap();
4122 0 : descs.extend(
4123 0 : metrics::core::Collector::desc(&systems_created)
4124 0 : .into_iter()
4125 0 : .cloned(),
4126 : );
4127 :
4128 0 : let systems_destroyed = UIntGauge::new(
4129 : "pageserver_tokio_epoll_uring_systems_destroyed",
4130 : "counter of tokio-epoll-uring systems that were destroyed",
4131 : )
4132 0 : .unwrap();
4133 0 : descs.extend(
4134 0 : metrics::core::Collector::desc(&systems_destroyed)
4135 0 : .into_iter()
4136 0 : .cloned(),
4137 : );
4138 :
4139 0 : Self {
4140 0 : descs,
4141 0 : systems_created,
4142 0 : systems_destroyed,
4143 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
4144 0 : }
4145 0 : }
4146 : }
4147 :
4148 121 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
4149 121 : register_int_counter!(
4150 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
4151 : "Number of times where thread_local_system creation spanned multiple executor threads",
4152 : )
4153 121 : .unwrap()
4154 121 : });
4155 :
4156 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
4157 0 : register_int_counter!(
4158 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
4159 : "Number of times thread_local_system creation failed and was retried after back-off.",
4160 : )
4161 0 : .unwrap()
4162 0 : });
4163 : }
4164 :
4165 : pub(crate) struct GlobalAndPerTenantIntCounter {
4166 : global: IntCounter,
4167 : per_tenant: IntCounter,
4168 : }
4169 :
4170 : impl GlobalAndPerTenantIntCounter {
4171 : #[inline(always)]
4172 0 : pub(crate) fn inc(&self) {
4173 0 : self.inc_by(1)
4174 0 : }
4175 : #[inline(always)]
4176 112578 : pub(crate) fn inc_by(&self, n: u64) {
4177 112578 : self.global.inc_by(n);
4178 112578 : self.per_tenant.inc_by(n);
4179 112578 : }
4180 : }
4181 :
4182 : pub(crate) mod tenant_throttling {
4183 : use metrics::register_int_counter_vec;
4184 : use once_cell::sync::Lazy;
4185 : use utils::shard::TenantShardId;
4186 :
4187 : use super::GlobalAndPerTenantIntCounter;
4188 :
4189 : pub(crate) struct Metrics<const KIND: usize> {
4190 : pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
4191 : pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,
4192 : pub(super) wait_time: GlobalAndPerTenantIntCounter,
4193 : pub(super) count_throttled: GlobalAndPerTenantIntCounter,
4194 : }
4195 :
4196 109 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4197 109 : register_int_counter_vec!(
4198 : "pageserver_tenant_throttling_count_accounted_start_global",
4199 : "Count of tenant throttling starts, by kind of throttle.",
4200 109 : &["kind"]
4201 : )
4202 109 : .unwrap()
4203 109 : });
4204 109 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4205 109 : register_int_counter_vec!(
4206 : "pageserver_tenant_throttling_count_accounted_start",
4207 : "Count of tenant throttling starts, by kind of throttle.",
4208 109 : &["kind", "tenant_id", "shard_id"]
4209 : )
4210 109 : .unwrap()
4211 109 : });
4212 109 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4213 109 : register_int_counter_vec!(
4214 : "pageserver_tenant_throttling_count_accounted_finish_global",
4215 : "Count of tenant throttling finishes, by kind of throttle.",
4216 109 : &["kind"]
4217 : )
4218 109 : .unwrap()
4219 109 : });
4220 109 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4221 109 : register_int_counter_vec!(
4222 : "pageserver_tenant_throttling_count_accounted_finish",
4223 : "Count of tenant throttling finishes, by kind of throttle.",
4224 109 : &["kind", "tenant_id", "shard_id"]
4225 : )
4226 109 : .unwrap()
4227 109 : });
4228 109 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4229 109 : register_int_counter_vec!(
4230 : "pageserver_tenant_throttling_wait_usecs_sum_global",
4231 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
4232 109 : &["kind"]
4233 : )
4234 109 : .unwrap()
4235 109 : });
4236 109 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4237 109 : register_int_counter_vec!(
4238 : "pageserver_tenant_throttling_wait_usecs_sum",
4239 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
4240 109 : &["kind", "tenant_id", "shard_id"]
4241 : )
4242 109 : .unwrap()
4243 109 : });
4244 :
4245 109 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4246 109 : register_int_counter_vec!(
4247 : "pageserver_tenant_throttling_count_global",
4248 : "Count of tenant throttlings, by kind of throttle.",
4249 109 : &["kind"]
4250 : )
4251 109 : .unwrap()
4252 109 : });
4253 109 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4254 109 : register_int_counter_vec!(
4255 : "pageserver_tenant_throttling_count",
4256 : "Count of tenant throttlings, by kind of throttle.",
4257 109 : &["kind", "tenant_id", "shard_id"]
4258 : )
4259 109 : .unwrap()
4260 109 : });
4261 :
4262 : const KINDS: &[&str] = &["pagestream"];
4263 : pub type Pagestream = Metrics<0>;
4264 :
4265 : impl<const KIND: usize> Metrics<KIND> {
4266 118 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
4267 118 : let per_tenant_label_values = &[
4268 118 : KINDS[KIND],
4269 118 : &tenant_shard_id.tenant_id.to_string(),
4270 118 : &tenant_shard_id.shard_slug().to_string(),
4271 118 : ];
4272 118 : Metrics {
4273 118 : count_accounted_start: {
4274 118 : GlobalAndPerTenantIntCounter {
4275 118 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
4276 118 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
4277 118 : .with_label_values(per_tenant_label_values),
4278 118 : }
4279 118 : },
4280 118 : count_accounted_finish: {
4281 118 : GlobalAndPerTenantIntCounter {
4282 118 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
4283 118 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
4284 118 : .with_label_values(per_tenant_label_values),
4285 118 : }
4286 118 : },
4287 118 : wait_time: {
4288 118 : GlobalAndPerTenantIntCounter {
4289 118 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
4290 118 : per_tenant: WAIT_USECS_PER_TENANT
4291 118 : .with_label_values(per_tenant_label_values),
4292 118 : }
4293 118 : },
4294 118 : count_throttled: {
4295 118 : GlobalAndPerTenantIntCounter {
4296 118 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
4297 118 : per_tenant: WAIT_COUNT_PER_TENANT
4298 118 : .with_label_values(per_tenant_label_values),
4299 118 : }
4300 118 : },
4301 118 : }
4302 118 : }
4303 : }
4304 :
4305 0 : pub(crate) fn preinitialize_global_metrics() {
4306 0 : Lazy::force(&COUNT_ACCOUNTED_START);
4307 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
4308 0 : Lazy::force(&WAIT_USECS);
4309 0 : Lazy::force(&WAIT_COUNT);
4310 0 : }
4311 :
4312 3 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
4313 12 : for m in &[
4314 3 : &COUNT_ACCOUNTED_START_PER_TENANT,
4315 3 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
4316 3 : &WAIT_USECS_PER_TENANT,
4317 3 : &WAIT_COUNT_PER_TENANT,
4318 3 : ] {
4319 24 : for kind in KINDS {
4320 12 : let _ = m.remove_label_values(&[
4321 12 : kind,
4322 12 : &tenant_shard_id.tenant_id.to_string(),
4323 12 : &tenant_shard_id.shard_slug().to_string(),
4324 12 : ]);
4325 12 : }
4326 : }
4327 3 : }
4328 : }
4329 :
4330 : pub(crate) mod disk_usage_based_eviction {
4331 : use super::*;
4332 :
4333 : pub(crate) struct Metrics {
4334 : pub(crate) tenant_collection_time: Histogram,
4335 : pub(crate) tenant_layer_count: Histogram,
4336 : pub(crate) layers_collected: IntCounter,
4337 : pub(crate) layers_selected: IntCounter,
4338 : pub(crate) layers_evicted: IntCounter,
4339 : }
4340 :
4341 : impl Default for Metrics {
4342 0 : fn default() -> Self {
4343 0 : let tenant_collection_time = register_histogram!(
4344 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
4345 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
4346 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
4347 : )
4348 0 : .unwrap();
4349 :
4350 0 : let tenant_layer_count = register_histogram!(
4351 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
4352 : "Amount of layers gathered from a tenant",
4353 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
4354 : )
4355 0 : .unwrap();
4356 :
4357 0 : let layers_collected = register_int_counter!(
4358 : "pageserver_disk_usage_based_eviction_collected_layers_total",
4359 : "Amount of layers collected"
4360 : )
4361 0 : .unwrap();
4362 :
4363 0 : let layers_selected = register_int_counter!(
4364 : "pageserver_disk_usage_based_eviction_select_layers_total",
4365 : "Amount of layers selected"
4366 : )
4367 0 : .unwrap();
4368 :
4369 0 : let layers_evicted = register_int_counter!(
4370 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
4371 : "Amount of layers successfully evicted"
4372 : )
4373 0 : .unwrap();
4374 :
4375 0 : Self {
4376 0 : tenant_collection_time,
4377 0 : tenant_layer_count,
4378 0 : layers_collected,
4379 0 : layers_selected,
4380 0 : layers_evicted,
4381 0 : }
4382 0 : }
4383 : }
4384 :
4385 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
4386 : }
4387 :
4388 106 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
4389 106 : register_uint_gauge_vec!(
4390 : "pageserver_tokio_executor_thread_configured_count",
4391 : "Total number of configued tokio executor threads in the process.
4392 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
4393 106 : &["setup"],
4394 : )
4395 106 : .unwrap()
4396 106 : });
4397 :
4398 106 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
4399 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
4400 106 : let _guard = SERIALIZE.lock().unwrap();
4401 106 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
4402 106 : TOKIO_EXECUTOR_THREAD_COUNT
4403 106 : .get_metric_with_label_values(&[setup])
4404 106 : .unwrap()
4405 106 : .set(u64::try_from(num_threads.get()).unwrap());
4406 106 : }
4407 :
4408 109 : pub(crate) static BASEBACKUP_CACHE_READ: Lazy<IntCounterVec> = Lazy::new(|| {
4409 109 : register_int_counter_vec!(
4410 : "pageserver_basebackup_cache_read_total",
4411 : "Number of read accesses to the basebackup cache grouped by hit/miss/error",
4412 109 : &["result"]
4413 : )
4414 109 : .expect("failed to define a metric")
4415 109 : });
4416 :
4417 109 : pub(crate) static BASEBACKUP_CACHE_PREPARE: Lazy<IntCounterVec> = Lazy::new(|| {
4418 109 : register_int_counter_vec!(
4419 : "pageserver_basebackup_cache_prepare_total",
4420 : "Number of prepare requests processed by the basebackup cache grouped by ok/skip/error",
4421 109 : &["result"]
4422 : )
4423 109 : .expect("failed to define a metric")
4424 109 : });
4425 :
4426 0 : pub(crate) static BASEBACKUP_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
4427 0 : register_uint_gauge!(
4428 : "pageserver_basebackup_cache_entries_total",
4429 : "Number of entries in the basebackup cache"
4430 : )
4431 0 : .expect("failed to define a metric")
4432 0 : });
4433 :
4434 0 : pub(crate) static BASEBACKUP_CACHE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {
4435 0 : register_uint_gauge!(
4436 : "pageserver_basebackup_cache_size_bytes",
4437 : "Total size of all basebackup cache entries on disk in bytes"
4438 : )
4439 0 : .expect("failed to define a metric")
4440 0 : });
4441 :
4442 0 : pub(crate) static BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {
4443 0 : register_uint_gauge!(
4444 : "pageserver_basebackup_cache_prepare_queue_size",
4445 : "Number of requests in the basebackup prepare channel"
4446 : )
4447 0 : .expect("failed to define a metric")
4448 0 : });
4449 :
4450 0 : static PAGESERVER_CONFIG_IGNORED_ITEMS: Lazy<UIntGaugeVec> = Lazy::new(|| {
4451 0 : register_uint_gauge_vec!(
4452 : "pageserver_config_ignored_items",
4453 : "TOML items present in the on-disk configuration file but ignored by the pageserver config parser.\
4454 : The `item` label is the dot-separated path of the ignored item in the on-disk configuration file.\
4455 : The value for an unknown config item is always 1.\
4456 : There is a special label value \"\", which is 0, so that there is always a metric exposed (simplifies dashboards).",
4457 0 : &["item"]
4458 : )
4459 0 : .unwrap()
4460 0 : });
4461 :
4462 0 : pub fn preinitialize_metrics(
4463 0 : conf: &'static PageServerConf,
4464 0 : ignored: config::ignored_fields::Paths,
4465 0 : ) {
4466 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
4467 :
4468 0 : PAGESERVER_CONFIG_IGNORED_ITEMS
4469 0 : .with_label_values(&[""])
4470 0 : .set(0);
4471 0 : for path in &ignored.paths {
4472 0 : PAGESERVER_CONFIG_IGNORED_ITEMS
4473 0 : .with_label_values(&[path])
4474 0 : .set(1);
4475 0 : }
4476 :
4477 : // Python tests need these and on some we do alerting.
4478 : //
4479 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
4480 : // order:
4481 : // - global metrics reside in a Lazy<PageserverMetrics>
4482 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
4483 : // - could move the statics into TimelineMetrics::new()?
4484 :
4485 : // counters
4486 0 : [
4487 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
4488 0 : &WALRECEIVER_STARTED_CONNECTIONS,
4489 0 : &WALRECEIVER_BROKER_UPDATES,
4490 0 : &WALRECEIVER_CANDIDATES_ADDED,
4491 0 : &WALRECEIVER_CANDIDATES_REMOVED,
4492 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
4493 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
4494 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
4495 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
4496 0 : &CIRCUIT_BREAKERS_BROKEN,
4497 0 : &CIRCUIT_BREAKERS_UNBROKEN,
4498 0 : &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
4499 0 : &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,
4500 0 : ]
4501 0 : .into_iter()
4502 0 : .for_each(|c| {
4503 0 : Lazy::force(c);
4504 0 : });
4505 :
4506 : // Deletion queue stats
4507 0 : Lazy::force(&DELETION_QUEUE);
4508 :
4509 : // Tenant stats
4510 0 : Lazy::force(&TENANT);
4511 :
4512 : // Tenant manager stats
4513 0 : Lazy::force(&TENANT_MANAGER);
4514 :
4515 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
4516 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
4517 :
4518 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
4519 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
4520 0 : // values from last restart.
4521 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
4522 0 : }
4523 :
4524 : // countervecs
4525 0 : [
4526 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
4527 0 : &SMGR_QUERY_STARTED_GLOBAL,
4528 0 : &PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL,
4529 0 : ]
4530 0 : .into_iter()
4531 0 : .for_each(|c| {
4532 0 : Lazy::force(c);
4533 0 : });
4534 :
4535 : // gauges
4536 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
4537 :
4538 : // histograms
4539 0 : [
4540 0 : &LAYERS_PER_READ_GLOBAL,
4541 0 : &LAYERS_PER_READ_BATCH_GLOBAL,
4542 0 : &LAYERS_PER_READ_AMORTIZED_GLOBAL,
4543 0 : &DELTAS_PER_READ_GLOBAL,
4544 0 : &WAIT_LSN_TIME,
4545 0 : &WAL_REDO_TIME,
4546 0 : &WAL_REDO_RECORDS_HISTOGRAM,
4547 0 : &WAL_REDO_BYTES_HISTOGRAM,
4548 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
4549 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
4550 0 : &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
4551 0 : ]
4552 0 : .into_iter()
4553 0 : .for_each(|h| {
4554 0 : Lazy::force(h);
4555 0 : });
4556 :
4557 : // Custom
4558 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
4559 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
4560 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
4561 :
4562 0 : tenant_throttling::preinitialize_global_metrics();
4563 0 : wait_ondemand_download_time::preinitialize_global_metrics();
4564 0 : }
|