Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::metric_vec_duration::DurationResultObserver;
3 : use metrics::{
4 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
5 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
6 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
7 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
8 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
9 : };
10 : use once_cell::sync::Lazy;
11 : use pageserver_api::shard::TenantShardId;
12 : use strum::{EnumCount, IntoEnumIterator, VariantNames};
13 : use strum_macros::{EnumVariantNames, IntoStaticStr};
14 : use utils::id::TimelineId;
15 :
/// Bucket boundaries (in seconds) for Prometheus histograms that time
/// operations on the critical path, i.e. operations that directly affect the
/// latency of user queries.
///
/// There is one boundary per power of ten from 1 microsecond up to 100
/// seconds: this captures the majority of latencies in the microsecond and
/// millisecond range, and still reaches high enough to tell "bad" from
/// "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    // 1 us, 10 us, 100 us
    0.000_001,
    0.000_010,
    0.000_100,
    // 1 ms, 10 ms, 100 ms
    0.001_000,
    0.010_000,
    0.100_000,
    // 1 s, 10 s, 100 s
    1.0,
    10.0,
    100.0,
];
28 :
29 : // Metrics collected on operations on the storage repository.
30 2072 : #[derive(Debug, EnumVariantNames, IntoStaticStr)]
31 : #[strum(serialize_all = "kebab_case")]
32 : pub(crate) enum StorageTimeOperation {
33 : #[strum(serialize = "layer flush")]
34 : LayerFlush,
35 :
36 : #[strum(serialize = "compact")]
37 : Compact,
38 :
39 : #[strum(serialize = "create images")]
40 : CreateImages,
41 :
42 : #[strum(serialize = "logical size")]
43 : LogicalSize,
44 :
45 : #[strum(serialize = "imitate logical size")]
46 : ImitateLogicalSize,
47 :
48 : #[strum(serialize = "load layer map")]
49 : LoadLayerMap,
50 :
51 : #[strum(serialize = "gc")]
52 : Gc,
53 :
54 : #[strum(serialize = "create tenant")]
55 : CreateTenant,
56 : }
57 :
58 80 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
59 80 : register_counter_vec!(
60 80 : "pageserver_storage_operations_seconds_sum",
61 80 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
62 80 : &["operation", "tenant_id", "shard_id", "timeline_id"],
63 80 : )
64 80 : .expect("failed to define a metric")
65 80 : });
66 :
67 80 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
68 80 : register_int_counter_vec!(
69 80 : "pageserver_storage_operations_seconds_count",
70 80 : "Count of storage operations with operation, tenant and timeline dimensions",
71 80 : &["operation", "tenant_id", "shard_id", "timeline_id"],
72 80 : )
73 80 : .expect("failed to define a metric")
74 80 : });
75 :
/// Histogram bucket boundaries (in seconds) for background operations like
/// compaction, GC and size calculation: one boundary per power of ten from
/// 10 ms to 1000 s.
const STORAGE_OP_BUCKETS: &[f64] = &[
    // 10 ms, 100 ms
    0.010,
    0.100,
    // 1 s, 10 s, 100 s, 1000 s
    1.0,
    10.0,
    100.0,
    1000.0,
];
78 :
79 80 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
80 80 : register_histogram_vec!(
81 80 : "pageserver_storage_operations_seconds_global",
82 80 : "Time spent on storage operations",
83 80 : &["operation"],
84 80 : STORAGE_OP_BUCKETS.into(),
85 80 : )
86 80 : .expect("failed to define a metric")
87 80 : });
88 :
89 78 : pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
90 78 : register_histogram!(
91 78 : "pageserver_read_num_fs_layers",
92 78 : "Number of persistent layers accessed for processing a read request, including those in the cache",
93 78 : vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
94 78 : )
95 78 : .expect("failed to define a metric")
96 78 : });
97 :
98 : // Metrics collected on operations on the storage repository.
99 :
100 : pub(crate) struct ReconstructTimeMetrics {
101 : ok: Histogram,
102 : err: Histogram,
103 : }
104 :
105 78 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
106 78 : let inner = register_histogram_vec!(
107 78 : "pageserver_getpage_reconstruct_seconds",
108 78 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
109 78 : &["result"],
110 78 : CRITICAL_OP_BUCKETS.into(),
111 78 : )
112 78 : .expect("failed to define a metric");
113 78 : ReconstructTimeMetrics {
114 78 : ok: inner.get_metric_with_label_values(&["ok"]).unwrap(),
115 78 : err: inner.get_metric_with_label_values(&["err"]).unwrap(),
116 78 : }
117 78 : });
118 :
119 : impl ReconstructTimeMetrics {
120 502416 : pub(crate) fn for_result<T, E>(&self, result: &Result<T, E>) -> &Histogram {
121 502416 : match result {
122 502416 : Ok(_) => &self.ok,
123 0 : Err(_) => &self.err,
124 : }
125 502416 : }
126 : }
127 :
128 0 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
129 0 : register_int_counter!(
130 0 : "pageserver_materialized_cache_hits_direct_total",
131 0 : "Number of cache hits from materialized page cache without redo",
132 0 : )
133 0 : .expect("failed to define a metric")
134 0 : });
135 :
136 78 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<Histogram> = Lazy::new(|| {
137 78 : register_histogram!(
138 78 : "pageserver_getpage_get_reconstruct_data_seconds",
139 78 : "Time spent in get_reconstruct_value_data",
140 78 : CRITICAL_OP_BUCKETS.into(),
141 78 : )
142 78 : .expect("failed to define a metric")
143 78 : });
144 :
145 0 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
146 0 : register_int_counter!(
147 0 : "pageserver_materialized_cache_hits_total",
148 0 : "Number of cache hits from materialized page cache",
149 0 : )
150 0 : .expect("failed to define a metric")
151 0 : });
152 :
153 : pub(crate) struct GetVectoredLatency {
154 : map: EnumMap<TaskKind, Option<Histogram>>,
155 : }
156 :
157 : impl GetVectoredLatency {
158 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
159 : // cardinality of the metric.
160 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
161 :
162 444 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
163 444 : self.map[task_kind].as_ref()
164 444 : }
165 : }
166 :
167 74 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
168 74 : let inner = register_histogram_vec!(
169 74 : "pageserver_get_vectored_seconds",
170 74 : "Time spent in get_vectored",
171 74 : &["task_kind"],
172 74 : CRITICAL_OP_BUCKETS.into(),
173 74 : )
174 74 : .expect("failed to define a metric");
175 74 :
176 74 : GetVectoredLatency {
177 1998 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
178 1998 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
179 1998 :
180 1998 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
181 148 : let task_kind = task_kind.into();
182 148 : Some(inner.with_label_values(&[task_kind]))
183 : } else {
184 1850 : None
185 : }
186 1998 : })),
187 74 : }
188 74 : });
189 :
190 : pub(crate) struct PageCacheMetricsForTaskKind {
191 : pub read_accesses_materialized_page: IntCounter,
192 : pub read_accesses_immutable: IntCounter,
193 :
194 : pub read_hits_immutable: IntCounter,
195 : pub read_hits_materialized_page_exact: IntCounter,
196 : pub read_hits_materialized_page_older_lsn: IntCounter,
197 : }
198 :
199 : pub(crate) struct PageCacheMetrics {
200 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
201 : }
202 :
203 80 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
204 80 : register_int_counter_vec!(
205 80 : "pageserver_page_cache_read_hits_total",
206 80 : "Number of read accesses to the page cache that hit",
207 80 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
208 80 : )
209 80 : .expect("failed to define a metric")
210 80 : });
211 :
212 80 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
213 80 : register_int_counter_vec!(
214 80 : "pageserver_page_cache_read_accesses_total",
215 80 : "Number of read accesses to the page cache",
216 80 : &["task_kind", "key_kind", "content_kind"]
217 80 : )
218 80 : .expect("failed to define a metric")
219 80 : });
220 :
221 80 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
222 2160 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
223 2160 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
224 2160 : let task_kind: &'static str = task_kind.into();
225 12960 : EnumMap::from_array(std::array::from_fn(|content_kind| {
226 12960 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
227 12960 : let content_kind: &'static str = content_kind.into();
228 12960 : PageCacheMetricsForTaskKind {
229 12960 : read_accesses_materialized_page: {
230 12960 : PAGE_CACHE_READ_ACCESSES
231 12960 : .get_metric_with_label_values(&[
232 12960 : task_kind,
233 12960 : "materialized_page",
234 12960 : content_kind,
235 12960 : ])
236 12960 : .unwrap()
237 12960 : },
238 12960 :
239 12960 : read_accesses_immutable: {
240 12960 : PAGE_CACHE_READ_ACCESSES
241 12960 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
242 12960 : .unwrap()
243 12960 : },
244 12960 :
245 12960 : read_hits_immutable: {
246 12960 : PAGE_CACHE_READ_HITS
247 12960 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
248 12960 : .unwrap()
249 12960 : },
250 12960 :
251 12960 : read_hits_materialized_page_exact: {
252 12960 : PAGE_CACHE_READ_HITS
253 12960 : .get_metric_with_label_values(&[
254 12960 : task_kind,
255 12960 : "materialized_page",
256 12960 : content_kind,
257 12960 : "exact",
258 12960 : ])
259 12960 : .unwrap()
260 12960 : },
261 12960 :
262 12960 : read_hits_materialized_page_older_lsn: {
263 12960 : PAGE_CACHE_READ_HITS
264 12960 : .get_metric_with_label_values(&[
265 12960 : task_kind,
266 12960 : "materialized_page",
267 12960 : content_kind,
268 12960 : "older_lsn",
269 12960 : ])
270 12960 : .unwrap()
271 12960 : },
272 12960 : }
273 12960 : }))
274 2160 : })),
275 80 : });
276 :
277 : impl PageCacheMetrics {
278 10219006 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
279 10219006 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
280 10219006 : }
281 : }
282 :
283 : pub(crate) struct PageCacheSizeMetrics {
284 : pub max_bytes: UIntGauge,
285 :
286 : pub current_bytes_immutable: UIntGauge,
287 : pub current_bytes_materialized_page: UIntGauge,
288 : }
289 :
290 80 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
291 80 : register_uint_gauge_vec!(
292 80 : "pageserver_page_cache_size_current_bytes",
293 80 : "Current size of the page cache in bytes, by key kind",
294 80 : &["key_kind"]
295 80 : )
296 80 : .expect("failed to define a metric")
297 80 : });
298 :
299 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
300 80 : Lazy::new(|| PageCacheSizeMetrics {
301 80 : max_bytes: {
302 80 : register_uint_gauge!(
303 80 : "pageserver_page_cache_size_max_bytes",
304 80 : "Maximum size of the page cache in bytes"
305 80 : )
306 80 : .expect("failed to define a metric")
307 80 : },
308 80 : current_bytes_immutable: {
309 80 : PAGE_CACHE_SIZE_CURRENT_BYTES
310 80 : .get_metric_with_label_values(&["immutable"])
311 80 : .unwrap()
312 80 : },
313 80 : current_bytes_materialized_page: {
314 80 : PAGE_CACHE_SIZE_CURRENT_BYTES
315 80 : .get_metric_with_label_values(&["materialized_page"])
316 80 : .unwrap()
317 80 : },
318 80 : });
319 :
320 : pub(crate) mod page_cache_eviction_metrics {
321 : use std::num::NonZeroUsize;
322 :
323 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
324 : use once_cell::sync::Lazy;
325 :
326 0 : #[derive(Clone, Copy)]
327 : pub(crate) enum Outcome {
328 : FoundSlotUnused { iters: NonZeroUsize },
329 : FoundSlotEvicted { iters: NonZeroUsize },
330 : ItersExceeded { iters: NonZeroUsize },
331 : }
332 :
333 30 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
334 30 : register_int_counter_vec!(
335 30 : "pageserver_page_cache_find_victim_iters_total",
336 30 : "Counter for the number of iterations in the find_victim loop",
337 30 : &["outcome"],
338 30 : )
339 30 : .expect("failed to define a metric")
340 30 : });
341 :
342 30 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
343 30 : register_int_counter_vec!(
344 30 : "pageserver_page_cache_find_victim_calls",
345 30 : "Incremented at the end of each find_victim() call.\
346 30 : Filter by outcome to get e.g., eviction rate.",
347 30 : &["outcome"]
348 30 : )
349 30 : .unwrap()
350 30 : });
351 :
352 125338 : pub(crate) fn observe(outcome: Outcome) {
353 125338 : macro_rules! dry {
354 125338 : ($label:literal, $iters:expr) => {{
355 125338 : static LABEL: &'static str = $label;
356 125338 : static ITERS_TOTAL: Lazy<IntCounter> =
357 125338 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
358 125338 : static CALLS: Lazy<IntCounter> =
359 125338 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
360 125338 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
361 125338 : CALLS.inc();
362 125338 : }};
363 125338 : }
364 125338 : match outcome {
365 904 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
366 124434 : Outcome::FoundSlotEvicted { iters } => {
367 124434 : dry!("found_evicted", iters)
368 : }
369 0 : Outcome::ItersExceeded { iters } => {
370 0 : dry!("err_iters_exceeded", iters);
371 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
372 0 : }
373 : }
374 125338 : }
375 : }
376 :
377 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
378 0 : register_int_counter_vec!(
379 0 : "page_cache_errors_total",
380 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
381 0 : &["error_kind"]
382 0 : )
383 0 : .expect("failed to define a metric")
384 0 : });
385 :
386 0 : #[derive(IntoStaticStr)]
387 : #[strum(serialize_all = "kebab_case")]
388 : pub(crate) enum PageCacheErrorKind {
389 : AcquirePinnedSlotTimeout,
390 : EvictIterLimit,
391 : }
392 :
393 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
394 0 : PAGE_CACHE_ERRORS
395 0 : .get_metric_with_label_values(&[error_kind.into()])
396 0 : .unwrap()
397 0 : .inc();
398 0 : }
399 :
400 10 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
401 10 : register_histogram!(
402 10 : "pageserver_wait_lsn_seconds",
403 10 : "Time spent waiting for WAL to arrive",
404 10 : CRITICAL_OP_BUCKETS.into(),
405 10 : )
406 10 : .expect("failed to define a metric")
407 10 : });
408 :
409 80 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
410 80 : register_int_gauge_vec!(
411 80 : "pageserver_last_record_lsn",
412 80 : "Last record LSN grouped by timeline",
413 80 : &["tenant_id", "shard_id", "timeline_id"]
414 80 : )
415 80 : .expect("failed to define a metric")
416 80 : });
417 :
418 80 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
419 80 : register_uint_gauge_vec!(
420 80 : "pageserver_resident_physical_size",
421 80 : "The size of the layer files present in the pageserver's filesystem.",
422 80 : &["tenant_id", "shard_id", "timeline_id"]
423 80 : )
424 80 : .expect("failed to define a metric")
425 80 : });
426 :
427 78 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
428 78 : register_uint_gauge!(
429 78 : "pageserver_resident_physical_size_global",
430 78 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
431 78 : )
432 78 : .expect("failed to define a metric")
433 78 : });
434 :
435 80 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
436 80 : register_uint_gauge_vec!(
437 80 : "pageserver_remote_physical_size",
438 80 : "The size of the layer files present in the remote storage that are listed in the the remote index_part.json.",
439 80 : // Corollary: If any files are missing from the index part, they won't be included here.
440 80 : &["tenant_id", "shard_id", "timeline_id"]
441 80 : )
442 80 : .expect("failed to define a metric")
443 80 : });
444 :
445 80 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
446 80 : register_uint_gauge!(
447 80 : "pageserver_remote_physical_size_global",
448 80 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
449 80 : )
450 80 : .expect("failed to define a metric")
451 80 : });
452 :
453 0 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
454 0 : register_int_counter!(
455 0 : "pageserver_remote_ondemand_downloaded_layers_total",
456 0 : "Total on-demand downloaded layers"
457 0 : )
458 0 : .unwrap()
459 0 : });
460 :
461 0 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
462 0 : register_int_counter!(
463 0 : "pageserver_remote_ondemand_downloaded_bytes_total",
464 0 : "Total bytes of layers on-demand downloaded",
465 0 : )
466 0 : .unwrap()
467 0 : });
468 :
469 80 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
470 80 : register_uint_gauge_vec!(
471 80 : "pageserver_current_logical_size",
472 80 : "Current logical size grouped by timeline",
473 80 : &["tenant_id", "shard_id", "timeline_id"]
474 80 : )
475 80 : .expect("failed to define current logical size metric")
476 80 : });
477 :
478 : pub(crate) mod initial_logical_size {
479 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
480 : use once_cell::sync::Lazy;
481 :
482 : pub(crate) struct StartCalculation(IntCounterVec);
483 80 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
484 80 : StartCalculation(
485 80 : register_int_counter_vec!(
486 80 : "pageserver_initial_logical_size_start_calculation",
487 80 : "Incremented each time we start an initial logical size calculation attempt. \
488 80 : The `circumstances` label provides some additional details.",
489 80 : &["attempt", "circumstances"]
490 80 : )
491 80 : .unwrap(),
492 80 : )
493 80 : });
494 :
495 : struct DropCalculation {
496 : first: IntCounter,
497 : retry: IntCounter,
498 : }
499 :
500 80 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
501 80 : let vec = register_int_counter_vec!(
502 80 : "pageserver_initial_logical_size_drop_calculation",
503 80 : "Incremented each time we abort a started size calculation attmpt.",
504 80 : &["attempt"]
505 80 : )
506 80 : .unwrap();
507 80 : DropCalculation {
508 80 : first: vec.with_label_values(&["first"]),
509 80 : retry: vec.with_label_values(&["retry"]),
510 80 : }
511 80 : });
512 :
513 : pub(crate) struct Calculated {
514 : pub(crate) births: IntCounter,
515 : pub(crate) deaths: IntCounter,
516 : }
517 :
518 80 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
519 80 : births: register_int_counter!(
520 80 : "pageserver_initial_logical_size_finish_calculation",
521 80 : "Incremented every time we finish calculation of initial logical size.\
522 80 : If everything is working well, this should happen at most once per Timeline object."
523 80 : )
524 80 : .unwrap(),
525 80 : deaths: register_int_counter!(
526 80 : "pageserver_initial_logical_size_drop_finished_calculation",
527 80 : "Incremented when we drop a finished initial logical size calculation result.\
528 80 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
529 80 : )
530 80 : .unwrap(),
531 80 : });
532 :
533 : pub(crate) struct OngoingCalculationGuard {
534 : inc_drop_calculation: Option<IntCounter>,
535 : }
536 :
537 80 : #[derive(strum_macros::IntoStaticStr)]
538 : pub(crate) enum StartCircumstances {
539 : EmptyInitial,
540 : SkippedConcurrencyLimiter,
541 : AfterBackgroundTasksRateLimit,
542 : }
543 :
544 : impl StartCalculation {
545 80 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
546 80 : let circumstances_label: &'static str = circumstances.into();
547 80 : self.0
548 80 : .with_label_values(&["first", circumstances_label])
549 80 : .inc();
550 80 : OngoingCalculationGuard {
551 80 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
552 80 : }
553 80 : }
554 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
555 0 : let circumstances_label: &'static str = circumstances.into();
556 0 : self.0
557 0 : .with_label_values(&["retry", circumstances_label])
558 0 : .inc();
559 0 : OngoingCalculationGuard {
560 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
561 0 : }
562 0 : }
563 : }
564 :
565 : impl Drop for OngoingCalculationGuard {
566 80 : fn drop(&mut self) {
567 80 : if let Some(counter) = self.inc_drop_calculation.take() {
568 0 : counter.inc();
569 80 : }
570 80 : }
571 : }
572 :
573 : impl OngoingCalculationGuard {
574 80 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
575 80 : drop(self.inc_drop_calculation.take());
576 80 : CALCULATED.births.inc();
577 80 : FinishedCalculationGuard {
578 80 : inc_on_drop: CALCULATED.deaths.clone(),
579 80 : }
580 80 : }
581 : }
582 :
583 : pub(crate) struct FinishedCalculationGuard {
584 : inc_on_drop: IntCounter,
585 : }
586 :
587 : impl Drop for FinishedCalculationGuard {
588 6 : fn drop(&mut self) {
589 6 : self.inc_on_drop.inc();
590 6 : }
591 : }
592 :
593 : // context: https://github.com/neondatabase/neon/issues/5963
594 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
595 0 : Lazy::new(|| {
596 0 : register_int_counter!(
597 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
598 0 : "Counter for the following event: walreceiver calls\
599 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
600 0 : )
601 0 : .unwrap()
602 0 : });
603 : }
604 :
605 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
606 0 : register_uint_gauge_vec!(
607 0 : "pageserver_directory_entries_count",
608 0 : "Sum of the entries in pageserver-stored directory listings",
609 0 : &["tenant_id", "shard_id", "timeline_id"]
610 0 : )
611 0 : .expect("failed to define a metric")
612 0 : });
613 :
614 79 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
615 79 : register_uint_gauge_vec!(
616 79 : "pageserver_tenant_states_count",
617 79 : "Count of tenants per state",
618 79 : &["state"]
619 79 : )
620 79 : .expect("Failed to register pageserver_tenant_states_count metric")
621 79 : });
622 :
623 : /// A set of broken tenants.
624 : ///
625 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
626 : /// tenant.
627 4 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
628 4 : register_uint_gauge_vec!(
629 4 : "pageserver_broken_tenants_count",
630 4 : "Set of broken tenants",
631 4 : &["tenant_id", "shard_id"]
632 4 : )
633 4 : .expect("Failed to register pageserver_tenant_states_count metric")
634 4 : });
635 :
636 82 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
637 82 : register_uint_gauge_vec!(
638 82 : "pageserver_tenant_synthetic_cached_size_bytes",
639 82 : "Synthetic size of each tenant in bytes",
640 82 : &["tenant_id"]
641 82 : )
642 82 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
643 82 : });
644 :
645 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
646 0 : register_histogram_vec!(
647 0 : "pageserver_eviction_iteration_duration_seconds_global",
648 0 : "Time spent on a single eviction iteration",
649 0 : &["period_secs", "threshold_secs"],
650 0 : STORAGE_OP_BUCKETS.into(),
651 0 : )
652 0 : .expect("failed to define a metric")
653 0 : });
654 :
655 80 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
656 80 : register_int_counter_vec!(
657 80 : "pageserver_evictions",
658 80 : "Number of layers evicted from the pageserver",
659 80 : &["tenant_id", "shard_id", "timeline_id"]
660 80 : )
661 80 : .expect("failed to define a metric")
662 80 : });
663 :
664 80 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
665 80 : register_int_counter_vec!(
666 80 : "pageserver_evictions_with_low_residence_duration",
667 80 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
668 80 : Residence duration is determined using the `residence_duration_data_source`.",
669 80 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
670 80 : )
671 80 : .expect("failed to define a metric")
672 80 : });
673 :
674 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
675 0 : register_int_counter!(
676 0 : "pageserver_unexpected_ondemand_downloads_count",
677 0 : "Number of unexpected on-demand downloads. \
678 0 : We log more context for each increment, so, forgo any labels in this metric.",
679 0 : )
680 0 : .expect("failed to define a metric")
681 0 : });
682 :
683 : /// How long did we take to start up? Broken down by labels to describe
684 : /// different phases of startup.
685 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
686 0 : register_gauge_vec!(
687 0 : "pageserver_startup_duration_seconds",
688 0 : "Time taken by phases of pageserver startup, in seconds",
689 0 : &["phase"]
690 0 : )
691 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
692 0 : });
693 :
694 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
695 0 : register_uint_gauge!(
696 0 : "pageserver_startup_is_loading",
697 0 : "1 while in initial startup load of tenants, 0 at other times"
698 0 : )
699 0 : .expect("Failed to register pageserver_startup_is_loading")
700 0 : });
701 :
702 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
703 : /// like how long it took to load.
704 : ///
705 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
706 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
707 : /// at a timeline level than tenant level.
708 : pub(crate) struct TenantMetrics {
709 : /// How long did tenants take to go from construction to active state?
710 : pub(crate) activation: Histogram,
711 : pub(crate) preload: Histogram,
712 : pub(crate) attach: Histogram,
713 :
714 : /// How many tenants are included in the initial startup of the pagesrever?
715 : pub(crate) startup_scheduled: IntCounter,
716 : pub(crate) startup_complete: IntCounter,
717 : }
718 :
719 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
720 0 : TenantMetrics {
721 0 : activation: register_histogram!(
722 0 : "pageserver_tenant_activation_seconds",
723 0 : "Time taken by tenants to activate, in seconds",
724 0 : CRITICAL_OP_BUCKETS.into()
725 0 : )
726 0 : .expect("Failed to register metric"),
727 0 : preload: register_histogram!(
728 0 : "pageserver_tenant_preload_seconds",
729 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
730 0 : CRITICAL_OP_BUCKETS.into()
731 0 : )
732 0 : .expect("Failed to register metric"),
733 0 : attach: register_histogram!(
734 0 : "pageserver_tenant_attach_seconds",
735 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
736 0 : CRITICAL_OP_BUCKETS.into()
737 0 : )
738 0 : .expect("Failed to register metric"),
739 0 : startup_scheduled: register_int_counter!(
740 0 : "pageserver_tenant_startup_scheduled",
741 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
742 0 : ).expect("Failed to register metric"),
743 0 : startup_complete: register_int_counter!(
744 0 : "pageserver_tenant_startup_complete",
745 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
746 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
747 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
748 0 : ).expect("Failed to register metric"),
749 0 : }
750 0 : });
751 :
752 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
753 0 : #[derive(Debug)]
754 : pub(crate) struct EvictionsWithLowResidenceDuration {
755 : data_source: &'static str,
756 : threshold: Duration,
757 : counter: Option<IntCounter>,
758 : }
759 :
760 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
761 : data_source: &'static str,
762 : threshold: Duration,
763 : }
764 :
765 : impl EvictionsWithLowResidenceDurationBuilder {
766 296 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
767 296 : Self {
768 296 : data_source,
769 296 : threshold,
770 296 : }
771 296 : }
772 :
773 296 : fn build(
774 296 : &self,
775 296 : tenant_id: &str,
776 296 : shard_id: &str,
777 296 : timeline_id: &str,
778 296 : ) -> EvictionsWithLowResidenceDuration {
779 296 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
780 296 : .get_metric_with_label_values(&[
781 296 : tenant_id,
782 296 : shard_id,
783 296 : timeline_id,
784 296 : self.data_source,
785 296 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
786 296 : ])
787 296 : .unwrap();
788 296 : EvictionsWithLowResidenceDuration {
789 296 : data_source: self.data_source,
790 296 : threshold: self.threshold,
791 296 : counter: Some(counter),
792 296 : }
793 296 : }
794 : }
795 :
796 : impl EvictionsWithLowResidenceDuration {
797 304 : fn threshold_label_value(threshold: Duration) -> String {
798 304 : format!("{}", threshold.as_secs())
799 304 : }
800 :
801 6 : pub fn observe(&self, observed_value: Duration) {
802 6 : if observed_value < self.threshold {
803 6 : self.counter
804 6 : .as_ref()
805 6 : .expect("nobody calls this function after `remove_from_vec`")
806 6 : .inc();
807 6 : }
808 6 : }
809 :
810 0 : pub fn change_threshold(
811 0 : &mut self,
812 0 : tenant_id: &str,
813 0 : shard_id: &str,
814 0 : timeline_id: &str,
815 0 : new_threshold: Duration,
816 0 : ) {
817 0 : if new_threshold == self.threshold {
818 0 : return;
819 0 : }
820 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
821 0 : self.data_source,
822 0 : new_threshold,
823 0 : )
824 0 : .build(tenant_id, shard_id, timeline_id);
825 0 : std::mem::swap(self, &mut with_new);
826 0 : with_new.remove(tenant_id, shard_id, timeline_id);
827 0 : }
828 :
829 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
830 8 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
831 8 : let Some(_counter) = self.counter.take() else {
832 0 : return;
833 : };
834 :
835 8 : let threshold = Self::threshold_label_value(self.threshold);
836 8 :
837 8 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
838 8 : tenant_id,
839 8 : shard_id,
840 8 : timeline_id,
841 8 : self.data_source,
842 8 : &threshold,
843 8 : ]);
844 8 :
845 8 : match removed {
846 0 : Err(e) => {
847 0 : // this has been hit in staging as
848 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
849 0 : // because we can be in the drop path already, don't risk:
850 0 : // - "double-panic => illegal instruction" or
851 0 : // - future "drop panick => abort"
852 0 : //
853 0 : // so just nag: (the error has the labels)
854 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
855 : }
856 : Ok(()) => {
857 : // to help identify cases where we double-remove the same values, let's log all
858 : // deletions?
859 8 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
860 : }
861 : }
862 8 : }
863 : }
864 :
// Metrics collected on disk IO operations
//
// Histogram bucket bounds in seconds, on a roughly logarithmic scale.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000_030, // 30 usec
    0.001_000, // 1000 usec
    0.030_000, // 30 ms
    1.0,       // 1000 ms
    30.0,      // 30000 ms
];
875 :
876 : /// VirtualFile fs operation variants.
877 : ///
878 : /// Operations:
879 : /// - open ([`std::fs::OpenOptions::open`])
880 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
881 : /// - close-by-replace (close by replacement algorithm)
882 : /// - read (`read_at`)
883 : /// - write (`write_at`)
884 : /// - seek (modify internal position or file length query)
885 : /// - fsync ([`std::fs::File::sync_all`])
886 : /// - metadata ([`std::fs::File::metadata`])
887 : #[derive(
888 900 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
889 : )]
890 : pub(crate) enum StorageIoOperation {
891 : Open,
892 : OpenAfterReplace,
893 : Close,
894 : CloseByReplace,
895 : Read,
896 : Write,
897 : Seek,
898 : Fsync,
899 : Metadata,
900 : }
901 :
902 : impl StorageIoOperation {
903 900 : pub fn as_str(&self) -> &'static str {
904 900 : match self {
905 100 : StorageIoOperation::Open => "open",
906 100 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
907 100 : StorageIoOperation::Close => "close",
908 100 : StorageIoOperation::CloseByReplace => "close-by-replace",
909 100 : StorageIoOperation::Read => "read",
910 100 : StorageIoOperation::Write => "write",
911 100 : StorageIoOperation::Seek => "seek",
912 100 : StorageIoOperation::Fsync => "fsync",
913 100 : StorageIoOperation::Metadata => "metadata",
914 : }
915 900 : }
916 : }
917 :
918 : /// Tracks time taken by fs operations near VirtualFile.
919 0 : #[derive(Debug)]
920 : pub(crate) struct StorageIoTime {
921 : metrics: [Histogram; StorageIoOperation::COUNT],
922 : }
923 :
924 : impl StorageIoTime {
925 100 : fn new() -> Self {
926 100 : let storage_io_histogram_vec = register_histogram_vec!(
927 100 : "pageserver_io_operations_seconds",
928 100 : "Time spent in IO operations",
929 100 : &["operation"],
930 100 : STORAGE_IO_TIME_BUCKETS.into()
931 100 : )
932 100 : .expect("failed to define a metric");
933 900 : let metrics = std::array::from_fn(|i| {
934 900 : let op = StorageIoOperation::from_repr(i).unwrap();
935 900 : storage_io_histogram_vec
936 900 : .get_metric_with_label_values(&[op.as_str()])
937 900 : .unwrap()
938 900 : });
939 100 : Self { metrics }
940 100 : }
941 :
942 800791 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
943 800791 : &self.metrics[op as usize]
944 800791 : }
945 : }
946 :
/// Shared [`StorageIoTime`] instance, built by `StorageIoTime::new` on first access.
pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);

// NOTE(review): presumably the values used for the "operation" label of
// `STORAGE_IO_SIZE` — confirm against the users of this constant.
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
950 :
951 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
952 100 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
953 100 : register_int_gauge_vec!(
954 100 : "pageserver_io_operations_bytes_total",
955 100 : "Total amount of bytes read/written in IO operations",
956 100 : &["operation", "tenant_id", "shard_id", "timeline_id"]
957 100 : )
958 100 : .expect("failed to define a metric")
959 100 : });
960 :
961 : #[cfg(not(test))]
962 : pub(crate) mod virtual_file_descriptor_cache {
963 : use super::*;
964 :
965 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
966 0 : register_uint_gauge!(
967 0 : "pageserver_virtual_file_descriptor_cache_size_max",
968 0 : "Maximum number of open file descriptors in the cache."
969 0 : )
970 0 : .unwrap()
971 0 : });
972 :
973 : // SIZE_CURRENT: derive it like so:
974 : // ```
975 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
976 : // -ignoring(operation)
977 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
978 : // ```
979 : }
980 :
981 : #[cfg(not(test))]
982 : pub(crate) mod virtual_file_io_engine {
983 : use super::*;
984 :
985 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
986 0 : register_uint_gauge_vec!(
987 0 : "pageserver_virtual_file_io_engine_kind",
988 0 : "The configured io engine for VirtualFile",
989 0 : &["kind"],
990 0 : )
991 0 : .unwrap()
992 0 : });
993 : }
994 :
995 0 : #[derive(Debug)]
996 : struct GlobalAndPerTimelineHistogram {
997 : global: Histogram,
998 : per_tenant_timeline: Histogram,
999 : }
1000 :
1001 : impl GlobalAndPerTimelineHistogram {
1002 10 : fn observe(&self, value: f64) {
1003 10 : self.global.observe(value);
1004 10 : self.per_tenant_timeline.observe(value);
1005 10 : }
1006 : }
1007 :
1008 : struct GlobalAndPerTimelineHistogramTimer<'a> {
1009 : h: &'a GlobalAndPerTimelineHistogram,
1010 : start: std::time::Instant,
1011 : }
1012 :
1013 : impl<'a> Drop for GlobalAndPerTimelineHistogramTimer<'a> {
1014 10 : fn drop(&mut self) {
1015 10 : let elapsed = self.start.elapsed();
1016 10 : self.h.observe(elapsed.as_secs_f64());
1017 10 : }
1018 : }
1019 :
/// Kinds of smgr queries for which handling time is recorded.
///
/// The variant order is significant: variants are converted with `as usize`
/// and `from_repr` to index the per-query-type metric array. The
/// `smgr_query_type` label value is the snake_case variant name, as
/// configured by `serialize_all` below.
#[derive(
    Debug,
    Clone,
    Copy,
    IntoStaticStr,
    strum_macros::EnumCount,
    strum_macros::EnumIter,
    strum_macros::FromRepr,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
    GetRelExists,
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
    GetSlruSegment,
}
1037 :
/// Per-timeline smgr query timing: one global/per-timeline histogram pair
/// for every [`SmgrQueryType`], indexed by the variant's discriminant.
#[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline {
    metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
}
1042 :
1043 82 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1044 82 : register_histogram_vec!(
1045 82 : "pageserver_smgr_query_seconds",
1046 82 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1047 82 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1048 82 : CRITICAL_OP_BUCKETS.into(),
1049 82 : )
1050 82 : .expect("failed to define a metric")
1051 82 : });
1052 :
1053 82 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1054 82 : [
1055 82 : 1,
1056 82 : 10,
1057 82 : 20,
1058 82 : 40,
1059 82 : 60,
1060 82 : 80,
1061 82 : 100,
1062 82 : 200,
1063 82 : 300,
1064 82 : 400,
1065 82 : 500,
1066 82 : 600,
1067 82 : 700,
1068 82 : 800,
1069 82 : 900,
1070 82 : 1_000, // 1ms
1071 82 : 2_000,
1072 82 : 4_000,
1073 82 : 6_000,
1074 82 : 8_000,
1075 82 : 10_000, // 10ms
1076 82 : 20_000,
1077 82 : 40_000,
1078 82 : 60_000,
1079 82 : 80_000,
1080 82 : 100_000,
1081 82 : 200_000,
1082 82 : 400_000,
1083 82 : 600_000,
1084 82 : 800_000,
1085 82 : 1_000_000, // 1s
1086 82 : 2_000_000,
1087 82 : 4_000_000,
1088 82 : 6_000_000,
1089 82 : 8_000_000,
1090 82 : 10_000_000, // 10s
1091 82 : 20_000_000,
1092 82 : 50_000_000,
1093 82 : 100_000_000,
1094 82 : 200_000_000,
1095 82 : 1_000_000_000, // 1000s
1096 82 : ]
1097 82 : .into_iter()
1098 82 : .map(Duration::from_micros)
1099 3362 : .map(|d| d.as_secs_f64())
1100 82 : .collect()
1101 82 : });
1102 :
1103 82 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1104 82 : register_histogram_vec!(
1105 82 : "pageserver_smgr_query_seconds_global",
1106 82 : "Time spent on smgr query handling, aggregated by query type.",
1107 82 : &["smgr_query_type"],
1108 82 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1109 82 : )
1110 82 : .expect("failed to define a metric")
1111 82 : });
1112 :
1113 : impl SmgrQueryTimePerTimeline {
1114 306 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1115 306 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1116 306 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1117 306 : let timeline_id = timeline_id.to_string();
1118 1530 : let metrics = std::array::from_fn(|i| {
1119 1530 : let op = SmgrQueryType::from_repr(i).unwrap();
1120 1530 : let global = SMGR_QUERY_TIME_GLOBAL
1121 1530 : .get_metric_with_label_values(&[op.into()])
1122 1530 : .unwrap();
1123 1530 : let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1124 1530 : .get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id])
1125 1530 : .unwrap();
1126 1530 : GlobalAndPerTimelineHistogram {
1127 1530 : global,
1128 1530 : per_tenant_timeline,
1129 1530 : }
1130 1530 : });
1131 306 : Self { metrics }
1132 306 : }
1133 10 : pub(crate) fn start_timer(&self, op: SmgrQueryType) -> impl Drop + '_ {
1134 10 : let metric = &self.metrics[op as usize];
1135 10 : GlobalAndPerTimelineHistogramTimer {
1136 10 : h: metric,
1137 10 : start: std::time::Instant::now(),
1138 10 : }
1139 10 : }
1140 : }
1141 :
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    // Regression test, we used hard-coded string constants before using an enum.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let expect: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        for (query_type, expected_label) in expect {
            let actual_label: &'static str = query_type.into();
            assert_eq!(actual_label, expected_label);
        }
    }

    // Timing one query must add exactly one sample to the fresh per-timeline
    // histograms and at least one sample to the (shared) global histograms.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in &ops {
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // Sums the sample counts over all query types, separately for the
            // global and the per-timeline histograms.
            let get_counts = || {
                let mut global: u64 = 0;
                let mut per_tenant_timeline: u64 = 0;
                for counted in &ops {
                    let pair = &metrics.metrics[*counted as usize];
                    global += pair.global.get_sample_count();
                    per_tenant_timeline += pair.per_tenant_timeline.get_sample_count();
                }
                (global, per_tenant_timeline)
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            drop(metrics.start_timer(*op));

            let (post_global, post_per_tenant_timeline) = get_counts();
            assert_eq!(post_per_tenant_timeline, 1);
            assert!(post_global > pre_global);
        }
    }
}
1205 :
1206 : // keep in sync with control plane Go code so that we can validate
1207 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1208 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1209 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1210 0 : [
1211 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1212 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1213 0 : ]
1214 0 : .map(|ms| (ms as f64) / 1000.0)
1215 0 : });
1216 :
1217 : pub(crate) struct BasebackupQueryTime(HistogramVec);
1218 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1219 0 : BasebackupQueryTime({
1220 0 : register_histogram_vec!(
1221 0 : "pageserver_basebackup_query_seconds",
1222 0 : "Histogram of basebackup queries durations, by result type",
1223 0 : &["result"],
1224 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1225 0 : )
1226 0 : .expect("failed to define a metric")
1227 0 : })
1228 0 : });
1229 :
1230 : impl DurationResultObserver for BasebackupQueryTime {
1231 0 : fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
1232 0 : let label_value = if res.is_ok() { "ok" } else { "error" };
1233 0 : let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
1234 0 : metric.observe(duration.as_secs_f64());
1235 0 : }
1236 : }
1237 :
1238 0 : pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
1239 0 : register_int_gauge_vec!(
1240 0 : "pageserver_live_connections",
1241 0 : "Number of live network connections",
1242 0 : &["pageserver_connection_kind"]
1243 0 : )
1244 0 : .expect("failed to define a metric")
1245 0 : });
1246 :
1247 : // remote storage metrics
1248 :
1249 76 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1250 152 : register_int_counter_pair_vec!(
1251 152 : "pageserver_remote_timeline_client_calls_started",
1252 152 : "Number of started calls to remote timeline client.",
1253 152 : "pageserver_remote_timeline_client_calls_finished",
1254 152 : "Number of finshed calls to remote timeline client.",
1255 152 : &[
1256 152 : "tenant_id",
1257 152 : "shard_id",
1258 152 : "timeline_id",
1259 152 : "file_kind",
1260 152 : "op_kind"
1261 152 : ],
1262 152 : )
1263 76 : .unwrap()
1264 76 : });
1265 :
1266 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1267 76 : Lazy::new(|| {
1268 76 : register_int_counter_vec!(
1269 76 : "pageserver_remote_timeline_client_bytes_started",
1270 76 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1271 76 : The increment happens when the operation is scheduled.",
1272 76 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1273 76 : )
1274 76 : .expect("failed to define a metric")
1275 76 : });
1276 :
1277 76 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1278 76 : register_int_counter_vec!(
1279 76 : "pageserver_remote_timeline_client_bytes_finished",
1280 76 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1281 76 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1282 76 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1283 76 : )
1284 76 : .expect("failed to define a metric")
1285 76 : });
1286 :
1287 : pub(crate) struct TenantManagerMetrics {
1288 : pub(crate) tenant_slots: UIntGauge,
1289 : pub(crate) tenant_slot_writes: IntCounter,
1290 : pub(crate) unexpected_errors: IntCounter,
1291 : }
1292 :
1293 2 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1294 2 : TenantManagerMetrics {
1295 2 : tenant_slots: register_uint_gauge!(
1296 2 : "pageserver_tenant_manager_slots",
1297 2 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1298 2 : )
1299 2 : .expect("failed to define a metric"),
1300 2 : tenant_slot_writes: register_int_counter!(
1301 2 : "pageserver_tenant_manager_slot_writes",
1302 2 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1303 2 : )
1304 2 : .expect("failed to define a metric"),
1305 2 : unexpected_errors: register_int_counter!(
1306 2 : "pageserver_tenant_manager_unexpected_errors_total",
1307 2 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1308 2 : )
1309 2 : .expect("failed to define a metric"),
1310 2 : }
1311 2 : });
1312 :
1313 : pub(crate) struct DeletionQueueMetrics {
1314 : pub(crate) keys_submitted: IntCounter,
1315 : pub(crate) keys_dropped: IntCounter,
1316 : pub(crate) keys_executed: IntCounter,
1317 : pub(crate) keys_validated: IntCounter,
1318 : pub(crate) dropped_lsn_updates: IntCounter,
1319 : pub(crate) unexpected_errors: IntCounter,
1320 : pub(crate) remote_errors: IntCounterVec,
1321 : }
1322 17 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1323 17 : DeletionQueueMetrics{
1324 17 :
1325 17 : keys_submitted: register_int_counter!(
1326 17 : "pageserver_deletion_queue_submitted_total",
1327 17 : "Number of objects submitted for deletion"
1328 17 : )
1329 17 : .expect("failed to define a metric"),
1330 17 :
1331 17 : keys_dropped: register_int_counter!(
1332 17 : "pageserver_deletion_queue_dropped_total",
1333 17 : "Number of object deletions dropped due to stale generation."
1334 17 : )
1335 17 : .expect("failed to define a metric"),
1336 17 :
1337 17 : keys_executed: register_int_counter!(
1338 17 : "pageserver_deletion_queue_executed_total",
1339 17 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1340 17 : )
1341 17 : .expect("failed to define a metric"),
1342 17 :
1343 17 : keys_validated: register_int_counter!(
1344 17 : "pageserver_deletion_queue_validated_total",
1345 17 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1346 17 : )
1347 17 : .expect("failed to define a metric"),
1348 17 :
1349 17 : dropped_lsn_updates: register_int_counter!(
1350 17 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1351 17 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1352 17 : )
1353 17 : .expect("failed to define a metric"),
1354 17 : unexpected_errors: register_int_counter!(
1355 17 : "pageserver_deletion_queue_unexpected_errors_total",
1356 17 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1357 17 : )
1358 17 : .expect("failed to define a metric"),
1359 17 : remote_errors: register_int_counter_vec!(
1360 17 : "pageserver_deletion_queue_remote_errors_total",
1361 17 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1362 17 : &["op_kind"],
1363 17 : )
1364 17 : .expect("failed to define a metric")
1365 17 : }
1366 17 : });
1367 :
1368 : pub(crate) struct WalIngestMetrics {
1369 : pub(crate) records_received: IntCounter,
1370 : pub(crate) records_committed: IntCounter,
1371 : pub(crate) records_filtered: IntCounter,
1372 : }
1373 :
1374 2 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
1375 2 : records_received: register_int_counter!(
1376 2 : "pageserver_wal_ingest_records_received",
1377 2 : "Number of WAL records received from safekeepers"
1378 2 : )
1379 2 : .expect("failed to define a metric"),
1380 2 : records_committed: register_int_counter!(
1381 2 : "pageserver_wal_ingest_records_committed",
1382 2 : "Number of WAL records which resulted in writes to pageserver storage"
1383 2 : )
1384 2 : .expect("failed to define a metric"),
1385 2 : records_filtered: register_int_counter!(
1386 2 : "pageserver_wal_ingest_records_filtered",
1387 2 : "Number of WAL records filtered out due to sharding"
1388 2 : )
1389 2 : .expect("failed to define a metric"),
1390 2 : });
1391 : pub(crate) struct SecondaryModeMetrics {
1392 : pub(crate) upload_heatmap: IntCounter,
1393 : pub(crate) upload_heatmap_errors: IntCounter,
1394 : pub(crate) upload_heatmap_duration: Histogram,
1395 : pub(crate) download_heatmap: IntCounter,
1396 : pub(crate) download_layer: IntCounter,
1397 : }
1398 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
1399 0 : upload_heatmap: register_int_counter!(
1400 0 : "pageserver_secondary_upload_heatmap",
1401 0 : "Number of heatmaps written to remote storage by attached tenants"
1402 0 : )
1403 0 : .expect("failed to define a metric"),
1404 0 : upload_heatmap_errors: register_int_counter!(
1405 0 : "pageserver_secondary_upload_heatmap_errors",
1406 0 : "Failures writing heatmap to remote storage"
1407 0 : )
1408 0 : .expect("failed to define a metric"),
1409 0 : upload_heatmap_duration: register_histogram!(
1410 0 : "pageserver_secondary_upload_heatmap_duration",
1411 0 : "Time to build and upload a heatmap, including any waiting inside the S3 client"
1412 0 : )
1413 0 : .expect("failed to define a metric"),
1414 0 : download_heatmap: register_int_counter!(
1415 0 : "pageserver_secondary_download_heatmap",
1416 0 : "Number of downloads of heatmaps by secondary mode locations"
1417 0 : )
1418 0 : .expect("failed to define a metric"),
1419 0 : download_layer: register_int_counter!(
1420 0 : "pageserver_secondary_download_layer",
1421 0 : "Number of downloads of layers by secondary mode locations"
1422 0 : )
1423 0 : .expect("failed to define a metric"),
1424 0 : });
1425 :
/// Kind of operation performed against remote storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase name of the operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1441 :
/// Kind of file a remote storage operation acts on.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase name of the file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1455 :
1456 74 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1457 74 : register_histogram_vec!(
1458 74 : "pageserver_remote_operation_seconds",
1459 74 : "Time spent on remote storage operations. \
1460 74 : Grouped by tenant, timeline, operation_kind and status. \
1461 74 : Does not account for time spent waiting in remote timeline client's queues.",
1462 74 : &["file_kind", "op_kind", "status"]
1463 74 : )
1464 74 : .expect("failed to define a metric")
1465 74 : });
1466 :
1467 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1468 0 : register_int_counter_vec!(
1469 0 : "pageserver_tenant_task_events",
1470 0 : "Number of task start/stop/fail events.",
1471 0 : &["event"],
1472 0 : )
1473 0 : .expect("Failed to register tenant_task_events metric")
1474 0 : });
1475 :
1476 10 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
1477 20 : register_int_counter_pair_vec!(
1478 20 : "pageserver_background_loop_semaphore_wait_start_count",
1479 20 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
1480 20 : "pageserver_background_loop_semaphore_wait_finish_count",
1481 20 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
1482 20 : &["task"],
1483 20 : )
1484 10 : .unwrap()
1485 10 : });
1486 :
1487 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
1488 0 : register_int_counter_vec!(
1489 0 : "pageserver_background_loop_period_overrun_count",
1490 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
1491 0 : &["task", "period"],
1492 0 : )
1493 0 : .expect("failed to define a metric")
1494 0 : });
1495 :
1496 : // walreceiver metrics
1497 :
1498 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
1499 0 : register_int_counter!(
1500 0 : "pageserver_walreceiver_started_connections_total",
1501 0 : "Number of started walreceiver connections"
1502 0 : )
1503 0 : .expect("failed to define a metric")
1504 0 : });
1505 :
1506 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
1507 0 : register_int_gauge!(
1508 0 : "pageserver_walreceiver_active_managers",
1509 0 : "Number of active walreceiver managers"
1510 0 : )
1511 0 : .expect("failed to define a metric")
1512 0 : });
1513 :
1514 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
1515 0 : register_int_counter_vec!(
1516 0 : "pageserver_walreceiver_switches_total",
1517 0 : "Number of walreceiver manager change_connection calls",
1518 0 : &["reason"]
1519 0 : )
1520 0 : .expect("failed to define a metric")
1521 0 : });
1522 :
1523 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
1524 0 : register_int_counter!(
1525 0 : "pageserver_walreceiver_broker_updates_total",
1526 0 : "Number of received broker updates in walreceiver"
1527 0 : )
1528 0 : .expect("failed to define a metric")
1529 0 : });
1530 :
1531 2 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1532 2 : register_int_counter_vec!(
1533 2 : "pageserver_walreceiver_candidates_events_total",
1534 2 : "Number of walreceiver candidate events",
1535 2 : &["event"]
1536 2 : )
1537 2 : .expect("failed to define a metric")
1538 2 : });
1539 :
1540 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
1541 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
1542 :
1543 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
1544 2 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
1545 :
1546 : // Metrics collected on WAL redo operations
1547 : //
1548 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
1549 : // for access to the postgres process ('wait') since there is only one for
1550 : // each tenant.
1551 :
/// Time buckets are small because we want to be able to measure the
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
///
/// Expands to a `Vec` of 17 bucket bounds in seconds, in ascending order.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            // microseconds
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500,
            // milliseconds
            0.001_000, 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000,
            0.250_000, 0.500_000,
            // one second
            1.000_000,
        ]
    };
}
1568 :
/// While we're at it, also measure the amount of records replayed in each
/// operation. We have a global 'total replayed' counter, but that's not
/// as useful as 'what is the skew for how many records we replay in one
/// operation'.
///
/// Expands to a `Vec` of 10 bucket bounds (record counts), in ascending order.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![
            0.0, 1.0, 2.0, 5.0, // single digits
            10.0, 25.0, 50.0, // tens
            100.0, 250.0, 500.0, // hundreds
        ]
    };
}
1578 :
/// Expands to a `Vec` of 22 byte-size bucket bounds, in ascending order.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0,
            1024.0, 1456.0, 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0,
            32768.0,
        ]
    };
}
1589 :
1590 6 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
1591 6 : register_histogram!(
1592 6 : "pageserver_wal_redo_seconds",
1593 6 : "Time spent on WAL redo",
1594 6 : redo_histogram_time_buckets!()
1595 6 : )
1596 6 : .expect("failed to define a metric")
1597 6 : });
1598 :
1599 6 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1600 6 : register_histogram!(
1601 6 : "pageserver_wal_redo_records_histogram",
1602 6 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
1603 6 : redo_histogram_count_buckets!(),
1604 6 : )
1605 6 : .expect("failed to define a metric")
1606 6 : });
1607 :
1608 6 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1609 6 : register_histogram!(
1610 6 : "pageserver_wal_redo_bytes_histogram",
1611 6 : "Histogram of number of records replayed per redo sent to Postgres",
1612 6 : redo_bytes_histogram_count_buckets!(),
1613 6 : )
1614 6 : .expect("failed to define a metric")
1615 6 : });
1616 :
1617 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
1618 6 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
1619 6 : register_int_counter!(
1620 6 : "pageserver_replayed_wal_records_total",
1621 6 : "Number of WAL records replayed in WAL redo process"
1622 6 : )
1623 6 : .unwrap()
1624 6 : });
1625 :
1626 : #[rustfmt::skip]
1627 6 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1628 6 : register_histogram!(
1629 6 : "pageserver_wal_redo_process_launch_duration",
1630 6 : "Histogram of the duration of successful WalRedoProcess::launch calls",
1631 6 : vec![
1632 6 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
1633 6 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
1634 6 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
1635 6 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
1636 6 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
1637 6 : ],
1638 6 : )
1639 6 : .expect("failed to define a metric")
1640 6 : });
1641 :
/// Counters describing WAL redo process lifecycle: processes started,
/// processes stopped (one counter per [`WalRedoKillCause`]) and stderr
/// logger tasks started/finished.
pub(crate) struct WalRedoProcessCounters {
    pub(crate) started: IntCounter,
    pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
    pub(crate) active_stderr_logger_tasks_started: IntCounter,
    pub(crate) active_stderr_logger_tasks_finished: IntCounter,
}

/// Reason a WAL redo process was stopped; the variant name is used as the
/// value of the `cause` label.
#[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
pub(crate) enum WalRedoKillCause {
    WalRedoProcessDrop,
    NoLeakChildDrop,
    Startup,
}
1655 :
1656 : impl Default for WalRedoProcessCounters {
1657 6 : fn default() -> Self {
1658 6 : let started = register_int_counter!(
1659 6 : "pageserver_wal_redo_process_started_total",
1660 6 : "Number of WAL redo processes started",
1661 6 : )
1662 6 : .unwrap();
1663 6 :
1664 6 : let killed = register_int_counter_vec!(
1665 6 : "pageserver_wal_redo_process_stopped_total",
1666 6 : "Number of WAL redo processes stopped",
1667 6 : &["cause"],
1668 6 : )
1669 6 : .unwrap();
1670 6 :
1671 6 : let active_stderr_logger_tasks_started = register_int_counter!(
1672 6 : "pageserver_walredo_stderr_logger_tasks_started_total",
1673 6 : "Number of active walredo stderr logger tasks that have started",
1674 6 : )
1675 6 : .unwrap();
1676 6 :
1677 6 : let active_stderr_logger_tasks_finished = register_int_counter!(
1678 6 : "pageserver_walredo_stderr_logger_tasks_finished_total",
1679 6 : "Number of active walredo stderr logger tasks that have finished",
1680 6 : )
1681 6 : .unwrap();
1682 6 :
1683 6 : Self {
1684 6 : started,
1685 18 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
1686 18 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
1687 18 : let cause_str: &'static str = cause.into();
1688 18 : killed.with_label_values(&[cause_str])
1689 18 : })),
1690 6 : active_stderr_logger_tasks_started,
1691 6 : active_stderr_logger_tasks_finished,
1692 6 : }
1693 6 : }
1694 : }
1695 :
1696 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
1697 : Lazy::new(WalRedoProcessCounters::default);
1698 :
1699 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
1700 : pub(crate) struct StorageTimeMetricsTimer {
1701 : metrics: StorageTimeMetrics,
1702 : start: Instant,
1703 : }
1704 :
1705 : impl StorageTimeMetricsTimer {
1706 1834 : fn new(metrics: StorageTimeMetrics) -> Self {
1707 1834 : Self {
1708 1834 : metrics,
1709 1834 : start: Instant::now(),
1710 1834 : }
1711 1834 : }
1712 :
1713 : /// Record the time from creation to now.
1714 1834 : pub fn stop_and_record(self) {
1715 1834 : let duration = self.start.elapsed().as_secs_f64();
1716 1834 : self.metrics.timeline_sum.inc_by(duration);
1717 1834 : self.metrics.timeline_count.inc();
1718 1834 : self.metrics.global_histogram.observe(duration);
1719 1834 : }
1720 : }
1721 :
1722 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
1723 : /// timeline total sum and count.
1724 1834 : #[derive(Clone, Debug)]
1725 : pub(crate) struct StorageTimeMetrics {
1726 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
1727 : timeline_sum: Counter,
1728 : /// Number of oeprations, per operation, tenant_id and timeline_id
1729 : timeline_count: IntCounter,
1730 : /// Global histogram having only the "operation" label.
1731 : global_histogram: Histogram,
1732 : }
1733 :
1734 : impl StorageTimeMetrics {
1735 2072 : pub fn new(
1736 2072 : operation: StorageTimeOperation,
1737 2072 : tenant_id: &str,
1738 2072 : shard_id: &str,
1739 2072 : timeline_id: &str,
1740 2072 : ) -> Self {
1741 2072 : let operation: &'static str = operation.into();
1742 2072 :
1743 2072 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
1744 2072 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1745 2072 : .unwrap();
1746 2072 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
1747 2072 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1748 2072 : .unwrap();
1749 2072 : let global_histogram = STORAGE_TIME_GLOBAL
1750 2072 : .get_metric_with_label_values(&[operation])
1751 2072 : .unwrap();
1752 2072 :
1753 2072 : StorageTimeMetrics {
1754 2072 : timeline_sum,
1755 2072 : timeline_count,
1756 2072 : global_histogram,
1757 2072 : }
1758 2072 : }
1759 :
1760 : /// Starts timing a new operation.
1761 : ///
1762 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
1763 1834 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
1764 1834 : StorageTimeMetricsTimer::new(self.clone())
1765 1834 : }
1766 : }
1767 :
1768 0 : #[derive(Debug)]
1769 : pub(crate) struct TimelineMetrics {
1770 : tenant_id: String,
1771 : shard_id: String,
1772 : timeline_id: String,
1773 : pub flush_time_histo: StorageTimeMetrics,
1774 : pub compact_time_histo: StorageTimeMetrics,
1775 : pub create_images_time_histo: StorageTimeMetrics,
1776 : pub logical_size_histo: StorageTimeMetrics,
1777 : pub imitate_logical_size_histo: StorageTimeMetrics,
1778 : pub load_layer_map_histo: StorageTimeMetrics,
1779 : pub garbage_collect_histo: StorageTimeMetrics,
1780 : pub last_record_gauge: IntGauge,
1781 : resident_physical_size_gauge: UIntGauge,
1782 : /// copy of LayeredTimeline.current_logical_size
1783 : pub current_logical_size_gauge: UIntGauge,
1784 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
1785 : pub evictions: IntCounter,
1786 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
1787 : }
1788 :
1789 : impl TimelineMetrics {
1790 296 : pub fn new(
1791 296 : tenant_shard_id: &TenantShardId,
1792 296 : timeline_id_raw: &TimelineId,
1793 296 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
1794 296 : ) -> Self {
1795 296 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1796 296 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
1797 296 : let timeline_id = timeline_id_raw.to_string();
1798 296 : let flush_time_histo = StorageTimeMetrics::new(
1799 296 : StorageTimeOperation::LayerFlush,
1800 296 : &tenant_id,
1801 296 : &shard_id,
1802 296 : &timeline_id,
1803 296 : );
1804 296 : let compact_time_histo = StorageTimeMetrics::new(
1805 296 : StorageTimeOperation::Compact,
1806 296 : &tenant_id,
1807 296 : &shard_id,
1808 296 : &timeline_id,
1809 296 : );
1810 296 : let create_images_time_histo = StorageTimeMetrics::new(
1811 296 : StorageTimeOperation::CreateImages,
1812 296 : &tenant_id,
1813 296 : &shard_id,
1814 296 : &timeline_id,
1815 296 : );
1816 296 : let logical_size_histo = StorageTimeMetrics::new(
1817 296 : StorageTimeOperation::LogicalSize,
1818 296 : &tenant_id,
1819 296 : &shard_id,
1820 296 : &timeline_id,
1821 296 : );
1822 296 : let imitate_logical_size_histo = StorageTimeMetrics::new(
1823 296 : StorageTimeOperation::ImitateLogicalSize,
1824 296 : &tenant_id,
1825 296 : &shard_id,
1826 296 : &timeline_id,
1827 296 : );
1828 296 : let load_layer_map_histo = StorageTimeMetrics::new(
1829 296 : StorageTimeOperation::LoadLayerMap,
1830 296 : &tenant_id,
1831 296 : &shard_id,
1832 296 : &timeline_id,
1833 296 : );
1834 296 : let garbage_collect_histo = StorageTimeMetrics::new(
1835 296 : StorageTimeOperation::Gc,
1836 296 : &tenant_id,
1837 296 : &shard_id,
1838 296 : &timeline_id,
1839 296 : );
1840 296 : let last_record_gauge = LAST_RECORD_LSN
1841 296 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1842 296 : .unwrap();
1843 296 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
1844 296 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1845 296 : .unwrap();
1846 296 : // TODO: we shouldn't expose this metric
1847 296 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
1848 296 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1849 296 : .unwrap();
1850 296 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
1851 296 : let directory_entries_count_gauge_closure = {
1852 296 : let tenant_shard_id = *tenant_shard_id;
1853 296 : let timeline_id_raw = *timeline_id_raw;
1854 0 : move || {
1855 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1856 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
1857 0 : let timeline_id = timeline_id_raw.to_string();
1858 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
1859 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1860 0 : .unwrap();
1861 0 : gauge
1862 0 : }
1863 : };
1864 296 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
1865 296 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
1866 296 : let evictions = EVICTIONS
1867 296 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1868 296 : .unwrap();
1869 296 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
1870 296 : .build(&tenant_id, &shard_id, &timeline_id);
1871 296 :
1872 296 : TimelineMetrics {
1873 296 : tenant_id,
1874 296 : shard_id,
1875 296 : timeline_id,
1876 296 : flush_time_histo,
1877 296 : compact_time_histo,
1878 296 : create_images_time_histo,
1879 296 : logical_size_histo,
1880 296 : imitate_logical_size_histo,
1881 296 : garbage_collect_histo,
1882 296 : load_layer_map_histo,
1883 296 : last_record_gauge,
1884 296 : resident_physical_size_gauge,
1885 296 : current_logical_size_gauge,
1886 296 : directory_entries_count_gauge,
1887 296 : evictions,
1888 296 : evictions_with_low_residence_duration: std::sync::RwLock::new(
1889 296 : evictions_with_low_residence_duration,
1890 296 : ),
1891 296 : }
1892 296 : }
1893 :
1894 554 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
1895 554 : self.resident_physical_size_add(sz);
1896 554 : }
1897 :
1898 306 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
1899 306 : self.resident_physical_size_gauge.sub(sz);
1900 306 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
1901 306 : }
1902 :
1903 578 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
1904 578 : self.resident_physical_size_gauge.add(sz);
1905 578 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
1906 578 : }
1907 :
1908 8 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
1909 8 : self.resident_physical_size_gauge.get()
1910 8 : }
1911 : }
1912 :
1913 : impl Drop for TimelineMetrics {
1914 8 : fn drop(&mut self) {
1915 8 : let tenant_id = &self.tenant_id;
1916 8 : let timeline_id = &self.timeline_id;
1917 8 : let shard_id = &self.shard_id;
1918 8 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1919 8 : {
1920 8 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
1921 8 : let _ =
1922 8 : RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1923 8 : }
1924 8 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1925 8 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
1926 0 : let _ = metric.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1927 8 : }
1928 8 : let _ = EVICTIONS.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1929 8 :
1930 8 : self.evictions_with_low_residence_duration
1931 8 : .write()
1932 8 : .unwrap()
1933 8 : .remove(tenant_id, shard_id, timeline_id);
1934 :
1935 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
1936 : // removed at the end of it. The idea is to have the metrics outlive the
1937 : // entity during which they're observed, e.g., the smgr metrics shall
1938 : // outlive an individual smgr connection, but not the timeline.
1939 :
1940 72 : for op in StorageTimeOperation::VARIANTS {
1941 64 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
1942 64 : op,
1943 64 : tenant_id,
1944 64 : shard_id,
1945 64 : timeline_id,
1946 64 : ]);
1947 64 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
1948 64 : op,
1949 64 : tenant_id,
1950 64 : shard_id,
1951 64 : timeline_id,
1952 64 : ]);
1953 64 : }
1954 :
1955 24 : for op in STORAGE_IO_SIZE_OPERATIONS {
1956 16 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
1957 16 : }
1958 :
1959 48 : for op in SmgrQueryType::iter() {
1960 40 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
1961 40 : op.into(),
1962 40 : tenant_id,
1963 40 : shard_id,
1964 40 : timeline_id,
1965 40 : ]);
1966 40 : }
1967 8 : }
1968 : }
1969 :
1970 88 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
1971 88 : // Only shard zero deals in synthetic sizes
1972 88 : if tenant_shard_id.is_zero() {
1973 88 : let tid = tenant_shard_id.tenant_id.to_string();
1974 88 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
1975 88 : }
1976 :
1977 : // we leave the BROKEN_TENANTS_SET entry if any
1978 88 : }
1979 :
1980 : use futures::Future;
1981 : use pin_project_lite::pin_project;
1982 : use std::collections::HashMap;
1983 : use std::pin::Pin;
1984 : use std::sync::{Arc, Mutex};
1985 : use std::task::{Context, Poll};
1986 : use std::time::{Duration, Instant};
1987 :
1988 : use crate::context::{PageContentKind, RequestContext};
1989 : use crate::task_mgr::TaskKind;
1990 :
1991 : /// Maintain a per timeline gauge in addition to the global gauge.
1992 : struct PerTimelineRemotePhysicalSizeGauge {
1993 : last_set: u64,
1994 : gauge: UIntGauge,
1995 : }
1996 :
1997 : impl PerTimelineRemotePhysicalSizeGauge {
1998 296 : fn new(per_timeline_gauge: UIntGauge) -> Self {
1999 296 : Self {
2000 296 : last_set: per_timeline_gauge.get(),
2001 296 : gauge: per_timeline_gauge,
2002 296 : }
2003 296 : }
2004 1041 : fn set(&mut self, sz: u64) {
2005 1041 : self.gauge.set(sz);
2006 1041 : if sz < self.last_set {
2007 24 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
2008 1017 : } else {
2009 1017 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
2010 1017 : };
2011 1041 : self.last_set = sz;
2012 1041 : }
2013 0 : fn get(&self) -> u64 {
2014 0 : self.gauge.get()
2015 0 : }
2016 : }
2017 :
2018 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2019 8 : fn drop(&mut self) {
2020 8 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
2021 8 : }
2022 : }
2023 :
2024 : pub(crate) struct RemoteTimelineClientMetrics {
2025 : tenant_id: String,
2026 : shard_id: String,
2027 : timeline_id: String,
2028 : remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
2029 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
2030 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2031 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2032 : }
2033 :
2034 : impl RemoteTimelineClientMetrics {
2035 306 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2036 306 : RemoteTimelineClientMetrics {
2037 306 : tenant_id: tenant_shard_id.tenant_id.to_string(),
2038 306 : shard_id: format!("{}", tenant_shard_id.shard_slug()),
2039 306 : timeline_id: timeline_id.to_string(),
2040 306 : calls: Mutex::new(HashMap::default()),
2041 306 : bytes_started_counter: Mutex::new(HashMap::default()),
2042 306 : bytes_finished_counter: Mutex::new(HashMap::default()),
2043 306 : remote_physical_size_gauge: Mutex::new(None),
2044 306 : }
2045 306 : }
2046 :
2047 1041 : pub(crate) fn remote_physical_size_set(&self, sz: u64) {
2048 1041 : let mut guard = self.remote_physical_size_gauge.lock().unwrap();
2049 1041 : let gauge = guard.get_or_insert_with(|| {
2050 296 : PerTimelineRemotePhysicalSizeGauge::new(
2051 296 : REMOTE_PHYSICAL_SIZE
2052 296 : .get_metric_with_label_values(&[
2053 296 : &self.tenant_id,
2054 296 : &self.shard_id,
2055 296 : &self.timeline_id,
2056 296 : ])
2057 296 : .unwrap(),
2058 296 : )
2059 1041 : });
2060 1041 : gauge.set(sz);
2061 1041 : }
2062 :
2063 0 : pub(crate) fn remote_physical_size_get(&self) -> u64 {
2064 0 : let guard = self.remote_physical_size_gauge.lock().unwrap();
2065 0 : guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
2066 0 : }
2067 :
2068 1308 : pub fn remote_operation_time(
2069 1308 : &self,
2070 1308 : file_kind: &RemoteOpFileKind,
2071 1308 : op_kind: &RemoteOpKind,
2072 1308 : status: &'static str,
2073 1308 : ) -> Histogram {
2074 1308 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2075 1308 : REMOTE_OPERATION_TIME
2076 1308 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2077 1308 : .unwrap()
2078 1308 : }
2079 :
2080 3186 : fn calls_counter_pair(
2081 3186 : &self,
2082 3186 : file_kind: &RemoteOpFileKind,
2083 3186 : op_kind: &RemoteOpKind,
2084 3186 : ) -> IntCounterPair {
2085 3186 : let mut guard = self.calls.lock().unwrap();
2086 3186 : let key = (file_kind.as_str(), op_kind.as_str());
2087 3186 : let metric = guard.entry(key).or_insert_with(move || {
2088 496 : REMOTE_TIMELINE_CLIENT_CALLS
2089 496 : .get_metric_with_label_values(&[
2090 496 : &self.tenant_id,
2091 496 : &self.shard_id,
2092 496 : &self.timeline_id,
2093 496 : key.0,
2094 496 : key.1,
2095 496 : ])
2096 496 : .unwrap()
2097 3186 : });
2098 3186 : metric.clone()
2099 3186 : }
2100 :
2101 562 : fn bytes_started_counter(
2102 562 : &self,
2103 562 : file_kind: &RemoteOpFileKind,
2104 562 : op_kind: &RemoteOpKind,
2105 562 : ) -> IntCounter {
2106 562 : let mut guard = self.bytes_started_counter.lock().unwrap();
2107 562 : let key = (file_kind.as_str(), op_kind.as_str());
2108 562 : let metric = guard.entry(key).or_insert_with(move || {
2109 182 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2110 182 : .get_metric_with_label_values(&[
2111 182 : &self.tenant_id,
2112 182 : &self.shard_id,
2113 182 : &self.timeline_id,
2114 182 : key.0,
2115 182 : key.1,
2116 182 : ])
2117 182 : .unwrap()
2118 562 : });
2119 562 : metric.clone()
2120 562 : }
2121 :
2122 1105 : fn bytes_finished_counter(
2123 1105 : &self,
2124 1105 : file_kind: &RemoteOpFileKind,
2125 1105 : op_kind: &RemoteOpKind,
2126 1105 : ) -> IntCounter {
2127 1105 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2128 1105 : let key = (file_kind.as_str(), op_kind.as_str());
2129 1105 : let metric = guard.entry(key).or_insert_with(move || {
2130 182 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2131 182 : .get_metric_with_label_values(&[
2132 182 : &self.tenant_id,
2133 182 : &self.shard_id,
2134 182 : &self.timeline_id,
2135 182 : key.0,
2136 182 : key.1,
2137 182 : ])
2138 182 : .unwrap()
2139 1105 : });
2140 1105 : metric.clone()
2141 1105 : }
2142 : }
2143 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Test helper: value of the cached "bytes started" counter for the given
    /// file and operation kind, or `None` if it has not been looked up yet.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_started_counter.lock().unwrap();
        let counter = cache.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }

    /// Test helper: value of the cached "bytes finished" counter for the given
    /// file and operation kind, or `None` if it has not been looked up yet.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_finished_counter.lock().unwrap();
        let counter = cache.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }
}
2166 :
2167 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2168 : #[must_use]
2169 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2170 : /// Decremented on drop.
2171 : calls_counter_pair: Option<IntCounterPair>,
2172 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2173 : bytes_finished: Option<(IntCounter, u64)>,
2174 : }
2175 :
2176 : impl RemoteTimelineClientCallMetricGuard {
2177 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2178 : /// The caller vouches to do the metric updates manually.
2179 1634 : pub fn will_decrement_manually(mut self) {
2180 1634 : let RemoteTimelineClientCallMetricGuard {
2181 1634 : calls_counter_pair,
2182 1634 : bytes_finished,
2183 1634 : } = &mut self;
2184 1634 : calls_counter_pair.take();
2185 1634 : bytes_finished.take();
2186 1634 : }
2187 : }
2188 :
2189 : impl Drop for RemoteTimelineClientCallMetricGuard {
2190 1654 : fn drop(&mut self) {
2191 1654 : let RemoteTimelineClientCallMetricGuard {
2192 1654 : calls_counter_pair,
2193 1654 : bytes_finished,
2194 1654 : } = self;
2195 1654 : if let Some(guard) = calls_counter_pair.take() {
2196 20 : guard.dec();
2197 1634 : }
2198 1654 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2199 0 : bytes_finished_metric.inc_by(*value);
2200 1654 : }
2201 1654 : }
2202 : }
2203 :
/// Tells [`RemoteTimelineClientMetrics`] whether the byte size of a call
/// should be tracked in the applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// Do not account for this call's byte size in any metrics.
    ///
    /// `reason` exists purely to make call sites self-documenting about why
    /// they don't need the metric.
    DontTrackSize { reason: &'static str },
    /// Track the byte size of the call in applicable metric(s).
    Bytes(u64),
}
2214 :
2215 : impl RemoteTimelineClientMetrics {
2216 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2217 : ///
2218 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2219 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2220 : /// is more suitable.
2221 : /// Never do both.
2222 1654 : pub(crate) fn call_begin(
2223 1654 : &self,
2224 1654 : file_kind: &RemoteOpFileKind,
2225 1654 : op_kind: &RemoteOpKind,
2226 1654 : size: RemoteTimelineClientMetricsCallTrackSize,
2227 1654 : ) -> RemoteTimelineClientCallMetricGuard {
2228 1654 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2229 1654 : calls_counter_pair.inc();
2230 :
2231 1654 : let bytes_finished = match size {
2232 1092 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2233 1092 : // nothing to do
2234 1092 : None
2235 : }
2236 562 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2237 562 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2238 562 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2239 562 : Some((finished_counter, size))
2240 : }
2241 : };
2242 1654 : RemoteTimelineClientCallMetricGuard {
2243 1654 : calls_counter_pair: Some(calls_counter_pair),
2244 1654 : bytes_finished,
2245 1654 : }
2246 1654 : }
2247 :
2248 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2249 : /// Using the guard object is generally preferable.
2250 : /// See [`call_begin`](Self::call_begin) for more context.
2251 1532 : pub(crate) fn call_end(
2252 1532 : &self,
2253 1532 : file_kind: &RemoteOpFileKind,
2254 1532 : op_kind: &RemoteOpKind,
2255 1532 : size: RemoteTimelineClientMetricsCallTrackSize,
2256 1532 : ) {
2257 1532 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2258 1532 : calls_counter_pair.dec();
2259 1532 : match size {
2260 989 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2261 543 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2262 543 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2263 543 : }
2264 : }
2265 1532 : }
2266 : }
2267 :
2268 : impl Drop for RemoteTimelineClientMetrics {
2269 18 : fn drop(&mut self) {
2270 18 : let RemoteTimelineClientMetrics {
2271 18 : tenant_id,
2272 18 : shard_id,
2273 18 : timeline_id,
2274 18 : remote_physical_size_gauge,
2275 18 : calls,
2276 18 : bytes_started_counter,
2277 18 : bytes_finished_counter,
2278 18 : } = self;
2279 22 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
2280 22 : let mut res = [Ok(()), Ok(())];
2281 22 : REMOTE_TIMELINE_CLIENT_CALLS
2282 22 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
2283 22 : // don't care about results
2284 22 : }
2285 18 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2286 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2287 6 : tenant_id,
2288 6 : shard_id,
2289 6 : timeline_id,
2290 6 : a,
2291 6 : b,
2292 6 : ]);
2293 6 : }
2294 18 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2295 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2296 6 : tenant_id,
2297 6 : shard_id,
2298 6 : timeline_id,
2299 6 : a,
2300 6 : b,
2301 6 : ]);
2302 6 : }
2303 18 : {
2304 18 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2305 18 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2306 18 : }
2307 18 : }
2308 : }
2309 :
2310 : /// Wrapper future that measures the time spent by a remote storage operation,
2311 : /// and records the time and success/failure as a prometheus metric.
2312 : pub(crate) trait MeasureRemoteOp: Sized {
2313 1322 : fn measure_remote_op(
2314 1322 : self,
2315 1322 : file_kind: RemoteOpFileKind,
2316 1322 : op: RemoteOpKind,
2317 1322 : metrics: Arc<RemoteTimelineClientMetrics>,
2318 1322 : ) -> MeasuredRemoteOp<Self> {
2319 1322 : let start = Instant::now();
2320 1322 : MeasuredRemoteOp {
2321 1322 : inner: self,
2322 1322 : file_kind,
2323 1322 : op,
2324 1322 : start,
2325 1322 : metrics,
2326 1322 : }
2327 1322 : }
2328 : }
2329 :
2330 : impl<T: Sized> MeasureRemoteOp for T {}
2331 :
2332 : pin_project! {
2333 : pub(crate) struct MeasuredRemoteOp<F>
2334 : {
2335 : #[pin]
2336 : inner: F,
2337 : file_kind: RemoteOpFileKind,
2338 : op: RemoteOpKind,
2339 : start: Instant,
2340 : metrics: Arc<RemoteTimelineClientMetrics>,
2341 : }
2342 : }
2343 :
2344 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
2345 : type Output = Result<O, E>;
2346 :
2347 23456 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
2348 23456 : let this = self.project();
2349 23456 : let poll_result = this.inner.poll(cx);
2350 23456 : if let Poll::Ready(ref res) = poll_result {
2351 1308 : let duration = this.start.elapsed();
2352 1308 : let status = if res.is_ok() { &"success" } else { &"failure" };
2353 1308 : this.metrics
2354 1308 : .remote_operation_time(this.file_kind, this.op, status)
2355 1308 : .observe(duration.as_secs_f64());
2356 22148 : }
2357 23456 : poll_result
2358 23456 : }
2359 : }
2360 :
2361 : pub mod tokio_epoll_uring {
2362 : use metrics::UIntGauge;
2363 :
2364 : pub struct Collector {
2365 : descs: Vec<metrics::core::Desc>,
2366 : systems_created: UIntGauge,
2367 : systems_destroyed: UIntGauge,
2368 : }
2369 :
2370 : const NMETRICS: usize = 2;
2371 :
2372 : impl metrics::core::Collector for Collector {
2373 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
2374 0 : self.descs.iter().collect()
2375 0 : }
2376 :
2377 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
2378 0 : let mut mfs = Vec::with_capacity(NMETRICS);
2379 0 : let tokio_epoll_uring::metrics::Metrics {
2380 0 : systems_created,
2381 0 : systems_destroyed,
2382 0 : } = tokio_epoll_uring::metrics::global();
2383 0 : self.systems_created.set(systems_created);
2384 0 : mfs.extend(self.systems_created.collect());
2385 0 : self.systems_destroyed.set(systems_destroyed);
2386 0 : mfs.extend(self.systems_destroyed.collect());
2387 0 : mfs
2388 0 : }
2389 : }
2390 :
2391 : impl Collector {
2392 : #[allow(clippy::new_without_default)]
2393 0 : pub fn new() -> Self {
2394 0 : let mut descs = Vec::new();
2395 0 :
2396 0 : let systems_created = UIntGauge::new(
2397 0 : "pageserver_tokio_epoll_uring_systems_created",
2398 0 : "counter of tokio-epoll-uring systems that were created",
2399 0 : )
2400 0 : .unwrap();
2401 0 : descs.extend(
2402 0 : metrics::core::Collector::desc(&systems_created)
2403 0 : .into_iter()
2404 0 : .cloned(),
2405 0 : );
2406 0 :
2407 0 : let systems_destroyed = UIntGauge::new(
2408 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
2409 0 : "counter of tokio-epoll-uring systems that were destroyed",
2410 0 : )
2411 0 : .unwrap();
2412 0 : descs.extend(
2413 0 : metrics::core::Collector::desc(&systems_destroyed)
2414 0 : .into_iter()
2415 0 : .cloned(),
2416 0 : );
2417 0 :
2418 0 : Self {
2419 0 : descs,
2420 0 : systems_created,
2421 0 : systems_destroyed,
2422 0 : }
2423 0 : }
2424 : }
2425 : }
2426 :
2427 : pub(crate) mod tenant_throttling {
2428 : use metrics::{register_int_counter_vec, IntCounter};
2429 : use once_cell::sync::Lazy;
2430 :
2431 : use crate::tenant::{self, throttle::Metric};
2432 :
2433 : pub(crate) struct TimelineGet {
2434 : wait_time: IntCounter,
2435 : count: IntCounter,
2436 : }
2437 :
2438 82 : pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
2439 82 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2440 82 : register_int_counter_vec!(
2441 82 : "pageserver_tenant_throttling_wait_usecs_sum_global",
2442 82 : "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
2443 82 : &["kind"]
2444 82 : )
2445 82 : .unwrap()
2446 82 : });
2447 82 :
2448 82 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2449 82 : register_int_counter_vec!(
2450 82 : "pageserver_tenant_throttling_count_global",
2451 82 : "Count of tenant throttlings, by kind of throttle.",
2452 82 : &["kind"]
2453 82 : )
2454 82 : .unwrap()
2455 82 : });
2456 82 :
2457 82 : let kind = "timeline_get";
2458 82 : TimelineGet {
2459 82 : wait_time: WAIT_USECS.with_label_values(&[kind]),
2460 82 : count: WAIT_COUNT.with_label_values(&[kind]),
2461 82 : }
2462 82 : });
2463 :
2464 : impl Metric for &'static TimelineGet {
2465 : #[inline(always)]
2466 0 : fn observe_throttling(
2467 0 : &self,
2468 0 : tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
2469 0 : ) {
2470 0 : let val = u64::try_from(wait_time.as_micros()).unwrap();
2471 0 : self.wait_time.inc_by(val);
2472 0 : self.count.inc();
2473 0 : }
2474 : }
2475 : }
2476 :
2477 0 : pub fn preinitialize_metrics() {
2478 0 : // Python tests need these and on some we do alerting.
2479 0 : //
2480 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
2481 0 : // order:
2482 0 : // - global metrics reside in a Lazy<PageserverMetrics>
2483 0 : // - access via crate::metrics::PS_METRICS.materialized_page_cache_hit.inc()
2484 0 : // - could move the statics into TimelineMetrics::new()?
2485 0 :
2486 0 : // counters
2487 0 : [
2488 0 : &MATERIALIZED_PAGE_CACHE_HIT,
2489 0 : &MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
2490 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
2491 0 : &WALRECEIVER_STARTED_CONNECTIONS,
2492 0 : &WALRECEIVER_BROKER_UPDATES,
2493 0 : &WALRECEIVER_CANDIDATES_ADDED,
2494 0 : &WALRECEIVER_CANDIDATES_REMOVED,
2495 0 : ]
2496 0 : .into_iter()
2497 0 : .for_each(|c| {
2498 0 : Lazy::force(c);
2499 0 : });
2500 0 :
2501 0 : // Deletion queue stats
2502 0 : Lazy::force(&DELETION_QUEUE);
2503 0 :
2504 0 : // Tenant stats
2505 0 : Lazy::force(&TENANT);
2506 0 :
2507 0 : // Tenant manager stats
2508 0 : Lazy::force(&TENANT_MANAGER);
2509 0 :
2510 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
2511 0 :
2512 0 : // countervecs
2513 0 : [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
2514 0 : .into_iter()
2515 0 : .for_each(|c| {
2516 0 : Lazy::force(c);
2517 0 : });
2518 0 :
2519 0 : // gauges
2520 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
2521 0 :
2522 0 : // histograms
2523 0 : [
2524 0 : &READ_NUM_FS_LAYERS,
2525 0 : &WAIT_LSN_TIME,
2526 0 : &WAL_REDO_TIME,
2527 0 : &WAL_REDO_RECORDS_HISTOGRAM,
2528 0 : &WAL_REDO_BYTES_HISTOGRAM,
2529 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
2530 0 : ]
2531 0 : .into_iter()
2532 0 : .for_each(|h| {
2533 0 : Lazy::force(h);
2534 0 : });
2535 0 :
2536 0 : // Custom
2537 0 : Lazy::force(&RECONSTRUCT_TIME);
2538 0 : Lazy::force(&tenant_throttling::TIMELINE_GET);
2539 0 : }
|