Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::metric_vec_duration::DurationResultObserver;
3 : use metrics::{
4 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
5 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
6 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
7 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
8 : IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
9 : };
10 : use once_cell::sync::Lazy;
11 : use pageserver_api::shard::TenantShardId;
12 : use strum::{EnumCount, IntoEnumIterator, VariantNames};
13 : use strum_macros::{EnumVariantNames, IntoStaticStr};
14 : use utils::id::TimelineId;
15 :
/// Histogram bucket boundaries, in seconds, for operations on the critical
/// path, i.e. operations that directly affect the latency of user queries.
///
/// There is one bucket per decade from 1 microsecond up to 100 seconds: this
/// resolves the common microsecond/millisecond latencies while still reaching
/// far enough up to tell "bad" from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    1e-6, 1e-5, 1e-4, // 1 us, 10 us, 100 us
    1e-3, 1e-2, 1e-1, // 1 ms, 10 ms, 100 ms
    1e0, 1e1, 1e2, // 1 s, 10 s, 100 s
];
28 :
// Metrics collected on operations on the storage repository.

/// Kinds of storage operations that are timed.
///
/// Every variant carries an explicit `#[strum(serialize = ...)]` string, so the
/// enum-level `serialize_all = "kebab_case"` only matters for variants added
/// later without their own `serialize` attribute.
// NOTE(review): presumably the serialized string is what ends up in the
// `operation` label of the storage-time metrics -- confirm against the callers.
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "layer flush")]
    LayerFlush,

    #[strum(serialize = "compact")]
    Compact,

    #[strum(serialize = "create images")]
    CreateImages,

    #[strum(serialize = "logical size")]
    LogicalSize,

    #[strum(serialize = "imitate logical size")]
    ImitateLogicalSize,

    #[strum(serialize = "load layer map")]
    LoadLayerMap,

    #[strum(serialize = "gc")]
    Gc,

    #[strum(serialize = "create tenant")]
    CreateTenant,
}
57 :
58 637 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
59 637 : register_counter_vec!(
60 637 : "pageserver_storage_operations_seconds_sum",
61 637 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
62 637 : &["operation", "tenant_id", "shard_id", "timeline_id"],
63 637 : )
64 637 : .expect("failed to define a metric")
65 637 : });
66 :
67 637 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
68 637 : register_int_counter_vec!(
69 637 : "pageserver_storage_operations_seconds_count",
70 637 : "Count of storage operations with operation, tenant and timeline dimensions",
71 637 : &["operation", "tenant_id", "shard_id", "timeline_id"],
72 637 : )
73 637 : .expect("failed to define a metric")
74 637 : });
75 :
/// Histogram bucket boundaries for background operations like compaction, GC
/// and size calculation: one bucket per decade from 0.01 up to 1000.
const STORAGE_OP_BUCKETS: &[f64] = &[
    1e-2, 1e-1, // 0.01, 0.1
    1e0, 1e1, 1e2, 1e3, // 1, 10, 100, 1000
];
78 :
79 639 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
80 639 : register_histogram_vec!(
81 639 : "pageserver_storage_operations_seconds_global",
82 639 : "Time spent on storage operations",
83 639 : &["operation"],
84 639 : STORAGE_OP_BUCKETS.into(),
85 639 : )
86 639 : .expect("failed to define a metric")
87 639 : });
88 :
89 676 : pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
90 676 : register_histogram!(
91 676 : "pageserver_read_num_fs_layers",
92 676 : "Number of persistent layers accessed for processing a read request, including those in the cache",
93 676 : vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
94 676 : )
95 676 : .expect("failed to define a metric")
96 676 : });
97 :
/// Pair of histograms for page reconstruction time, one per outcome.
///
/// Both fields are children of the same `pageserver_getpage_reconstruct_seconds`
/// histogram vector, resolved once when [`RECONSTRUCT_TIME`] is initialized.
pub(crate) struct ReconstructTimeMetrics {
    /// Child with `result="ok"`.
    ok: Histogram,
    /// Child with `result="err"`.
    err: Histogram,
}
104 :
105 676 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
106 676 : let inner = register_histogram_vec!(
107 676 : "pageserver_getpage_reconstruct_seconds",
108 676 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
109 676 : &["result"],
110 676 : CRITICAL_OP_BUCKETS.into(),
111 676 : )
112 676 : .expect("failed to define a metric");
113 676 : ReconstructTimeMetrics {
114 676 : ok: inner.get_metric_with_label_values(&["ok"]).unwrap(),
115 676 : err: inner.get_metric_with_label_values(&["err"]).unwrap(),
116 676 : }
117 676 : });
118 :
119 : impl ReconstructTimeMetrics {
120 8623832 : pub(crate) fn for_result<T, E>(&self, result: &Result<T, E>) -> &Histogram {
121 8623832 : match result {
122 8623832 : Ok(_) => &self.ok,
123 0 : Err(_) => &self.err,
124 : }
125 8623832 : }
126 : }
127 :
128 604 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
129 604 : register_int_counter!(
130 604 : "pageserver_materialized_cache_hits_direct_total",
131 604 : "Number of cache hits from materialized page cache without redo",
132 604 : )
133 604 : .expect("failed to define a metric")
134 604 : });
135 :
136 622 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<Histogram> = Lazy::new(|| {
137 622 : register_histogram!(
138 622 : "pageserver_getpage_get_reconstruct_data_seconds",
139 622 : "Time spent in get_reconstruct_value_data",
140 622 : CRITICAL_OP_BUCKETS.into(),
141 622 : )
142 622 : .expect("failed to define a metric")
143 622 : });
144 :
145 604 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
146 604 : register_int_counter!(
147 604 : "pageserver_materialized_cache_hits_total",
148 604 : "Number of cache hits from materialized page cache",
149 604 : )
150 604 : .expect("failed to define a metric")
151 604 : });
152 :
/// Per-task-kind latency histograms for vectored gets.
///
/// Task kinds that are not tracked hold `None`, so no time series exists for them.
pub(crate) struct GetVectoredLatency {
    map: EnumMap<TaskKind, Option<Histogram>>,
}
156 :
157 : impl GetVectoredLatency {
158 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
159 : // cardinality of the metric.
160 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
161 :
162 72383 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
163 72383 : self.map[task_kind].as_ref()
164 72383 : }
165 : }
166 :
167 427 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
168 427 : let inner = register_histogram_vec!(
169 427 : "pageserver_get_vectored_seconds",
170 427 : "Time spent in get_vectored",
171 427 : &["task_kind"],
172 427 : CRITICAL_OP_BUCKETS.into(),
173 427 : )
174 427 : .expect("failed to define a metric");
175 427 :
176 427 : GetVectoredLatency {
177 11172 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
178 11172 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
179 11172 :
180 11172 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
181 854 : let task_kind = task_kind.into();
182 854 : Some(inner.with_label_values(&[task_kind]))
183 : } else {
184 10318 : None
185 : }
186 11172 : })),
187 427 : }
188 427 : });
189 :
/// Page cache read counters for one (task kind, page content kind) combination.
///
/// All fields are children of the `pageserver_page_cache_read_accesses_total`
/// and `pageserver_page_cache_read_hits_total` counter vectors, resolved once
/// when [`PAGE_CACHE`] is initialized.
pub(crate) struct PageCacheMetricsForTaskKind {
    /// Read accesses with `key_kind="materialized_page"`.
    pub read_accesses_materialized_page: IntCounter,
    /// Read accesses with `key_kind="immutable"`.
    pub read_accesses_immutable: IntCounter,

    /// Read hits with `key_kind="immutable"` (`hit_kind` is `"-"`).
    pub read_hits_immutable: IntCounter,
    /// Read hits with `key_kind="materialized_page"` and `hit_kind="exact"`.
    pub read_hits_materialized_page_exact: IntCounter,
    /// Read hits with `key_kind="materialized_page"` and `hit_kind="older_lsn"`.
    pub read_hits_materialized_page_older_lsn: IntCounter,
}
198 :
/// Page cache read counters for every (task kind, page content kind) pair.
///
/// The outer map is indexed by `TaskKind`, the inner one by `PageContentKind`.
pub(crate) struct PageCacheMetrics {
    map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
}
202 :
203 624 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
204 624 : register_int_counter_vec!(
205 624 : "pageserver_page_cache_read_hits_total",
206 624 : "Number of read accesses to the page cache that hit",
207 624 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
208 624 : )
209 624 : .expect("failed to define a metric")
210 624 : });
211 :
212 624 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
213 624 : register_int_counter_vec!(
214 624 : "pageserver_page_cache_read_accesses_total",
215 624 : "Number of read accesses to the page cache",
216 624 : &["task_kind", "key_kind", "content_kind"]
217 624 : )
218 624 : .expect("failed to define a metric")
219 624 : });
220 :
221 624 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
222 16298 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
223 16298 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
224 16298 : let task_kind: &'static str = task_kind.into();
225 97788 : EnumMap::from_array(std::array::from_fn(|content_kind| {
226 97788 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
227 97788 : let content_kind: &'static str = content_kind.into();
228 97788 : PageCacheMetricsForTaskKind {
229 97788 : read_accesses_materialized_page: {
230 97788 : PAGE_CACHE_READ_ACCESSES
231 97788 : .get_metric_with_label_values(&[
232 97788 : task_kind,
233 97788 : "materialized_page",
234 97788 : content_kind,
235 97788 : ])
236 97788 : .unwrap()
237 97788 : },
238 97788 :
239 97788 : read_accesses_immutable: {
240 97788 : PAGE_CACHE_READ_ACCESSES
241 97788 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
242 97788 : .unwrap()
243 97788 : },
244 97788 :
245 97788 : read_hits_immutable: {
246 97788 : PAGE_CACHE_READ_HITS
247 97788 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
248 97788 : .unwrap()
249 97788 : },
250 97788 :
251 97788 : read_hits_materialized_page_exact: {
252 97788 : PAGE_CACHE_READ_HITS
253 97788 : .get_metric_with_label_values(&[
254 97788 : task_kind,
255 97788 : "materialized_page",
256 97788 : content_kind,
257 97788 : "exact",
258 97788 : ])
259 97788 : .unwrap()
260 97788 : },
261 97788 :
262 97788 : read_hits_materialized_page_older_lsn: {
263 97788 : PAGE_CACHE_READ_HITS
264 97788 : .get_metric_with_label_values(&[
265 97788 : task_kind,
266 97788 : "materialized_page",
267 97788 : content_kind,
268 97788 : "older_lsn",
269 97788 : ])
270 97788 : .unwrap()
271 97788 : },
272 97788 : }
273 97788 : }))
274 16298 : })),
275 624 : });
276 :
277 : impl PageCacheMetrics {
278 379007653 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
279 379007653 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
280 379007653 : }
281 : }
282 :
/// Gauges describing the size of the page cache, in bytes.
pub(crate) struct PageCacheSizeMetrics {
    /// The `pageserver_page_cache_size_max_bytes` gauge.
    pub max_bytes: UIntGauge,

    /// Current size for `key_kind="immutable"`.
    pub current_bytes_immutable: UIntGauge,
    /// Current size for `key_kind="materialized_page"`.
    pub current_bytes_materialized_page: UIntGauge,
}
289 :
290 678 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
291 678 : register_uint_gauge_vec!(
292 678 : "pageserver_page_cache_size_current_bytes",
293 678 : "Current size of the page cache in bytes, by key kind",
294 678 : &["key_kind"]
295 678 : )
296 678 : .expect("failed to define a metric")
297 678 : });
298 :
299 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
300 678 : Lazy::new(|| PageCacheSizeMetrics {
301 678 : max_bytes: {
302 678 : register_uint_gauge!(
303 678 : "pageserver_page_cache_size_max_bytes",
304 678 : "Maximum size of the page cache in bytes"
305 678 : )
306 678 : .expect("failed to define a metric")
307 678 : },
308 678 : current_bytes_immutable: {
309 678 : PAGE_CACHE_SIZE_CURRENT_BYTES
310 678 : .get_metric_with_label_values(&["immutable"])
311 678 : .unwrap()
312 678 : },
313 678 : current_bytes_materialized_page: {
314 678 : PAGE_CACHE_SIZE_CURRENT_BYTES
315 678 : .get_metric_with_label_values(&["materialized_page"])
316 678 : .unwrap()
317 678 : },
318 678 : });
319 :
320 : pub(crate) mod page_cache_eviction_metrics {
321 : use std::num::NonZeroUsize;
322 :
323 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
324 : use once_cell::sync::Lazy;
325 :
326 0 : #[derive(Clone, Copy)]
327 : pub(crate) enum Outcome {
328 : FoundSlotUnused { iters: NonZeroUsize },
329 : FoundSlotEvicted { iters: NonZeroUsize },
330 : ItersExceeded { iters: NonZeroUsize },
331 : }
332 :
333 578 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
334 578 : register_int_counter_vec!(
335 578 : "pageserver_page_cache_find_victim_iters_total",
336 578 : "Counter for the number of iterations in the find_victim loop",
337 578 : &["outcome"],
338 578 : )
339 578 : .expect("failed to define a metric")
340 578 : });
341 :
342 578 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
343 578 : register_int_counter_vec!(
344 578 : "pageserver_page_cache_find_victim_calls",
345 578 : "Incremented at the end of each find_victim() call.\
346 578 : Filter by outcome to get e.g., eviction rate.",
347 578 : &["outcome"]
348 578 : )
349 578 : .unwrap()
350 578 : });
351 :
352 12871926 : pub(crate) fn observe(outcome: Outcome) {
353 12871926 : macro_rules! dry {
354 12871926 : ($label:literal, $iters:expr) => {{
355 12871926 : static LABEL: &'static str = $label;
356 12871926 : static ITERS_TOTAL: Lazy<IntCounter> =
357 12871926 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
358 12871926 : static CALLS: Lazy<IntCounter> =
359 12871926 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
360 12871926 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
361 12871926 : CALLS.inc();
362 12871926 : }};
363 12871926 : }
364 12871926 : match outcome {
365 2851616 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
366 10020310 : Outcome::FoundSlotEvicted { iters } => {
367 10020310 : dry!("found_evicted", iters)
368 : }
369 0 : Outcome::ItersExceeded { iters } => {
370 0 : dry!("err_iters_exceeded", iters);
371 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
372 0 : }
373 : }
374 12871926 : }
375 : }
376 :
377 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
378 0 : register_int_counter_vec!(
379 0 : "page_cache_errors_total",
380 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
381 0 : &["error_kind"]
382 0 : )
383 0 : .expect("failed to define a metric")
384 0 : });
385 :
/// Kinds of page cache errors that are counted.
///
/// Converted to a static string via the `IntoStaticStr` derive and used as
/// the `error_kind` label value; with `serialize_all = "kebab_case"` and no
/// per-variant overrides, the label is the kebab-case form of the variant name.
#[derive(IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum PageCacheErrorKind {
    AcquirePinnedSlotTimeout,
    EvictIterLimit,
}
392 :
393 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
394 0 : PAGE_CACHE_ERRORS
395 0 : .get_metric_with_label_values(&[error_kind.into()])
396 0 : .unwrap()
397 0 : .inc();
398 0 : }
399 :
400 614 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
401 614 : register_histogram!(
402 614 : "pageserver_wait_lsn_seconds",
403 614 : "Time spent waiting for WAL to arrive",
404 614 : CRITICAL_OP_BUCKETS.into(),
405 614 : )
406 614 : .expect("failed to define a metric")
407 614 : });
408 :
409 637 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
410 637 : register_int_gauge_vec!(
411 637 : "pageserver_last_record_lsn",
412 637 : "Last record LSN grouped by timeline",
413 637 : &["tenant_id", "shard_id", "timeline_id"]
414 637 : )
415 637 : .expect("failed to define a metric")
416 637 : });
417 :
418 637 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
419 637 : register_uint_gauge_vec!(
420 637 : "pageserver_resident_physical_size",
421 637 : "The size of the layer files present in the pageserver's filesystem.",
422 637 : &["tenant_id", "shard_id", "timeline_id"]
423 637 : )
424 637 : .expect("failed to define a metric")
425 637 : });
426 :
427 635 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
428 635 : register_uint_gauge!(
429 635 : "pageserver_resident_physical_size_global",
430 635 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
431 635 : )
432 635 : .expect("failed to define a metric")
433 635 : });
434 :
435 637 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
436 637 : register_uint_gauge_vec!(
437 637 : "pageserver_remote_physical_size",
438 637 : "The size of the layer files present in the remote storage that are listed in the the remote index_part.json.",
439 637 : // Corollary: If any files are missing from the index part, they won't be included here.
440 637 : &["tenant_id", "shard_id", "timeline_id"]
441 637 : )
442 637 : .expect("failed to define a metric")
443 637 : });
444 :
445 637 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
446 637 : register_uint_gauge!(
447 637 : "pageserver_remote_physical_size_global",
448 637 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
449 637 : )
450 637 : .expect("failed to define a metric")
451 637 : });
452 :
453 76 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
454 76 : register_int_counter!(
455 76 : "pageserver_remote_ondemand_downloaded_layers_total",
456 76 : "Total on-demand downloaded layers"
457 76 : )
458 76 : .unwrap()
459 76 : });
460 :
461 76 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
462 76 : register_int_counter!(
463 76 : "pageserver_remote_ondemand_downloaded_bytes_total",
464 76 : "Total bytes of layers on-demand downloaded",
465 76 : )
466 76 : .unwrap()
467 76 : });
468 :
469 637 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
470 637 : register_uint_gauge_vec!(
471 637 : "pageserver_current_logical_size",
472 637 : "Current logical size grouped by timeline",
473 637 : &["tenant_id", "shard_id", "timeline_id"]
474 637 : )
475 637 : .expect("failed to define current logical size metric")
476 637 : });
477 :
478 : pub(crate) mod initial_logical_size {
479 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
480 : use once_cell::sync::Lazy;
481 :
482 : pub(crate) struct StartCalculation(IntCounterVec);
483 624 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
484 624 : StartCalculation(
485 624 : register_int_counter_vec!(
486 624 : "pageserver_initial_logical_size_start_calculation",
487 624 : "Incremented each time we start an initial logical size calculation attempt. \
488 624 : The `circumstances` label provides some additional details.",
489 624 : &["attempt", "circumstances"]
490 624 : )
491 624 : .unwrap(),
492 624 : )
493 624 : });
494 :
495 : struct DropCalculation {
496 : first: IntCounter,
497 : retry: IntCounter,
498 : }
499 :
500 624 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
501 624 : let vec = register_int_counter_vec!(
502 624 : "pageserver_initial_logical_size_drop_calculation",
503 624 : "Incremented each time we abort a started size calculation attmpt.",
504 624 : &["attempt"]
505 624 : )
506 624 : .unwrap();
507 624 : DropCalculation {
508 624 : first: vec.with_label_values(&["first"]),
509 624 : retry: vec.with_label_values(&["retry"]),
510 624 : }
511 624 : });
512 :
513 : pub(crate) struct Calculated {
514 : pub(crate) births: IntCounter,
515 : pub(crate) deaths: IntCounter,
516 : }
517 :
518 621 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
519 621 : births: register_int_counter!(
520 621 : "pageserver_initial_logical_size_finish_calculation",
521 621 : "Incremented every time we finish calculation of initial logical size.\
522 621 : If everything is working well, this should happen at most once per Timeline object."
523 621 : )
524 621 : .unwrap(),
525 621 : deaths: register_int_counter!(
526 621 : "pageserver_initial_logical_size_drop_finished_calculation",
527 621 : "Incremented when we drop a finished initial logical size calculation result.\
528 621 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
529 621 : )
530 621 : .unwrap(),
531 621 : });
532 :
533 : pub(crate) struct OngoingCalculationGuard {
534 : inc_drop_calculation: Option<IntCounter>,
535 : }
536 :
537 1301 : #[derive(strum_macros::IntoStaticStr)]
538 : pub(crate) enum StartCircumstances {
539 : EmptyInitial,
540 : SkippedConcurrencyLimiter,
541 : AfterBackgroundTasksRateLimit,
542 : }
543 :
544 : impl StartCalculation {
545 1301 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
546 1301 : let circumstances_label: &'static str = circumstances.into();
547 1301 : self.0
548 1301 : .with_label_values(&["first", circumstances_label])
549 1301 : .inc();
550 1301 : OngoingCalculationGuard {
551 1301 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
552 1301 : }
553 1301 : }
554 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
555 0 : let circumstances_label: &'static str = circumstances.into();
556 0 : self.0
557 0 : .with_label_values(&["retry", circumstances_label])
558 0 : .inc();
559 0 : OngoingCalculationGuard {
560 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
561 0 : }
562 0 : }
563 : }
564 :
565 : impl Drop for OngoingCalculationGuard {
566 1299 : fn drop(&mut self) {
567 1299 : if let Some(counter) = self.inc_drop_calculation.take() {
568 41 : counter.inc();
569 1258 : }
570 1299 : }
571 : }
572 :
573 : impl OngoingCalculationGuard {
574 1258 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
575 1258 : drop(self.inc_drop_calculation.take());
576 1258 : CALCULATED.births.inc();
577 1258 : FinishedCalculationGuard {
578 1258 : inc_on_drop: CALCULATED.deaths.clone(),
579 1258 : }
580 1258 : }
581 : }
582 :
583 : pub(crate) struct FinishedCalculationGuard {
584 : inc_on_drop: IntCounter,
585 : }
586 :
587 : impl Drop for FinishedCalculationGuard {
588 267 : fn drop(&mut self) {
589 267 : self.inc_on_drop.inc();
590 267 : }
591 : }
592 :
593 : // context: https://github.com/neondatabase/neon/issues/5963
594 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
595 38 : Lazy::new(|| {
596 38 : register_int_counter!(
597 38 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
598 38 : "Counter for the following event: walreceiver calls\
599 38 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
600 38 : )
601 38 : .unwrap()
602 38 : });
603 : }
604 :
605 659 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
606 659 : register_uint_gauge_vec!(
607 659 : "pageserver_tenant_states_count",
608 659 : "Count of tenants per state",
609 659 : &["state"]
610 659 : )
611 659 : .expect("Failed to register pageserver_tenant_states_count metric")
612 659 : });
613 :
614 : /// A set of broken tenants.
615 : ///
616 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
617 : /// tenant.
618 175 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
619 175 : register_uint_gauge_vec!(
620 175 : "pageserver_broken_tenants_count",
621 175 : "Set of broken tenants",
622 175 : &["tenant_id", "shard_id"]
623 175 : )
624 175 : .expect("Failed to register pageserver_tenant_states_count metric")
625 175 : });
626 :
627 217 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
628 217 : register_uint_gauge_vec!(
629 217 : "pageserver_tenant_synthetic_cached_size_bytes",
630 217 : "Synthetic size of each tenant in bytes",
631 217 : &["tenant_id"]
632 217 : )
633 217 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
634 217 : });
635 :
636 : // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
637 : // or in testing they estimate how much we would upload if we did.
638 637 : static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
639 637 : register_int_counter_vec!(
640 637 : "pageserver_created_persistent_files_total",
641 637 : "Number of files created that are meant to be uploaded to cloud storage",
642 637 : &["tenant_id", "shard_id", "timeline_id"]
643 637 : )
644 637 : .expect("failed to define a metric")
645 637 : });
646 :
647 637 : static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
648 637 : register_int_counter_vec!(
649 637 : "pageserver_written_persistent_bytes_total",
650 637 : "Total bytes written that are meant to be uploaded to cloud storage",
651 637 : &["tenant_id", "shard_id", "timeline_id"]
652 637 : )
653 637 : .expect("failed to define a metric")
654 637 : });
655 :
656 2 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
657 2 : register_histogram_vec!(
658 2 : "pageserver_eviction_iteration_duration_seconds_global",
659 2 : "Time spent on a single eviction iteration",
660 2 : &["period_secs", "threshold_secs"],
661 2 : STORAGE_OP_BUCKETS.into(),
662 2 : )
663 2 : .expect("failed to define a metric")
664 2 : });
665 :
666 637 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
667 637 : register_int_counter_vec!(
668 637 : "pageserver_evictions",
669 637 : "Number of layers evicted from the pageserver",
670 637 : &["tenant_id", "shard_id", "timeline_id"]
671 637 : )
672 637 : .expect("failed to define a metric")
673 637 : });
674 :
675 637 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
676 637 : register_int_counter_vec!(
677 637 : "pageserver_evictions_with_low_residence_duration",
678 637 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
679 637 : Residence duration is determined using the `residence_duration_data_source`.",
680 637 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
681 637 : )
682 637 : .expect("failed to define a metric")
683 637 : });
684 :
685 604 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
686 604 : register_int_counter!(
687 604 : "pageserver_unexpected_ondemand_downloads_count",
688 604 : "Number of unexpected on-demand downloads. \
689 604 : We log more context for each increment, so, forgo any labels in this metric.",
690 604 : )
691 604 : .expect("failed to define a metric")
692 604 : });
693 :
694 : /// How long did we take to start up? Broken down by labels to describe
695 : /// different phases of startup.
696 604 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
697 604 : register_gauge_vec!(
698 604 : "pageserver_startup_duration_seconds",
699 604 : "Time taken by phases of pageserver startup, in seconds",
700 604 : &["phase"]
701 604 : )
702 604 : .expect("Failed to register pageserver_startup_duration_seconds metric")
703 604 : });
704 :
705 604 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
706 604 : register_uint_gauge!(
707 604 : "pageserver_startup_is_loading",
708 604 : "1 while in initial startup load of tenants, 0 at other times"
709 604 : )
710 604 : .expect("Failed to register pageserver_startup_is_loading")
711 604 : });
712 :
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
/// like how long it took to load.
///
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
/// metrics are rather expensive, and usually fine grained stuff makes more sense
/// at a timeline level than tenant level.
pub(crate) struct TenantMetrics {
    /// How long did tenants take to go from construction to active state?
    pub(crate) activation: Histogram,
    /// How long did tenants take to load remote metadata on startup/attach?
    pub(crate) preload: Histogram,
    /// How long did tenants take to initialize once remote metadata was loaded?
    pub(crate) attach: Histogram,

    /// How many tenants are included in the initial startup of the pageserver?
    pub(crate) startup_scheduled: IntCounter,
    /// How many tenants have completed warm-up, or were activated on demand
    /// during initial startup?
    pub(crate) startup_complete: IntCounter,
}
729 :
730 604 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
731 604 : TenantMetrics {
732 604 : activation: register_histogram!(
733 604 : "pageserver_tenant_activation_seconds",
734 604 : "Time taken by tenants to activate, in seconds",
735 604 : CRITICAL_OP_BUCKETS.into()
736 604 : )
737 604 : .expect("Failed to register metric"),
738 604 : preload: register_histogram!(
739 604 : "pageserver_tenant_preload_seconds",
740 604 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
741 604 : CRITICAL_OP_BUCKETS.into()
742 604 : )
743 604 : .expect("Failed to register metric"),
744 604 : attach: register_histogram!(
745 604 : "pageserver_tenant_attach_seconds",
746 604 : "Time taken by tenants to intialize, after remote metadata is already loaded",
747 604 : CRITICAL_OP_BUCKETS.into()
748 604 : )
749 604 : .expect("Failed to register metric"),
750 604 : startup_scheduled: register_int_counter!(
751 604 : "pageserver_tenant_startup_scheduled",
752 604 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
753 604 : ).expect("Failed to register metric"),
754 604 : startup_complete: register_int_counter!(
755 604 : "pageserver_tenant_startup_complete",
756 604 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
757 604 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
758 604 : tenants: such cases will lead to this metric never reaching the scheduled count."
759 604 : ).expect("Failed to register metric"),
760 604 : }
761 604 : });
762 :
/// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub(crate) struct EvictionsWithLowResidenceDuration {
    /// Value of the `residence_duration_data_source` label.
    data_source: &'static str,
    /// Evictions of layers resident for less than this are counted; its
    /// whole-second value is also the `low_threshold_secs` label.
    threshold: Duration,
    /// The counter child; `None` once the series has been removed from the vector.
    counter: Option<IntCounter>,
}
770 :
/// Holds the timeline-independent parameters of an
/// [`EvictionsWithLowResidenceDuration`] until the tenant, shard and timeline
/// labels are supplied to `build`.
pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
    /// Value of the `residence_duration_data_source` label.
    data_source: &'static str,
    /// Residence duration below which an eviction is counted.
    threshold: Duration,
}
775 :
776 : impl EvictionsWithLowResidenceDurationBuilder {
777 1576 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
778 1576 : Self {
779 1576 : data_source,
780 1576 : threshold,
781 1576 : }
782 1576 : }
783 :
784 1576 : fn build(
785 1576 : &self,
786 1576 : tenant_id: &str,
787 1576 : shard_id: &str,
788 1576 : timeline_id: &str,
789 1576 : ) -> EvictionsWithLowResidenceDuration {
790 1576 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
791 1576 : .get_metric_with_label_values(&[
792 1576 : tenant_id,
793 1576 : shard_id,
794 1576 : timeline_id,
795 1576 : self.data_source,
796 1576 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
797 1576 : ])
798 1576 : .unwrap();
799 1576 : EvictionsWithLowResidenceDuration {
800 1576 : data_source: self.data_source,
801 1576 : threshold: self.threshold,
802 1576 : counter: Some(counter),
803 1576 : }
804 1576 : }
805 : }
806 :
807 : impl EvictionsWithLowResidenceDuration {
808 1925 : fn threshold_label_value(threshold: Duration) -> String {
809 1925 : format!("{}", threshold.as_secs())
810 1925 : }
811 :
812 2561 : pub fn observe(&self, observed_value: Duration) {
813 2561 : if observed_value < self.threshold {
814 2561 : self.counter
815 2561 : .as_ref()
816 2561 : .expect("nobody calls this function after `remove_from_vec`")
817 2561 : .inc();
818 2561 : }
819 2561 : }
820 :
821 53 : pub fn change_threshold(
822 53 : &mut self,
823 53 : tenant_id: &str,
824 53 : shard_id: &str,
825 53 : timeline_id: &str,
826 53 : new_threshold: Duration,
827 53 : ) {
828 53 : if new_threshold == self.threshold {
829 45 : return;
830 8 : }
831 8 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
832 8 : self.data_source,
833 8 : new_threshold,
834 8 : )
835 8 : .build(tenant_id, shard_id, timeline_id);
836 8 : std::mem::swap(self, &mut with_new);
837 8 : with_new.remove(tenant_id, shard_id, timeline_id);
838 53 : }
839 :
840 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
841 349 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
842 349 : let Some(_counter) = self.counter.take() else {
843 0 : return;
844 : };
845 :
846 349 : let threshold = Self::threshold_label_value(self.threshold);
847 349 :
848 349 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
849 349 : tenant_id,
850 349 : shard_id,
851 349 : timeline_id,
852 349 : self.data_source,
853 349 : &threshold,
854 349 : ]);
855 349 :
856 349 : match removed {
857 0 : Err(e) => {
858 0 : // this has been hit in staging as
859 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
860 0 : // because we can be in the drop path already, don't risk:
861 0 : // - "double-panic => illegal instruction" or
862 0 : // - future "drop panick => abort"
863 0 : //
864 0 : // so just nag: (the error has the labels)
865 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
866 : }
867 : Ok(()) => {
868 : // to help identify cases where we double-remove the same values, let's log all
869 : // deletions?
870 349 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
871 : }
872 : }
873 349 : }
874 : }
875 :
// Metrics collected on disk IO operations
//
// Roughly logarithmic scale.
//
// Histogram bucket boundaries for `pageserver_io_operations_seconds`, expressed
// in seconds; each step is a factor of roughly 30 larger than the previous one.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000030, // 30 usec
    0.001000, // 1000 usec
    0.030,    // 30 ms
    1.000,    // 1000 ms
    30.000,   // 30000 ms
];
886 :
887 : /// VirtualFile fs operation variants.
888 : ///
889 : /// Operations:
890 : /// - open ([`std::fs::OpenOptions::open`])
891 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
892 : /// - close-by-replace (close by replacement algorithm)
893 : /// - read (`read_at`)
894 : /// - write (`write_at`)
895 : /// - seek (modify internal position or file length query)
896 : /// - fsync ([`std::fs::File::sync_all`])
897 : /// - metadata ([`std::fs::File::metadata`])
898 : #[derive(
899 6219 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
900 : )]
901 : pub(crate) enum StorageIoOperation {
902 : Open,
903 : OpenAfterReplace,
904 : Close,
905 : CloseByReplace,
906 : Read,
907 : Write,
908 : Seek,
909 : Fsync,
910 : Metadata,
911 : }
912 :
913 : impl StorageIoOperation {
914 6219 : pub fn as_str(&self) -> &'static str {
915 6219 : match self {
916 691 : StorageIoOperation::Open => "open",
917 691 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
918 691 : StorageIoOperation::Close => "close",
919 691 : StorageIoOperation::CloseByReplace => "close-by-replace",
920 691 : StorageIoOperation::Read => "read",
921 691 : StorageIoOperation::Write => "write",
922 691 : StorageIoOperation::Seek => "seek",
923 691 : StorageIoOperation::Fsync => "fsync",
924 691 : StorageIoOperation::Metadata => "metadata",
925 : }
926 6219 : }
927 : }
928 :
929 : /// Tracks time taken by fs operations near VirtualFile.
930 0 : #[derive(Debug)]
931 : pub(crate) struct StorageIoTime {
932 : metrics: [Histogram; StorageIoOperation::COUNT],
933 : }
934 :
935 : impl StorageIoTime {
936 691 : fn new() -> Self {
937 691 : let storage_io_histogram_vec = register_histogram_vec!(
938 691 : "pageserver_io_operations_seconds",
939 691 : "Time spent in IO operations",
940 691 : &["operation"],
941 691 : STORAGE_IO_TIME_BUCKETS.into()
942 691 : )
943 691 : .expect("failed to define a metric");
944 6219 : let metrics = std::array::from_fn(|i| {
945 6219 : let op = StorageIoOperation::from_repr(i).unwrap();
946 6219 : storage_io_histogram_vec
947 6219 : .get_metric_with_label_values(&[op.as_str()])
948 6219 : .unwrap()
949 6219 : });
950 691 : Self { metrics }
951 691 : }
952 :
953 16068595 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
954 16068595 : &self.metrics[op as usize]
955 16068595 : }
956 : }
957 :
958 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
959 :
960 : const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
961 :
962 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
963 691 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
964 691 : register_int_gauge_vec!(
965 691 : "pageserver_io_operations_bytes_total",
966 691 : "Total amount of bytes read/written in IO operations",
967 691 : &["operation", "tenant_id", "shard_id", "timeline_id"]
968 691 : )
969 691 : .expect("failed to define a metric")
970 691 : });
971 :
972 : #[cfg(not(test))]
973 : pub(crate) mod virtual_file_descriptor_cache {
974 : use super::*;
975 :
976 604 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
977 604 : register_uint_gauge!(
978 604 : "pageserver_virtual_file_descriptor_cache_size_max",
979 604 : "Maximum number of open file descriptors in the cache."
980 604 : )
981 604 : .unwrap()
982 604 : });
983 :
984 : // SIZE_CURRENT: derive it like so:
985 : // ```
986 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
987 : // -ignoring(operation)
988 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
989 : // ```
990 : }
991 :
992 : #[cfg(not(test))]
993 : pub(crate) mod virtual_file_io_engine {
994 : use super::*;
995 :
996 604 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
997 604 : register_uint_gauge_vec!(
998 604 : "pageserver_virtual_file_io_engine_kind",
999 604 : "The configured io engine for VirtualFile",
1000 604 : &["kind"],
1001 604 : )
1002 604 : .unwrap()
1003 604 : });
1004 : }
1005 :
1006 0 : #[derive(Debug)]
1007 : struct GlobalAndPerTimelineHistogram {
1008 : global: Histogram,
1009 : per_tenant_timeline: Histogram,
1010 : }
1011 :
1012 : impl GlobalAndPerTimelineHistogram {
1013 5618363 : fn observe(&self, value: f64) {
1014 5618363 : self.global.observe(value);
1015 5618363 : self.per_tenant_timeline.observe(value);
1016 5618363 : }
1017 : }
1018 :
1019 : struct GlobalAndPerTimelineHistogramTimer<'a> {
1020 : h: &'a GlobalAndPerTimelineHistogram,
1021 : start: std::time::Instant,
1022 : }
1023 :
1024 : impl<'a> Drop for GlobalAndPerTimelineHistogramTimer<'a> {
1025 5618363 : fn drop(&mut self) {
1026 5618363 : let elapsed = self.start.elapsed();
1027 5618363 : self.h.observe(elapsed.as_secs_f64());
1028 5618363 : }
1029 : }
1030 :
/// The smgr query types whose handling time is measured.
///
/// The `smgr_query_type` label value of a variant is its name in snake_case
/// (e.g. `GetPageAtLsn` becomes `get_page_at_lsn`), produced through the
/// `IntoStaticStr` derive. The discriminant doubles as the index into
/// `SmgrQueryTimePerTimeline::metrics`.
#[derive(
    Debug,
    Clone,
    Copy,
    IntoStaticStr,
    strum_macros::EnumCount,
    strum_macros::EnumIter,
    strum_macros::FromRepr,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
    GetRelExists,
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
    GetSlruSegment,
}
1048 :
/// Smgr query latency histograms for one timeline.
///
/// Holds one global/per-timeline histogram pair per [`SmgrQueryType`] variant,
/// indexed by the variant's discriminant.
#[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline {
    metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
}
1053 :
1054 639 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1055 639 : register_histogram_vec!(
1056 639 : "pageserver_smgr_query_seconds",
1057 639 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1058 639 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1059 639 : CRITICAL_OP_BUCKETS.into(),
1060 639 : )
1061 639 : .expect("failed to define a metric")
1062 639 : });
1063 :
1064 639 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1065 639 : [
1066 639 : 1,
1067 639 : 10,
1068 639 : 20,
1069 639 : 40,
1070 639 : 60,
1071 639 : 80,
1072 639 : 100,
1073 639 : 200,
1074 639 : 300,
1075 639 : 400,
1076 639 : 500,
1077 639 : 600,
1078 639 : 700,
1079 639 : 800,
1080 639 : 900,
1081 639 : 1_000, // 1ms
1082 639 : 2_000,
1083 639 : 4_000,
1084 639 : 6_000,
1085 639 : 8_000,
1086 639 : 10_000, // 10ms
1087 639 : 20_000,
1088 639 : 40_000,
1089 639 : 60_000,
1090 639 : 80_000,
1091 639 : 100_000,
1092 639 : 200_000,
1093 639 : 400_000,
1094 639 : 600_000,
1095 639 : 800_000,
1096 639 : 1_000_000, // 1s
1097 639 : 2_000_000,
1098 639 : 4_000_000,
1099 639 : 6_000_000,
1100 639 : 8_000_000,
1101 639 : 10_000_000, // 10s
1102 639 : 20_000_000,
1103 639 : 50_000_000,
1104 639 : 100_000_000,
1105 639 : 200_000_000,
1106 639 : 1_000_000_000, // 1000s
1107 639 : ]
1108 639 : .into_iter()
1109 639 : .map(Duration::from_micros)
1110 26199 : .map(|d| d.as_secs_f64())
1111 639 : .collect()
1112 639 : });
1113 :
1114 639 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1115 639 : register_histogram_vec!(
1116 639 : "pageserver_smgr_query_seconds_global",
1117 639 : "Time spent on smgr query handling, aggregated by query type.",
1118 639 : &["smgr_query_type"],
1119 639 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1120 639 : )
1121 639 : .expect("failed to define a metric")
1122 639 : });
1123 :
1124 : impl SmgrQueryTimePerTimeline {
1125 1578 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1126 1578 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1127 1578 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1128 1578 : let timeline_id = timeline_id.to_string();
1129 7890 : let metrics = std::array::from_fn(|i| {
1130 7890 : let op = SmgrQueryType::from_repr(i).unwrap();
1131 7890 : let global = SMGR_QUERY_TIME_GLOBAL
1132 7890 : .get_metric_with_label_values(&[op.into()])
1133 7890 : .unwrap();
1134 7890 : let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1135 7890 : .get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id])
1136 7890 : .unwrap();
1137 7890 : GlobalAndPerTimelineHistogram {
1138 7890 : global,
1139 7890 : per_tenant_timeline,
1140 7890 : }
1141 7890 : });
1142 1578 : Self { metrics }
1143 1578 : }
1144 5618364 : pub(crate) fn start_timer(&self, op: SmgrQueryType) -> impl Drop + '_ {
1145 5618364 : let metric = &self.metrics[op as usize];
1146 5618364 : GlobalAndPerTimelineHistogramTimer {
1147 5618364 : h: metric,
1148 5618364 : start: std::time::Instant::now(),
1149 5618364 : }
1150 5618364 : }
1151 : }
1152 :
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    // Regression test, we used hard-coded string constants before using an enum.
    /// Pins the label value generated for every query type.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let expect: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        expect.into_iter().for_each(|(op, label)| {
            let actual = <&'static str>::from(op);
            assert_eq!(actual, label);
        });
    }

    /// For each query type: a fresh timeline starts with zero samples, and
    /// dropping one timer adds exactly one per-timeline sample while the global
    /// sample count grows.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in &ops {
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(TenantId::generate()),
                &TimelineId::generate(),
            );

            // Total sample counts across all query types, as (global, per-timeline).
            let sample_counts = || {
                let mut global = 0u64;
                let mut per_tenant_timeline = 0u64;
                for query_type in &ops {
                    let pair = &metrics.metrics[*query_type as usize];
                    global += pair.global.get_sample_count();
                    per_tenant_timeline += pair.per_tenant_timeline.get_sample_count();
                }
                (global, per_tenant_timeline)
            };

            let (pre_global, pre_per_tenant_timeline) = sample_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            // Dropping the guard is what records the observation.
            drop(metrics.start_timer(*op));

            let (post_global, post_per_tenant_timeline) = sample_counts();
            assert_eq!(post_per_tenant_timeline, 1);
            assert!(post_global > pre_global);
        }
    }
}
1216 :
1217 : // keep in sync with control plane Go code so that we can validate
1218 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1219 357 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1220 357 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1221 357 : [
1222 357 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1223 357 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1224 357 : ]
1225 9996 : .map(|ms| (ms as f64) / 1000.0)
1226 357 : });
1227 :
1228 : pub(crate) struct BasebackupQueryTime(HistogramVec);
1229 357 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1230 357 : BasebackupQueryTime({
1231 357 : register_histogram_vec!(
1232 357 : "pageserver_basebackup_query_seconds",
1233 357 : "Histogram of basebackup queries durations, by result type",
1234 357 : &["result"],
1235 357 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1236 357 : )
1237 357 : .expect("failed to define a metric")
1238 357 : })
1239 357 : });
1240 :
1241 : impl DurationResultObserver for BasebackupQueryTime {
1242 630 : fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
1243 630 : let label_value = if res.is_ok() { "ok" } else { "error" };
1244 630 : let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
1245 630 : metric.observe(duration.as_secs_f64());
1246 630 : }
1247 : }
1248 :
1249 443 : pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
1250 443 : register_int_gauge_vec!(
1251 443 : "pageserver_live_connections",
1252 443 : "Number of live network connections",
1253 443 : &["pageserver_connection_kind"]
1254 443 : )
1255 443 : .expect("failed to define a metric")
1256 443 : });
1257 :
1258 : // remote storage metrics
1259 :
1260 : /// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
1261 634 : static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
1262 634 : register_int_gauge_vec!(
1263 634 : "pageserver_remote_timeline_client_calls_unfinished",
1264 634 : "Number of ongoing calls to remote timeline client. \
1265 634 : Used to populate pageserver_remote_timeline_client_calls_started. \
1266 634 : This metric is not useful for sampling from Prometheus, but useful in tests.",
1267 634 : &[
1268 634 : "tenant_id",
1269 634 : "shard_id",
1270 634 : "timeline_id",
1271 634 : "file_kind",
1272 634 : "op_kind"
1273 634 : ],
1274 634 : )
1275 634 : .expect("failed to define a metric")
1276 634 : });
1277 :
1278 634 : static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new(|| {
1279 634 : register_histogram_vec!(
1280 634 : "pageserver_remote_timeline_client_calls_started",
1281 634 : "When calling a remote timeline client method, we record the current value \
1282 634 : of the calls_unfinished gauge in this histogram. Plot the histogram \
1283 634 : over time in a heatmap to visualize how many operations were ongoing \
1284 634 : at a given instant. It gives you a better idea of the queue depth \
1285 634 : than plotting the gauge directly, since operations may complete faster \
1286 634 : than the sampling interval.",
1287 634 : &["file_kind", "op_kind"],
1288 634 : // The calls_unfinished gauge is an integer gauge, hence we have integer buckets.
1289 634 : vec![0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 15.0, 20.0, 40.0, 60.0, 80.0, 100.0, 500.0],
1290 634 : )
1291 634 : .expect("failed to define a metric")
1292 634 : });
1293 :
1294 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1295 498 : Lazy::new(|| {
1296 498 : register_int_counter_vec!(
1297 498 : "pageserver_remote_timeline_client_bytes_started",
1298 498 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1299 498 : The increment happens when the operation is scheduled.",
1300 498 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1301 498 : )
1302 498 : .expect("failed to define a metric")
1303 498 : });
1304 :
1305 498 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1306 498 : register_int_counter_vec!(
1307 498 : "pageserver_remote_timeline_client_bytes_finished",
1308 498 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1309 498 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1310 498 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1311 498 : )
1312 498 : .expect("failed to define a metric")
1313 498 : });
1314 :
1315 : pub(crate) struct TenantManagerMetrics {
1316 : pub(crate) tenant_slots: UIntGauge,
1317 : pub(crate) tenant_slot_writes: IntCounter,
1318 : pub(crate) unexpected_errors: IntCounter,
1319 : }
1320 :
1321 606 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1322 606 : TenantManagerMetrics {
1323 606 : tenant_slots: register_uint_gauge!(
1324 606 : "pageserver_tenant_manager_slots",
1325 606 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1326 606 : )
1327 606 : .expect("failed to define a metric"),
1328 606 : tenant_slot_writes: register_int_counter!(
1329 606 : "pageserver_tenant_manager_slot_writes",
1330 606 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1331 606 : )
1332 606 : .expect("failed to define a metric"),
1333 606 : unexpected_errors: register_int_counter!(
1334 606 : "pageserver_tenant_manager_unexpected_errors_total",
1335 606 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1336 606 : )
1337 606 : .expect("failed to define a metric"),
1338 606 : }
1339 606 : });
1340 :
1341 : pub(crate) struct DeletionQueueMetrics {
1342 : pub(crate) keys_submitted: IntCounter,
1343 : pub(crate) keys_dropped: IntCounter,
1344 : pub(crate) keys_executed: IntCounter,
1345 : pub(crate) keys_validated: IntCounter,
1346 : pub(crate) dropped_lsn_updates: IntCounter,
1347 : pub(crate) unexpected_errors: IntCounter,
1348 : pub(crate) remote_errors: IntCounterVec,
1349 : }
1350 617 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1351 617 : DeletionQueueMetrics{
1352 617 :
1353 617 : keys_submitted: register_int_counter!(
1354 617 : "pageserver_deletion_queue_submitted_total",
1355 617 : "Number of objects submitted for deletion"
1356 617 : )
1357 617 : .expect("failed to define a metric"),
1358 617 :
1359 617 : keys_dropped: register_int_counter!(
1360 617 : "pageserver_deletion_queue_dropped_total",
1361 617 : "Number of object deletions dropped due to stale generation."
1362 617 : )
1363 617 : .expect("failed to define a metric"),
1364 617 :
1365 617 : keys_executed: register_int_counter!(
1366 617 : "pageserver_deletion_queue_executed_total",
1367 617 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1368 617 : )
1369 617 : .expect("failed to define a metric"),
1370 617 :
1371 617 : keys_validated: register_int_counter!(
1372 617 : "pageserver_deletion_queue_validated_total",
1373 617 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1374 617 : )
1375 617 : .expect("failed to define a metric"),
1376 617 :
1377 617 : dropped_lsn_updates: register_int_counter!(
1378 617 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1379 617 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1380 617 : )
1381 617 : .expect("failed to define a metric"),
1382 617 : unexpected_errors: register_int_counter!(
1383 617 : "pageserver_deletion_queue_unexpected_errors_total",
1384 617 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1385 617 : )
1386 617 : .expect("failed to define a metric"),
1387 617 : remote_errors: register_int_counter_vec!(
1388 617 : "pageserver_deletion_queue_remote_errors_total",
1389 617 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1390 617 : &["op_kind"],
1391 617 : )
1392 617 : .expect("failed to define a metric")
1393 617 : }
1394 617 : });
1395 :
1396 : pub(crate) struct WalIngestMetrics {
1397 : pub(crate) records_received: IntCounter,
1398 : pub(crate) records_committed: IntCounter,
1399 : pub(crate) records_filtered: IntCounter,
1400 : }
1401 :
1402 480 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
1403 480 : records_received: register_int_counter!(
1404 480 : "pageserver_wal_ingest_records_received",
1405 480 : "Number of WAL records received from safekeepers"
1406 480 : )
1407 480 : .expect("failed to define a metric"),
1408 480 : records_committed: register_int_counter!(
1409 480 : "pageserver_wal_ingest_records_committed",
1410 480 : "Number of WAL records which resulted in writes to pageserver storage"
1411 480 : )
1412 480 : .expect("failed to define a metric"),
1413 480 : records_filtered: register_int_counter!(
1414 480 : "pageserver_wal_ingest_records_filtered",
1415 480 : "Number of WAL records filtered out due to sharding"
1416 480 : )
1417 480 : .expect("failed to define a metric"),
1418 480 : });
1419 : pub(crate) struct SecondaryModeMetrics {
1420 : pub(crate) upload_heatmap: IntCounter,
1421 : pub(crate) upload_heatmap_errors: IntCounter,
1422 : pub(crate) upload_heatmap_duration: Histogram,
1423 : pub(crate) download_heatmap: IntCounter,
1424 : pub(crate) download_layer: IntCounter,
1425 : }
1426 8 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
1427 8 : upload_heatmap: register_int_counter!(
1428 8 : "pageserver_secondary_upload_heatmap",
1429 8 : "Number of heatmaps written to remote storage by attached tenants"
1430 8 : )
1431 8 : .expect("failed to define a metric"),
1432 8 : upload_heatmap_errors: register_int_counter!(
1433 8 : "pageserver_secondary_upload_heatmap_errors",
1434 8 : "Failures writing heatmap to remote storage"
1435 8 : )
1436 8 : .expect("failed to define a metric"),
1437 8 : upload_heatmap_duration: register_histogram!(
1438 8 : "pageserver_secondary_upload_heatmap_duration",
1439 8 : "Time to build and upload a heatmap, including any waiting inside the S3 client"
1440 8 : )
1441 8 : .expect("failed to define a metric"),
1442 8 : download_heatmap: register_int_counter!(
1443 8 : "pageserver_secondary_download_heatmap",
1444 8 : "Number of downloads of heatmaps by secondary mode locations"
1445 8 : )
1446 8 : .expect("failed to define a metric"),
1447 8 : download_layer: register_int_counter!(
1448 8 : "pageserver_secondary_download_layer",
1449 8 : "Number of downloads of layers by secondary mode locations"
1450 8 : )
1451 8 : .expect("failed to define a metric"),
1452 8 : });
1453 :
/// Kind of remote storage operation; used as the `op_kind` label value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase label value for this operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1469 :
/// Kind of file a remote storage operation acts on; used as the `file_kind`
/// label value.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase label value for this file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1483 :
1484 634 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1485 634 : register_histogram_vec!(
1486 634 : "pageserver_remote_operation_seconds",
1487 634 : "Time spent on remote storage operations. \
1488 634 : Grouped by tenant, timeline, operation_kind and status. \
1489 634 : Does not account for time spent waiting in remote timeline client's queues.",
1490 634 : &["file_kind", "op_kind", "status"]
1491 634 : )
1492 634 : .expect("failed to define a metric")
1493 634 : });
1494 :
1495 556 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1496 556 : register_int_counter_vec!(
1497 556 : "pageserver_tenant_task_events",
1498 556 : "Number of task start/stop/fail events.",
1499 556 : &["event"],
1500 556 : )
1501 556 : .expect("Failed to register tenant_task_events metric")
1502 556 : });
1503 :
1504 494 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
1505 988 : register_int_counter_pair_vec!(
1506 988 : "pageserver_background_loop_semaphore_wait_start_count",
1507 988 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
1508 988 : "pageserver_background_loop_semaphore_wait_finish_count",
1509 988 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
1510 988 : &["task"],
1511 988 : )
1512 494 : .unwrap()
1513 494 : });
1514 :
1515 604 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
1516 604 : register_int_counter_vec!(
1517 604 : "pageserver_background_loop_period_overrun_count",
1518 604 : "Incremented whenever warn_when_period_overrun() logs a warning.",
1519 604 : &["task", "period"],
1520 604 : )
1521 604 : .expect("failed to define a metric")
1522 604 : });
1523 :
1524 : // walreceiver metrics
1525 :
1526 604 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
1527 604 : register_int_counter!(
1528 604 : "pageserver_walreceiver_started_connections_total",
1529 604 : "Number of started walreceiver connections"
1530 604 : )
1531 604 : .expect("failed to define a metric")
1532 604 : });
1533 :
1534 604 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
1535 604 : register_int_gauge!(
1536 604 : "pageserver_walreceiver_active_managers",
1537 604 : "Number of active walreceiver managers"
1538 604 : )
1539 604 : .expect("failed to define a metric")
1540 604 : });
1541 :
1542 419 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
1543 419 : register_int_counter_vec!(
1544 419 : "pageserver_walreceiver_switches_total",
1545 419 : "Number of walreceiver manager change_connection calls",
1546 419 : &["reason"]
1547 419 : )
1548 419 : .expect("failed to define a metric")
1549 419 : });
1550 :
1551 604 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
1552 604 : register_int_counter!(
1553 604 : "pageserver_walreceiver_broker_updates_total",
1554 604 : "Number of received broker updates in walreceiver"
1555 604 : )
1556 604 : .expect("failed to define a metric")
1557 604 : });
1558 :
1559 606 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1560 606 : register_int_counter_vec!(
1561 606 : "pageserver_walreceiver_candidates_events_total",
1562 606 : "Number of walreceiver candidate events",
1563 606 : &["event"]
1564 606 : )
1565 606 : .expect("failed to define a metric")
1566 606 : });
1567 :
1568 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
1569 604 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
1570 :
1571 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
1572 606 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
1573 :
1574 : // Metrics collected on WAL redo operations
1575 : //
1576 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
1577 : // for access to the postgres process ('wait') since there is only one for
1578 : // each tenant.
1579 :
/// Time buckets are small because we want to be able to measure the
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
///
/// Expands to a fresh `Vec` of bucket upper bounds in seconds, following a
/// 1 / 2.5 / 5 progression per decade from 5us to 1s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
            1.000_000,
        ]
    };
}
1596 :
/// While we're at it, also measure the amount of records replayed in each
/// operation. We have a global 'total replayed' counter, but that's not
/// as useful as 'what is the skew for how many records we replay in one
/// operation'.
///
/// Expands to a fresh `Vec` of bucket upper bounds expressed as record counts,
/// from 0 to 500.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
    };
}
1606 :
/// Expands to a fresh `Vec` of bucket upper bounds expressed in bytes, used by
/// the WAL redo bytes histogram.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
        ]
    };
}
1617 :
1618 610 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
1619 610 : register_histogram!(
1620 610 : "pageserver_wal_redo_seconds",
1621 610 : "Time spent on WAL redo",
1622 610 : redo_histogram_time_buckets!()
1623 610 : )
1624 610 : .expect("failed to define a metric")
1625 610 : });
1626 :
1627 610 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1628 610 : register_histogram!(
1629 610 : "pageserver_wal_redo_records_histogram",
1630 610 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
1631 610 : redo_histogram_count_buckets!(),
1632 610 : )
1633 610 : .expect("failed to define a metric")
1634 610 : });
1635 :
1636 610 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1637 610 : register_histogram!(
1638 610 : "pageserver_wal_redo_bytes_histogram",
1639 610 : "Histogram of number of records replayed per redo sent to Postgres",
1640 610 : redo_bytes_histogram_count_buckets!(),
1641 610 : )
1642 610 : .expect("failed to define a metric")
1643 610 : });
1644 :
1645 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
1646 386 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
1647 386 : register_int_counter!(
1648 386 : "pageserver_replayed_wal_records_total",
1649 386 : "Number of WAL records replayed in WAL redo process"
1650 386 : )
1651 386 : .unwrap()
1652 386 : });
1653 :
1654 : #[rustfmt::skip]
1655 610 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1656 610 : register_histogram!(
1657 610 : "pageserver_wal_redo_process_launch_duration",
1658 610 : "Histogram of the duration of successful WalRedoProcess::launch calls",
1659 610 : vec![
1660 610 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
1661 610 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
1662 610 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
1663 610 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
1664 610 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
1665 610 : ],
1666 610 : )
1667 610 : .expect("failed to define a metric")
1668 610 : });
1669 :
1670 : pub(crate) struct WalRedoProcessCounters {
1671 : pub(crate) started: IntCounter,
1672 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
1673 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
1674 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
1675 : }
1676 :
1677 1447 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
1678 : pub(crate) enum WalRedoKillCause {
1679 : WalRedoProcessDrop,
1680 : NoLeakChildDrop,
1681 : Startup,
1682 : }
1683 :
1684 : impl Default for WalRedoProcessCounters {
1685 386 : fn default() -> Self {
1686 386 : let started = register_int_counter!(
1687 386 : "pageserver_wal_redo_process_started_total",
1688 386 : "Number of WAL redo processes started",
1689 386 : )
1690 386 : .unwrap();
1691 386 :
1692 386 : let killed = register_int_counter_vec!(
1693 386 : "pageserver_wal_redo_process_stopped_total",
1694 386 : "Number of WAL redo processes stopped",
1695 386 : &["cause"],
1696 386 : )
1697 386 : .unwrap();
1698 386 :
1699 386 : let active_stderr_logger_tasks_started = register_int_counter!(
1700 386 : "pageserver_walredo_stderr_logger_tasks_started_total",
1701 386 : "Number of active walredo stderr logger tasks that have started",
1702 386 : )
1703 386 : .unwrap();
1704 386 :
1705 386 : let active_stderr_logger_tasks_finished = register_int_counter!(
1706 386 : "pageserver_walredo_stderr_logger_tasks_finished_total",
1707 386 : "Number of active walredo stderr logger tasks that have finished",
1708 386 : )
1709 386 : .unwrap();
1710 386 :
1711 386 : Self {
1712 386 : started,
1713 1158 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
1714 1158 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
1715 1158 : let cause_str: &'static str = cause.into();
1716 1158 : killed.with_label_values(&[cause_str])
1717 1158 : })),
1718 386 : active_stderr_logger_tasks_started,
1719 386 : active_stderr_logger_tasks_finished,
1720 386 : }
1721 386 : }
1722 : }
1723 :
1724 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
1725 : Lazy::new(WalRedoProcessCounters::default);
1726 :
1727 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
1728 : pub(crate) struct StorageTimeMetricsTimer {
1729 : metrics: StorageTimeMetrics,
1730 : start: Instant,
1731 : }
1732 :
1733 : impl StorageTimeMetricsTimer {
1734 10033 : fn new(metrics: StorageTimeMetrics) -> Self {
1735 10033 : Self {
1736 10033 : metrics,
1737 10033 : start: Instant::now(),
1738 10033 : }
1739 10033 : }
1740 :
1741 : /// Record the time from creation to now.
1742 9982 : pub fn stop_and_record(self) {
1743 9982 : let duration = self.start.elapsed().as_secs_f64();
1744 9982 : self.metrics.timeline_sum.inc_by(duration);
1745 9982 : self.metrics.timeline_count.inc();
1746 9982 : self.metrics.global_histogram.observe(duration);
1747 9982 : }
1748 : }
1749 :
1750 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
1751 : /// timeline total sum and count.
1752 10033 : #[derive(Clone, Debug)]
1753 : pub(crate) struct StorageTimeMetrics {
1754 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
1755 : timeline_sum: Counter,
1756 : /// Number of oeprations, per operation, tenant_id and timeline_id
1757 : timeline_count: IntCounter,
1758 : /// Global histogram having only the "operation" label.
1759 : global_histogram: Histogram,
1760 : }
1761 :
1762 : impl StorageTimeMetrics {
1763 10976 : pub fn new(
1764 10976 : operation: StorageTimeOperation,
1765 10976 : tenant_id: &str,
1766 10976 : shard_id: &str,
1767 10976 : timeline_id: &str,
1768 10976 : ) -> Self {
1769 10976 : let operation: &'static str = operation.into();
1770 10976 :
1771 10976 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
1772 10976 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1773 10976 : .unwrap();
1774 10976 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
1775 10976 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1776 10976 : .unwrap();
1777 10976 : let global_histogram = STORAGE_TIME_GLOBAL
1778 10976 : .get_metric_with_label_values(&[operation])
1779 10976 : .unwrap();
1780 10976 :
1781 10976 : StorageTimeMetrics {
1782 10976 : timeline_sum,
1783 10976 : timeline_count,
1784 10976 : global_histogram,
1785 10976 : }
1786 10976 : }
1787 :
1788 : /// Starts timing a new operation.
1789 : ///
1790 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
1791 10033 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
1792 10033 : StorageTimeMetricsTimer::new(self.clone())
1793 10033 : }
1794 : }
1795 :
1796 0 : #[derive(Debug)]
1797 : pub(crate) struct TimelineMetrics {
1798 : tenant_id: String,
1799 : shard_id: String,
1800 : timeline_id: String,
1801 : pub flush_time_histo: StorageTimeMetrics,
1802 : pub compact_time_histo: StorageTimeMetrics,
1803 : pub create_images_time_histo: StorageTimeMetrics,
1804 : pub logical_size_histo: StorageTimeMetrics,
1805 : pub imitate_logical_size_histo: StorageTimeMetrics,
1806 : pub load_layer_map_histo: StorageTimeMetrics,
1807 : pub garbage_collect_histo: StorageTimeMetrics,
1808 : pub last_record_gauge: IntGauge,
1809 : resident_physical_size_gauge: UIntGauge,
1810 : /// copy of LayeredTimeline.current_logical_size
1811 : pub current_logical_size_gauge: UIntGauge,
1812 : pub num_persistent_files_created: IntCounter,
1813 : pub persistent_bytes_written: IntCounter,
1814 : pub evictions: IntCounter,
1815 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
1816 : }
1817 :
1818 : impl TimelineMetrics {
1819 1568 : pub fn new(
1820 1568 : tenant_shard_id: &TenantShardId,
1821 1568 : timeline_id: &TimelineId,
1822 1568 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
1823 1568 : ) -> Self {
1824 1568 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1825 1568 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
1826 1568 : let timeline_id = timeline_id.to_string();
1827 1568 : let flush_time_histo = StorageTimeMetrics::new(
1828 1568 : StorageTimeOperation::LayerFlush,
1829 1568 : &tenant_id,
1830 1568 : &shard_id,
1831 1568 : &timeline_id,
1832 1568 : );
1833 1568 : let compact_time_histo = StorageTimeMetrics::new(
1834 1568 : StorageTimeOperation::Compact,
1835 1568 : &tenant_id,
1836 1568 : &shard_id,
1837 1568 : &timeline_id,
1838 1568 : );
1839 1568 : let create_images_time_histo = StorageTimeMetrics::new(
1840 1568 : StorageTimeOperation::CreateImages,
1841 1568 : &tenant_id,
1842 1568 : &shard_id,
1843 1568 : &timeline_id,
1844 1568 : );
1845 1568 : let logical_size_histo = StorageTimeMetrics::new(
1846 1568 : StorageTimeOperation::LogicalSize,
1847 1568 : &tenant_id,
1848 1568 : &shard_id,
1849 1568 : &timeline_id,
1850 1568 : );
1851 1568 : let imitate_logical_size_histo = StorageTimeMetrics::new(
1852 1568 : StorageTimeOperation::ImitateLogicalSize,
1853 1568 : &tenant_id,
1854 1568 : &shard_id,
1855 1568 : &timeline_id,
1856 1568 : );
1857 1568 : let load_layer_map_histo = StorageTimeMetrics::new(
1858 1568 : StorageTimeOperation::LoadLayerMap,
1859 1568 : &tenant_id,
1860 1568 : &shard_id,
1861 1568 : &timeline_id,
1862 1568 : );
1863 1568 : let garbage_collect_histo = StorageTimeMetrics::new(
1864 1568 : StorageTimeOperation::Gc,
1865 1568 : &tenant_id,
1866 1568 : &shard_id,
1867 1568 : &timeline_id,
1868 1568 : );
1869 1568 : let last_record_gauge = LAST_RECORD_LSN
1870 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1871 1568 : .unwrap();
1872 1568 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
1873 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1874 1568 : .unwrap();
1875 1568 : // TODO: we shouldn't expose this metric
1876 1568 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
1877 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1878 1568 : .unwrap();
1879 1568 : let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
1880 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1881 1568 : .unwrap();
1882 1568 : let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
1883 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1884 1568 : .unwrap();
1885 1568 : let evictions = EVICTIONS
1886 1568 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1887 1568 : .unwrap();
1888 1568 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
1889 1568 : .build(&tenant_id, &shard_id, &timeline_id);
1890 1568 :
1891 1568 : TimelineMetrics {
1892 1568 : tenant_id,
1893 1568 : shard_id,
1894 1568 : timeline_id,
1895 1568 : flush_time_histo,
1896 1568 : compact_time_histo,
1897 1568 : create_images_time_histo,
1898 1568 : logical_size_histo,
1899 1568 : imitate_logical_size_histo,
1900 1568 : garbage_collect_histo,
1901 1568 : load_layer_map_histo,
1902 1568 : last_record_gauge,
1903 1568 : resident_physical_size_gauge,
1904 1568 : current_logical_size_gauge,
1905 1568 : num_persistent_files_created,
1906 1568 : persistent_bytes_written,
1907 1568 : evictions,
1908 1568 : evictions_with_low_residence_duration: std::sync::RwLock::new(
1909 1568 : evictions_with_low_residence_duration,
1910 1568 : ),
1911 1568 : }
1912 1568 : }
1913 :
1914 21942 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
1915 21942 : self.resident_physical_size_add(sz);
1916 21942 : self.num_persistent_files_created.inc_by(1);
1917 21942 : self.persistent_bytes_written.inc_by(sz);
1918 21942 : }
1919 :
1920 7969 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
1921 7969 : self.resident_physical_size_gauge.sub(sz);
1922 7969 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
1923 7969 : }
1924 :
1925 44430 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
1926 44430 : self.resident_physical_size_gauge.add(sz);
1927 44430 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
1928 44430 : }
1929 :
1930 356 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
1931 356 : self.resident_physical_size_gauge.get()
1932 356 : }
1933 : }
1934 :
1935 : impl Drop for TimelineMetrics {
1936 341 : fn drop(&mut self) {
1937 341 : let tenant_id = &self.tenant_id;
1938 341 : let timeline_id = &self.timeline_id;
1939 341 : let shard_id = &self.shard_id;
1940 341 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1941 341 : {
1942 341 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
1943 341 : let _ =
1944 341 : RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1945 341 : }
1946 341 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1947 341 : let _ =
1948 341 : NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1949 341 : let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1950 341 : let _ = EVICTIONS.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1951 341 :
1952 341 : self.evictions_with_low_residence_duration
1953 341 : .write()
1954 341 : .unwrap()
1955 341 : .remove(tenant_id, shard_id, timeline_id);
1956 :
1957 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
1958 : // removed at the end of it. The idea is to have the metrics outlive the
1959 : // entity during which they're observed, e.g., the smgr metrics shall
1960 : // outlive an individual smgr connection, but not the timeline.
1961 :
1962 3069 : for op in StorageTimeOperation::VARIANTS {
1963 2728 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
1964 2728 : op,
1965 2728 : tenant_id,
1966 2728 : shard_id,
1967 2728 : timeline_id,
1968 2728 : ]);
1969 2728 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
1970 2728 : op,
1971 2728 : tenant_id,
1972 2728 : shard_id,
1973 2728 : timeline_id,
1974 2728 : ]);
1975 2728 : }
1976 :
1977 1023 : for op in STORAGE_IO_SIZE_OPERATIONS {
1978 682 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
1979 682 : }
1980 :
1981 2046 : for op in SmgrQueryType::iter() {
1982 1705 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
1983 1705 : op.into(),
1984 1705 : tenant_id,
1985 1705 : shard_id,
1986 1705 : timeline_id,
1987 1705 : ]);
1988 1705 : }
1989 341 : }
1990 : }
1991 :
1992 314 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
1993 314 : // Only shard zero deals in synthetic sizes
1994 314 : if tenant_shard_id.is_zero() {
1995 306 : let tid = tenant_shard_id.tenant_id.to_string();
1996 306 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
1997 306 : }
1998 :
1999 : // we leave the BROKEN_TENANTS_SET entry if any
2000 314 : }
2001 :
2002 : use futures::Future;
2003 : use pin_project_lite::pin_project;
2004 : use std::collections::HashMap;
2005 : use std::pin::Pin;
2006 : use std::sync::{Arc, Mutex};
2007 : use std::task::{Context, Poll};
2008 : use std::time::{Duration, Instant};
2009 :
2010 : use crate::context::{PageContentKind, RequestContext};
2011 : use crate::task_mgr::TaskKind;
2012 :
2013 : /// Maintain a per timeline gauge in addition to the global gauge.
2014 : struct PerTimelineRemotePhysicalSizeGauge {
2015 : last_set: u64,
2016 : gauge: UIntGauge,
2017 : }
2018 :
2019 : impl PerTimelineRemotePhysicalSizeGauge {
2020 1568 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2021 1568 : Self {
2022 1568 : last_set: per_timeline_gauge.get(),
2023 1568 : gauge: per_timeline_gauge,
2024 1568 : }
2025 1568 : }
2026 7629 : fn set(&mut self, sz: u64) {
2027 7629 : self.gauge.set(sz);
2028 7629 : if sz < self.last_set {
2029 72 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
2030 7557 : } else {
2031 7557 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
2032 7557 : };
2033 7629 : self.last_set = sz;
2034 7629 : }
2035 15 : fn get(&self) -> u64 {
2036 15 : self.gauge.get()
2037 15 : }
2038 : }
2039 :
2040 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2041 341 : fn drop(&mut self) {
2042 341 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
2043 341 : }
2044 : }
2045 :
2046 : pub(crate) struct RemoteTimelineClientMetrics {
2047 : tenant_id: String,
2048 : shard_id: String,
2049 : timeline_id: String,
2050 : remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
2051 : calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
2052 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2053 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2054 : }
2055 :
2056 : impl RemoteTimelineClientMetrics {
2057 1581 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2058 1581 : RemoteTimelineClientMetrics {
2059 1581 : tenant_id: tenant_shard_id.tenant_id.to_string(),
2060 1581 : shard_id: format!("{}", tenant_shard_id.shard_slug()),
2061 1581 : timeline_id: timeline_id.to_string(),
2062 1581 : calls_unfinished_gauge: Mutex::new(HashMap::default()),
2063 1581 : bytes_started_counter: Mutex::new(HashMap::default()),
2064 1581 : bytes_finished_counter: Mutex::new(HashMap::default()),
2065 1581 : remote_physical_size_gauge: Mutex::new(None),
2066 1581 : }
2067 1581 : }
2068 :
2069 7629 : pub(crate) fn remote_physical_size_set(&self, sz: u64) {
2070 7629 : let mut guard = self.remote_physical_size_gauge.lock().unwrap();
2071 7629 : let gauge = guard.get_or_insert_with(|| {
2072 1568 : PerTimelineRemotePhysicalSizeGauge::new(
2073 1568 : REMOTE_PHYSICAL_SIZE
2074 1568 : .get_metric_with_label_values(&[
2075 1568 : &self.tenant_id,
2076 1568 : &self.shard_id,
2077 1568 : &self.timeline_id,
2078 1568 : ])
2079 1568 : .unwrap(),
2080 1568 : )
2081 7629 : });
2082 7629 : gauge.set(sz);
2083 7629 : }
2084 :
2085 15 : pub(crate) fn remote_physical_size_get(&self) -> u64 {
2086 15 : let guard = self.remote_physical_size_gauge.lock().unwrap();
2087 15 : guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
2088 15 : }
2089 :
2090 40145 : pub fn remote_operation_time(
2091 40145 : &self,
2092 40145 : file_kind: &RemoteOpFileKind,
2093 40145 : op_kind: &RemoteOpKind,
2094 40145 : status: &'static str,
2095 40145 : ) -> Histogram {
2096 40145 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2097 40145 : REMOTE_OPERATION_TIME
2098 40145 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2099 40145 : .unwrap()
2100 40145 : }
2101 :
2102 75091 : fn calls_unfinished_gauge(
2103 75091 : &self,
2104 75091 : file_kind: &RemoteOpFileKind,
2105 75091 : op_kind: &RemoteOpKind,
2106 75091 : ) -> IntGauge {
2107 75091 : let mut guard = self.calls_unfinished_gauge.lock().unwrap();
2108 75091 : let key = (file_kind.as_str(), op_kind.as_str());
2109 75091 : let metric = guard.entry(key).or_insert_with(move || {
2110 3301 : REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
2111 3301 : .get_metric_with_label_values(&[
2112 3301 : &self.tenant_id,
2113 3301 : &self.shard_id,
2114 3301 : &self.timeline_id,
2115 3301 : key.0,
2116 3301 : key.1,
2117 3301 : ])
2118 3301 : .unwrap()
2119 75091 : });
2120 75091 : metric.clone()
2121 75091 : }
2122 :
2123 44187 : fn calls_started_hist(
2124 44187 : &self,
2125 44187 : file_kind: &RemoteOpFileKind,
2126 44187 : op_kind: &RemoteOpKind,
2127 44187 : ) -> Histogram {
2128 44187 : let key = (file_kind.as_str(), op_kind.as_str());
2129 44187 : REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST
2130 44187 : .get_metric_with_label_values(&[key.0, key.1])
2131 44187 : .unwrap()
2132 44187 : }
2133 :
2134 21990 : fn bytes_started_counter(
2135 21990 : &self,
2136 21990 : file_kind: &RemoteOpFileKind,
2137 21990 : op_kind: &RemoteOpKind,
2138 21990 : ) -> IntCounter {
2139 21990 : let mut guard = self.bytes_started_counter.lock().unwrap();
2140 21990 : let key = (file_kind.as_str(), op_kind.as_str());
2141 21990 : let metric = guard.entry(key).or_insert_with(move || {
2142 942 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2143 942 : .get_metric_with_label_values(&[
2144 942 : &self.tenant_id,
2145 942 : &self.shard_id,
2146 942 : &self.timeline_id,
2147 942 : key.0,
2148 942 : key.1,
2149 942 : ])
2150 942 : .unwrap()
2151 21990 : });
2152 21990 : metric.clone()
2153 21990 : }
2154 :
2155 40988 : fn bytes_finished_counter(
2156 40988 : &self,
2157 40988 : file_kind: &RemoteOpFileKind,
2158 40988 : op_kind: &RemoteOpKind,
2159 40988 : ) -> IntCounter {
2160 40988 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2161 40988 : let key = (file_kind.as_str(), op_kind.as_str());
2162 40988 : let metric = guard.entry(key).or_insert_with(move || {
2163 942 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2164 942 : .get_metric_with_label_values(&[
2165 942 : &self.tenant_id,
2166 942 : &self.shard_id,
2167 942 : &self.timeline_id,
2168 942 : key.0,
2169 942 : key.1,
2170 942 : ])
2171 942 : .unwrap()
2172 40988 : });
2173 40988 : metric.clone()
2174 40988 : }
2175 : }
2176 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Value of the cached bytes-started counter for the given file/operation kind,
    /// or `None` if that counter has not been created yet.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_started_counter.lock().unwrap();
        let counter = cache.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }

    /// Value of the cached bytes-finished counter for the given file/operation kind,
    /// or `None` if that counter has not been created yet.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_finished_counter.lock().unwrap();
        let counter = cache.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }
}
2199 :
2200 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2201 : #[must_use]
2202 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2203 : /// Decremented on drop.
2204 : calls_unfinished_metric: Option<IntGauge>,
2205 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2206 : bytes_finished: Option<(IntCounter, u64)>,
2207 : }
2208 :
2209 : impl RemoteTimelineClientCallMetricGuard {
2210 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2211 : /// The caller vouches to do the metric updates manually.
2212 33962 : pub fn will_decrement_manually(mut self) {
2213 33962 : let RemoteTimelineClientCallMetricGuard {
2214 33962 : calls_unfinished_metric,
2215 33962 : bytes_finished,
2216 33962 : } = &mut self;
2217 33962 : calls_unfinished_metric.take();
2218 33962 : bytes_finished.take();
2219 33962 : }
2220 : }
2221 :
2222 : impl Drop for RemoteTimelineClientCallMetricGuard {
2223 44186 : fn drop(&mut self) {
2224 44186 : let RemoteTimelineClientCallMetricGuard {
2225 44186 : calls_unfinished_metric,
2226 44186 : bytes_finished,
2227 44186 : } = self;
2228 44186 : if let Some(guard) = calls_unfinished_metric.take() {
2229 10224 : guard.dec();
2230 33962 : }
2231 44186 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2232 0 : bytes_finished_metric.inc_by(*value);
2233 44186 : }
2234 44186 : }
2235 : }
2236 :
/// Tells [`RemoteTimelineClientMetrics`] whether the byte size of a call should be
/// accounted for in the applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// The call's byte size is not accounted for in any metric.
    /// `reason` exists purely so that call sites document why they do not need
    /// the metric.
    DontTrackSize { reason: &'static str },
    /// The call's byte size, to be accounted for in the applicable metric(s).
    Bytes(u64),
}
2247 :
2248 : impl RemoteTimelineClientMetrics {
2249 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2250 : ///
2251 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2252 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2253 : /// is more suitable.
2254 : /// Never do both.
2255 44187 : pub(crate) fn call_begin(
2256 44187 : &self,
2257 44187 : file_kind: &RemoteOpFileKind,
2258 44187 : op_kind: &RemoteOpKind,
2259 44187 : size: RemoteTimelineClientMetricsCallTrackSize,
2260 44187 : ) -> RemoteTimelineClientCallMetricGuard {
2261 44187 : let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
2262 44187 : self.calls_started_hist(file_kind, op_kind)
2263 44187 : .observe(calls_unfinished_metric.get() as f64);
2264 44187 : calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric
2265 :
2266 44187 : let bytes_finished = match size {
2267 22197 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2268 22197 : // nothing to do
2269 22197 : None
2270 : }
2271 21990 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2272 21990 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2273 21990 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2274 21990 : Some((finished_counter, size))
2275 : }
2276 : };
2277 44187 : RemoteTimelineClientCallMetricGuard {
2278 44187 : calls_unfinished_metric: Some(calls_unfinished_metric),
2279 44187 : bytes_finished,
2280 44187 : }
2281 44187 : }
2282 :
2283 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2284 : /// Using the guard object is generally preferable.
2285 : /// See [`call_begin`](Self::call_begin) for more context.
2286 30904 : pub(crate) fn call_end(
2287 30904 : &self,
2288 30904 : file_kind: &RemoteOpFileKind,
2289 30904 : op_kind: &RemoteOpKind,
2290 30904 : size: RemoteTimelineClientMetricsCallTrackSize,
2291 30904 : ) {
2292 30904 : let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
2293 : debug_assert!(
2294 30904 : calls_unfinished_metric.get() > 0,
2295 0 : "begin and end should cancel out"
2296 : );
2297 30904 : calls_unfinished_metric.dec();
2298 30904 : match size {
2299 11906 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2300 18998 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2301 18998 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2302 18998 : }
2303 : }
2304 30904 : }
2305 : }
2306 :
2307 : impl Drop for RemoteTimelineClientMetrics {
2308 354 : fn drop(&mut self) {
2309 354 : let RemoteTimelineClientMetrics {
2310 354 : tenant_id,
2311 354 : shard_id,
2312 354 : timeline_id,
2313 354 : remote_physical_size_gauge,
2314 354 : calls_unfinished_gauge,
2315 354 : bytes_started_counter,
2316 354 : bytes_finished_counter,
2317 354 : } = self;
2318 869 : for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
2319 869 : let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
2320 869 : tenant_id,
2321 869 : shard_id,
2322 869 : timeline_id,
2323 869 : a,
2324 869 : b,
2325 869 : ]);
2326 869 : }
2327 354 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2328 216 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2329 216 : tenant_id,
2330 216 : shard_id,
2331 216 : timeline_id,
2332 216 : a,
2333 216 : b,
2334 216 : ]);
2335 216 : }
2336 354 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2337 216 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2338 216 : tenant_id,
2339 216 : shard_id,
2340 216 : timeline_id,
2341 216 : a,
2342 216 : b,
2343 216 : ]);
2344 216 : }
2345 354 : {
2346 354 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2347 354 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2348 354 : }
2349 354 : }
2350 : }
2351 :
2352 : /// Wrapper future that measures the time spent by a remote storage operation,
2353 : /// and records the time and success/failure as a prometheus metric.
2354 : pub(crate) trait MeasureRemoteOp: Sized {
2355 40155 : fn measure_remote_op(
2356 40155 : self,
2357 40155 : file_kind: RemoteOpFileKind,
2358 40155 : op: RemoteOpKind,
2359 40155 : metrics: Arc<RemoteTimelineClientMetrics>,
2360 40155 : ) -> MeasuredRemoteOp<Self> {
2361 40155 : let start = Instant::now();
2362 40155 : MeasuredRemoteOp {
2363 40155 : inner: self,
2364 40155 : file_kind,
2365 40155 : op,
2366 40155 : start,
2367 40155 : metrics,
2368 40155 : }
2369 40155 : }
2370 : }
2371 :
2372 : impl<T: Sized> MeasureRemoteOp for T {}
2373 :
2374 : pin_project! {
2375 : pub(crate) struct MeasuredRemoteOp<F>
2376 : {
2377 : #[pin]
2378 : inner: F,
2379 : file_kind: RemoteOpFileKind,
2380 : op: RemoteOpKind,
2381 : start: Instant,
2382 : metrics: Arc<RemoteTimelineClientMetrics>,
2383 : }
2384 : }
2385 :
2386 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
2387 : type Output = Result<O, E>;
2388 :
2389 1599134 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
2390 1599134 : let this = self.project();
2391 1599134 : let poll_result = this.inner.poll(cx);
2392 1599134 : if let Poll::Ready(ref res) = poll_result {
2393 40145 : let duration = this.start.elapsed();
2394 40145 : let status = if res.is_ok() { &"success" } else { &"failure" };
2395 40145 : this.metrics
2396 40145 : .remote_operation_time(this.file_kind, this.op, status)
2397 40145 : .observe(duration.as_secs_f64());
2398 1558989 : }
2399 1599134 : poll_result
2400 1599134 : }
2401 : }
2402 :
2403 604 : pub fn preinitialize_metrics() {
2404 604 : // Python tests need these and on some we do alerting.
2405 604 : //
2406 604 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
2407 604 : // order:
2408 604 : // - global metrics reside in a Lazy<PageserverMetrics>
2409 604 : // - access via crate::metrics::PS_METRICS.materialized_page_cache_hit.inc()
2410 604 : // - could move the statics into TimelineMetrics::new()?
2411 604 :
2412 604 : // counters
2413 604 : [
2414 604 : &MATERIALIZED_PAGE_CACHE_HIT,
2415 604 : &MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
2416 604 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
2417 604 : &WALRECEIVER_STARTED_CONNECTIONS,
2418 604 : &WALRECEIVER_BROKER_UPDATES,
2419 604 : &WALRECEIVER_CANDIDATES_ADDED,
2420 604 : &WALRECEIVER_CANDIDATES_REMOVED,
2421 604 : ]
2422 604 : .into_iter()
2423 4228 : .for_each(|c| {
2424 4228 : Lazy::force(c);
2425 4228 : });
2426 604 :
2427 604 : // Deletion queue stats
2428 604 : Lazy::force(&DELETION_QUEUE);
2429 604 :
2430 604 : // Tenant stats
2431 604 : Lazy::force(&TENANT);
2432 604 :
2433 604 : // Tenant manager stats
2434 604 : Lazy::force(&TENANT_MANAGER);
2435 604 :
2436 604 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
2437 604 :
2438 604 : // countervecs
2439 604 : [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
2440 604 : .into_iter()
2441 604 : .for_each(|c| {
2442 604 : Lazy::force(c);
2443 604 : });
2444 604 :
2445 604 : // gauges
2446 604 : WALRECEIVER_ACTIVE_MANAGERS.get();
2447 604 :
2448 604 : // histograms
2449 604 : [
2450 604 : &READ_NUM_FS_LAYERS,
2451 604 : &WAIT_LSN_TIME,
2452 604 : &WAL_REDO_TIME,
2453 604 : &WAL_REDO_RECORDS_HISTOGRAM,
2454 604 : &WAL_REDO_BYTES_HISTOGRAM,
2455 604 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
2456 604 : ]
2457 604 : .into_iter()
2458 3624 : .for_each(|h| {
2459 3624 : Lazy::force(h);
2460 3624 : });
2461 604 :
2462 604 : // Custom
2463 604 : Lazy::force(&RECONSTRUCT_TIME);
2464 604 : }
|