Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::metric_vec_duration::DurationResultObserver;
3 : use metrics::{
4 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
5 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
6 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
7 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
8 : IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
9 : };
10 : use once_cell::sync::Lazy;
11 : use pageserver_api::shard::TenantShardId;
12 : use strum::{EnumCount, IntoEnumIterator, VariantNames};
13 : use strum_macros::{EnumVariantNames, IntoStaticStr};
14 : use utils::id::TimelineId;
15 :
/// Histogram bucket upper bounds, in seconds, for operations on the critical
/// path, i.e. operations that directly affect the latency of user queries.
///
/// There is one bucket per decade from 1 microsecond to 100 seconds: most
/// latencies land in the microsecond and millisecond range, while the upper
/// buckets still allow telling "bad" from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    1e-6, 1e-5, 1e-4, // 1 us, 10 us, 100 us
    1e-3, 1e-2, 1e-1, // 1 ms, 10 ms, 100 ms
    1e0, 1e1, 1e2, // 1 s, 10 s, 100 s
];
28 :
// Metrics collected on operations on the storage repository.

/// Kinds of storage operations; used as the `operation` dimension of the
/// storage-operation metrics.
///
/// The strum derives supply the string form of each variant, and every variant
/// declares its own `serialize` string.
// NOTE(review): the per-variant `serialize` strings presumably take precedence
// over `serialize_all = "kebab_case"` -- confirm against the strum documentation.
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "layer flush")]
    LayerFlush,

    #[strum(serialize = "compact")]
    Compact,

    #[strum(serialize = "create images")]
    CreateImages,

    #[strum(serialize = "logical size")]
    LogicalSize,

    #[strum(serialize = "imitate logical size")]
    ImitateLogicalSize,

    #[strum(serialize = "load layer map")]
    LoadLayerMap,

    #[strum(serialize = "gc")]
    Gc,

    #[strum(serialize = "create tenant")]
    CreateTenant,
}
57 :
58 652 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
59 652 : register_counter_vec!(
60 652 : "pageserver_storage_operations_seconds_sum",
61 652 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
62 652 : &["operation", "tenant_id", "shard_id", "timeline_id"],
63 652 : )
64 652 : .expect("failed to define a metric")
65 652 : });
66 :
67 652 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
68 652 : register_int_counter_vec!(
69 652 : "pageserver_storage_operations_seconds_count",
70 652 : "Count of storage operations with operation, tenant and timeline dimensions",
71 652 : &["operation", "tenant_id", "shard_id", "timeline_id"],
72 652 : )
73 652 : .expect("failed to define a metric")
74 652 : });
75 :
// Histogram buckets (seconds) for background operations such as compaction,
// GC and size calculation: one bucket per decade from 10 ms to 1000 s.
const STORAGE_OP_BUCKETS: &[f64] = &[
    0.01, 0.1, // 10 ms, 100 ms
    1.0, 10.0, 100.0, 1_000.0, // 1 s .. 1000 s
];
78 :
79 654 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
80 654 : register_histogram_vec!(
81 654 : "pageserver_storage_operations_seconds_global",
82 654 : "Time spent on storage operations",
83 654 : &["operation"],
84 654 : STORAGE_OP_BUCKETS.into(),
85 654 : )
86 654 : .expect("failed to define a metric")
87 654 : });
88 :
89 696 : pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
90 696 : register_histogram!(
91 696 : "pageserver_read_num_fs_layers",
92 696 : "Number of persistent layers accessed for processing a read request, including those in the cache",
93 696 : vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
94 696 : )
95 696 : .expect("failed to define a metric")
96 696 : });
97 :
98 : // Metrics collected on operations on the storage repository.
99 :
/// A pair of histograms, one per outcome of an operation.
pub(crate) struct ReconstructTimeMetrics {
    /// Histogram for successful (`Ok`) outcomes.
    ok: Histogram,
    /// Histogram for failed (`Err`) outcomes.
    err: Histogram,
}
104 :
105 696 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
106 696 : let inner = register_histogram_vec!(
107 696 : "pageserver_getpage_reconstruct_seconds",
108 696 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
109 696 : &["result"],
110 696 : CRITICAL_OP_BUCKETS.into(),
111 696 : )
112 696 : .expect("failed to define a metric");
113 696 : ReconstructTimeMetrics {
114 696 : ok: inner.get_metric_with_label_values(&["ok"]).unwrap(),
115 696 : err: inner.get_metric_with_label_values(&["err"]).unwrap(),
116 696 : }
117 696 : });
118 :
119 : impl ReconstructTimeMetrics {
120 7349665 : pub(crate) fn for_result<T, E>(&self, result: &Result<T, E>) -> &Histogram {
121 7349665 : match result {
122 7349665 : Ok(_) => &self.ok,
123 0 : Err(_) => &self.err,
124 : }
125 7349665 : }
126 : }
127 :
128 624 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
129 624 : register_int_counter!(
130 624 : "pageserver_materialized_cache_hits_direct_total",
131 624 : "Number of cache hits from materialized page cache without redo",
132 624 : )
133 624 : .expect("failed to define a metric")
134 624 : });
135 :
136 637 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<Histogram> = Lazy::new(|| {
137 637 : register_histogram!(
138 637 : "pageserver_getpage_get_reconstruct_data_seconds",
139 637 : "Time spent in get_reconstruct_value_data",
140 637 : CRITICAL_OP_BUCKETS.into(),
141 637 : )
142 637 : .expect("failed to define a metric")
143 637 : });
144 :
145 624 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
146 624 : register_int_counter!(
147 624 : "pageserver_materialized_cache_hits_total",
148 624 : "Number of cache hits from materialized page cache",
149 624 : )
150 624 : .expect("failed to define a metric")
151 624 : });
152 :
153 : pub(crate) struct GetVectoredLatency {
154 : map: EnumMap<TaskKind, Option<Histogram>>,
155 : }
156 :
157 : impl GetVectoredLatency {
158 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
159 : // cardinality of the metric.
160 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
161 :
162 78952 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
163 78952 : self.map[task_kind].as_ref()
164 78952 : }
165 : }
166 :
167 430 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
168 430 : let inner = register_histogram_vec!(
169 430 : "pageserver_get_vectored_seconds",
170 430 : "Time spent in get_vectored",
171 430 : &["task_kind"],
172 430 : CRITICAL_OP_BUCKETS.into(),
173 430 : )
174 430 : .expect("failed to define a metric");
175 430 :
176 430 : GetVectoredLatency {
177 11250 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
178 11250 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
179 11250 :
180 11250 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
181 860 : let task_kind = task_kind.into();
182 860 : Some(inner.with_label_values(&[task_kind]))
183 : } else {
184 10390 : None
185 : }
186 11250 : })),
187 430 : }
188 430 : });
189 :
/// Page cache read counters for one combination of task kind and page
/// content kind.
pub(crate) struct PageCacheMetricsForTaskKind {
    /// Read accesses for the `materialized_page` key kind.
    pub read_accesses_materialized_page: IntCounter,
    /// Read accesses for the `immutable` key kind.
    pub read_accesses_immutable: IntCounter,

    /// Read hits for the `immutable` key kind.
    pub read_hits_immutable: IntCounter,
    /// Read hits for the `materialized_page` key kind with hit kind `exact`.
    pub read_hits_materialized_page_exact: IntCounter,
    /// Read hits for the `materialized_page` key kind with hit kind `older_lsn`.
    pub read_hits_materialized_page_older_lsn: IntCounter,
}
198 :
/// Page cache counters, indexed first by task kind and then by page content
/// kind.
pub(crate) struct PageCacheMetrics {
    map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
}
202 :
203 639 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
204 639 : register_int_counter_vec!(
205 639 : "pageserver_page_cache_read_hits_total",
206 639 : "Number of read accesses to the page cache that hit",
207 639 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
208 639 : )
209 639 : .expect("failed to define a metric")
210 639 : });
211 :
212 639 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
213 639 : register_int_counter_vec!(
214 639 : "pageserver_page_cache_read_accesses_total",
215 639 : "Number of read accesses to the page cache",
216 639 : &["task_kind", "key_kind", "content_kind"]
217 639 : )
218 639 : .expect("failed to define a metric")
219 639 : });
220 :
221 639 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
222 16688 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
223 16688 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
224 16688 : let task_kind: &'static str = task_kind.into();
225 100128 : EnumMap::from_array(std::array::from_fn(|content_kind| {
226 100128 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
227 100128 : let content_kind: &'static str = content_kind.into();
228 100128 : PageCacheMetricsForTaskKind {
229 100128 : read_accesses_materialized_page: {
230 100128 : PAGE_CACHE_READ_ACCESSES
231 100128 : .get_metric_with_label_values(&[
232 100128 : task_kind,
233 100128 : "materialized_page",
234 100128 : content_kind,
235 100128 : ])
236 100128 : .unwrap()
237 100128 : },
238 100128 :
239 100128 : read_accesses_immutable: {
240 100128 : PAGE_CACHE_READ_ACCESSES
241 100128 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
242 100128 : .unwrap()
243 100128 : },
244 100128 :
245 100128 : read_hits_immutable: {
246 100128 : PAGE_CACHE_READ_HITS
247 100128 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
248 100128 : .unwrap()
249 100128 : },
250 100128 :
251 100128 : read_hits_materialized_page_exact: {
252 100128 : PAGE_CACHE_READ_HITS
253 100128 : .get_metric_with_label_values(&[
254 100128 : task_kind,
255 100128 : "materialized_page",
256 100128 : content_kind,
257 100128 : "exact",
258 100128 : ])
259 100128 : .unwrap()
260 100128 : },
261 100128 :
262 100128 : read_hits_materialized_page_older_lsn: {
263 100128 : PAGE_CACHE_READ_HITS
264 100128 : .get_metric_with_label_values(&[
265 100128 : task_kind,
266 100128 : "materialized_page",
267 100128 : content_kind,
268 100128 : "older_lsn",
269 100128 : ])
270 100128 : .unwrap()
271 100128 : },
272 100128 : }
273 100128 : }))
274 16688 : })),
275 639 : });
276 :
277 : impl PageCacheMetrics {
278 350194888 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
279 350194888 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
280 350194888 : }
281 : }
282 :
/// Gauges describing the size of the page cache, in bytes.
pub(crate) struct PageCacheSizeMetrics {
    /// Maximum size of the page cache.
    pub max_bytes: UIntGauge,

    /// Current size for the `immutable` key kind.
    pub current_bytes_immutable: UIntGauge,
    /// Current size for the `materialized_page` key kind.
    pub current_bytes_materialized_page: UIntGauge,
}
289 :
290 698 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
291 698 : register_uint_gauge_vec!(
292 698 : "pageserver_page_cache_size_current_bytes",
293 698 : "Current size of the page cache in bytes, by key kind",
294 698 : &["key_kind"]
295 698 : )
296 698 : .expect("failed to define a metric")
297 698 : });
298 :
299 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
300 698 : Lazy::new(|| PageCacheSizeMetrics {
301 698 : max_bytes: {
302 698 : register_uint_gauge!(
303 698 : "pageserver_page_cache_size_max_bytes",
304 698 : "Maximum size of the page cache in bytes"
305 698 : )
306 698 : .expect("failed to define a metric")
307 698 : },
308 698 : current_bytes_immutable: {
309 698 : PAGE_CACHE_SIZE_CURRENT_BYTES
310 698 : .get_metric_with_label_values(&["immutable"])
311 698 : .unwrap()
312 698 : },
313 698 : current_bytes_materialized_page: {
314 698 : PAGE_CACHE_SIZE_CURRENT_BYTES
315 698 : .get_metric_with_label_values(&["materialized_page"])
316 698 : .unwrap()
317 698 : },
318 698 : });
319 :
320 : pub(crate) mod page_cache_eviction_metrics {
321 : use std::num::NonZeroUsize;
322 :
323 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
324 : use once_cell::sync::Lazy;
325 :
326 0 : #[derive(Clone, Copy)]
327 : pub(crate) enum Outcome {
328 : FoundSlotUnused { iters: NonZeroUsize },
329 : FoundSlotEvicted { iters: NonZeroUsize },
330 : ItersExceeded { iters: NonZeroUsize },
331 : }
332 :
333 593 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
334 593 : register_int_counter_vec!(
335 593 : "pageserver_page_cache_find_victim_iters_total",
336 593 : "Counter for the number of iterations in the find_victim loop",
337 593 : &["outcome"],
338 593 : )
339 593 : .expect("failed to define a metric")
340 593 : });
341 :
342 593 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
343 593 : register_int_counter_vec!(
344 593 : "pageserver_page_cache_find_victim_calls",
345 593 : "Incremented at the end of each find_victim() call.\
346 593 : Filter by outcome to get e.g., eviction rate.",
347 593 : &["outcome"]
348 593 : )
349 593 : .unwrap()
350 593 : });
351 :
352 11148096 : pub(crate) fn observe(outcome: Outcome) {
353 11148096 : macro_rules! dry {
354 11148096 : ($label:literal, $iters:expr) => {{
355 11148096 : static LABEL: &'static str = $label;
356 11148096 : static ITERS_TOTAL: Lazy<IntCounter> =
357 11148096 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
358 11148096 : static CALLS: Lazy<IntCounter> =
359 11148096 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
360 11148096 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
361 11148096 : CALLS.inc();
362 11148096 : }};
363 11148096 : }
364 11148096 : match outcome {
365 2866046 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
366 8282050 : Outcome::FoundSlotEvicted { iters } => {
367 8282050 : dry!("found_evicted", iters)
368 : }
369 0 : Outcome::ItersExceeded { iters } => {
370 0 : dry!("err_iters_exceeded", iters);
371 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
372 0 : }
373 : }
374 11148096 : }
375 : }
376 :
377 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
378 0 : register_int_counter_vec!(
379 0 : "page_cache_errors_total",
380 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
381 0 : &["error_kind"]
382 0 : )
383 0 : .expect("failed to define a metric")
384 0 : });
385 :
/// Kinds of page cache errors; converted to a static string to serve as the
/// `error_kind` label value.
#[derive(IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum PageCacheErrorKind {
    AcquirePinnedSlotTimeout,
    EvictIterLimit,
}
392 :
393 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
394 0 : PAGE_CACHE_ERRORS
395 0 : .get_metric_with_label_values(&[error_kind.into()])
396 0 : .unwrap()
397 0 : .inc();
398 0 : }
399 :
400 634 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
401 634 : register_histogram!(
402 634 : "pageserver_wait_lsn_seconds",
403 634 : "Time spent waiting for WAL to arrive",
404 634 : CRITICAL_OP_BUCKETS.into(),
405 634 : )
406 634 : .expect("failed to define a metric")
407 634 : });
408 :
409 652 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
410 652 : register_int_gauge_vec!(
411 652 : "pageserver_last_record_lsn",
412 652 : "Last record LSN grouped by timeline",
413 652 : &["tenant_id", "shard_id", "timeline_id"]
414 652 : )
415 652 : .expect("failed to define a metric")
416 652 : });
417 :
418 652 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
419 652 : register_uint_gauge_vec!(
420 652 : "pageserver_resident_physical_size",
421 652 : "The size of the layer files present in the pageserver's filesystem.",
422 652 : &["tenant_id", "shard_id", "timeline_id"]
423 652 : )
424 652 : .expect("failed to define a metric")
425 652 : });
426 :
427 650 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
428 650 : register_uint_gauge!(
429 650 : "pageserver_resident_physical_size_global",
430 650 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
431 650 : )
432 650 : .expect("failed to define a metric")
433 650 : });
434 :
435 652 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
436 652 : register_uint_gauge_vec!(
437 652 : "pageserver_remote_physical_size",
438 652 : "The size of the layer files present in the remote storage that are listed in the the remote index_part.json.",
439 652 : // Corollary: If any files are missing from the index part, they won't be included here.
440 652 : &["tenant_id", "shard_id", "timeline_id"]
441 652 : )
442 652 : .expect("failed to define a metric")
443 652 : });
444 :
445 652 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
446 652 : register_uint_gauge!(
447 652 : "pageserver_remote_physical_size_global",
448 652 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
449 652 : )
450 652 : .expect("failed to define a metric")
451 652 : });
452 :
453 88 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
454 88 : register_int_counter!(
455 88 : "pageserver_remote_ondemand_downloaded_layers_total",
456 88 : "Total on-demand downloaded layers"
457 88 : )
458 88 : .unwrap()
459 88 : });
460 :
461 88 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
462 88 : register_int_counter!(
463 88 : "pageserver_remote_ondemand_downloaded_bytes_total",
464 88 : "Total bytes of layers on-demand downloaded",
465 88 : )
466 88 : .unwrap()
467 88 : });
468 :
469 652 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
470 652 : register_uint_gauge_vec!(
471 652 : "pageserver_current_logical_size",
472 652 : "Current logical size grouped by timeline",
473 652 : &["tenant_id", "shard_id", "timeline_id"]
474 652 : )
475 652 : .expect("failed to define current logical size metric")
476 652 : });
477 :
478 : pub(crate) mod initial_logical_size {
479 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
480 : use once_cell::sync::Lazy;
481 :
482 : pub(crate) struct StartCalculation(IntCounterVec);
483 633 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
484 633 : StartCalculation(
485 633 : register_int_counter_vec!(
486 633 : "pageserver_initial_logical_size_start_calculation",
487 633 : "Incremented each time we start an initial logical size calculation attempt. \
488 633 : The `circumstances` label provides some additional details.",
489 633 : &["attempt", "circumstances"]
490 633 : )
491 633 : .unwrap(),
492 633 : )
493 633 : });
494 :
495 : struct DropCalculation {
496 : first: IntCounter,
497 : retry: IntCounter,
498 : }
499 :
500 633 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
501 633 : let vec = register_int_counter_vec!(
502 633 : "pageserver_initial_logical_size_drop_calculation",
503 633 : "Incremented each time we abort a started size calculation attmpt.",
504 633 : &["attempt"]
505 633 : )
506 633 : .unwrap();
507 633 : DropCalculation {
508 633 : first: vec.with_label_values(&["first"]),
509 633 : retry: vec.with_label_values(&["retry"]),
510 633 : }
511 633 : });
512 :
513 : pub(crate) struct Calculated {
514 : pub(crate) births: IntCounter,
515 : pub(crate) deaths: IntCounter,
516 : }
517 :
518 630 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
519 630 : births: register_int_counter!(
520 630 : "pageserver_initial_logical_size_finish_calculation",
521 630 : "Incremented every time we finish calculation of initial logical size.\
522 630 : If everything is working well, this should happen at most once per Timeline object."
523 630 : )
524 630 : .unwrap(),
525 630 : deaths: register_int_counter!(
526 630 : "pageserver_initial_logical_size_drop_finished_calculation",
527 630 : "Incremented when we drop a finished initial logical size calculation result.\
528 630 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
529 630 : )
530 630 : .unwrap(),
531 630 : });
532 :
533 : pub(crate) struct OngoingCalculationGuard {
534 : inc_drop_calculation: Option<IntCounter>,
535 : }
536 :
537 1304 : #[derive(strum_macros::IntoStaticStr)]
538 : pub(crate) enum StartCircumstances {
539 : EmptyInitial,
540 : SkippedConcurrencyLimiter,
541 : AfterBackgroundTasksRateLimit,
542 : }
543 :
544 : impl StartCalculation {
545 1304 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
546 1304 : let circumstances_label: &'static str = circumstances.into();
547 1304 : self.0
548 1304 : .with_label_values(&["first", circumstances_label])
549 1304 : .inc();
550 1304 : OngoingCalculationGuard {
551 1304 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
552 1304 : }
553 1304 : }
554 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
555 0 : let circumstances_label: &'static str = circumstances.into();
556 0 : self.0
557 0 : .with_label_values(&["retry", circumstances_label])
558 0 : .inc();
559 0 : OngoingCalculationGuard {
560 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
561 0 : }
562 0 : }
563 : }
564 :
565 : impl Drop for OngoingCalculationGuard {
566 1303 : fn drop(&mut self) {
567 1303 : if let Some(counter) = self.inc_drop_calculation.take() {
568 33 : counter.inc();
569 1270 : }
570 1303 : }
571 : }
572 :
573 : impl OngoingCalculationGuard {
574 1270 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
575 1270 : drop(self.inc_drop_calculation.take());
576 1270 : CALCULATED.births.inc();
577 1270 : FinishedCalculationGuard {
578 1270 : inc_on_drop: CALCULATED.deaths.clone(),
579 1270 : }
580 1270 : }
581 : }
582 :
583 : pub(crate) struct FinishedCalculationGuard {
584 : inc_on_drop: IntCounter,
585 : }
586 :
587 : impl Drop for FinishedCalculationGuard {
588 276 : fn drop(&mut self) {
589 276 : self.inc_on_drop.inc();
590 276 : }
591 : }
592 :
593 : // context: https://github.com/neondatabase/neon/issues/5963
594 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
595 41 : Lazy::new(|| {
596 41 : register_int_counter!(
597 41 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
598 41 : "Counter for the following event: walreceiver calls\
599 41 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
600 41 : )
601 41 : .unwrap()
602 41 : });
603 : }
604 :
605 675 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
606 675 : register_uint_gauge_vec!(
607 675 : "pageserver_tenant_states_count",
608 675 : "Count of tenants per state",
609 675 : &["state"]
610 675 : )
611 675 : .expect("Failed to register pageserver_tenant_states_count metric")
612 675 : });
613 :
614 : /// A set of broken tenants.
615 : ///
616 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
617 : /// tenant.
618 184 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
619 184 : register_uint_gauge_vec!(
620 184 : "pageserver_broken_tenants_count",
621 184 : "Set of broken tenants",
622 184 : &["tenant_id", "shard_id"]
623 184 : )
624 184 : .expect("Failed to register pageserver_tenant_states_count metric")
625 184 : });
626 :
627 221 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
628 221 : register_uint_gauge_vec!(
629 221 : "pageserver_tenant_synthetic_cached_size_bytes",
630 221 : "Synthetic size of each tenant in bytes",
631 221 : &["tenant_id"]
632 221 : )
633 221 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
634 221 : });
635 :
636 : // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
637 : // or in testing they estimate how much we would upload if we did.
638 652 : static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
639 652 : register_int_counter_vec!(
640 652 : "pageserver_created_persistent_files_total",
641 652 : "Number of files created that are meant to be uploaded to cloud storage",
642 652 : &["tenant_id", "shard_id", "timeline_id"]
643 652 : )
644 652 : .expect("failed to define a metric")
645 652 : });
646 :
647 652 : static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
648 652 : register_int_counter_vec!(
649 652 : "pageserver_written_persistent_bytes_total",
650 652 : "Total bytes written that are meant to be uploaded to cloud storage",
651 652 : &["tenant_id", "shard_id", "timeline_id"]
652 652 : )
653 652 : .expect("failed to define a metric")
654 652 : });
655 :
656 1 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
657 1 : register_histogram_vec!(
658 1 : "pageserver_eviction_iteration_duration_seconds_global",
659 1 : "Time spent on a single eviction iteration",
660 1 : &["period_secs", "threshold_secs"],
661 1 : STORAGE_OP_BUCKETS.into(),
662 1 : )
663 1 : .expect("failed to define a metric")
664 1 : });
665 :
666 652 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
667 652 : register_int_counter_vec!(
668 652 : "pageserver_evictions",
669 652 : "Number of layers evicted from the pageserver",
670 652 : &["tenant_id", "shard_id", "timeline_id"]
671 652 : )
672 652 : .expect("failed to define a metric")
673 652 : });
674 :
675 652 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
676 652 : register_int_counter_vec!(
677 652 : "pageserver_evictions_with_low_residence_duration",
678 652 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
679 652 : Residence duration is determined using the `residence_duration_data_source`.",
680 652 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
681 652 : )
682 652 : .expect("failed to define a metric")
683 652 : });
684 :
685 624 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
686 624 : register_int_counter!(
687 624 : "pageserver_unexpected_ondemand_downloads_count",
688 624 : "Number of unexpected on-demand downloads. \
689 624 : We log more context for each increment, so, forgo any labels in this metric.",
690 624 : )
691 624 : .expect("failed to define a metric")
692 624 : });
693 :
694 : /// How long did we take to start up? Broken down by labels to describe
695 : /// different phases of startup.
696 624 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
697 624 : register_gauge_vec!(
698 624 : "pageserver_startup_duration_seconds",
699 624 : "Time taken by phases of pageserver startup, in seconds",
700 624 : &["phase"]
701 624 : )
702 624 : .expect("Failed to register pageserver_startup_duration_seconds metric")
703 624 : });
704 :
705 624 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
706 624 : register_uint_gauge!(
707 624 : "pageserver_startup_is_loading",
708 624 : "1 while in initial startup load of tenants, 0 at other times"
709 624 : )
710 624 : .expect("Failed to register pageserver_startup_is_loading")
711 624 : });
712 :
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
/// like how long it took to load.
///
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
/// metrics are rather expensive, and usually fine grained stuff makes more sense
/// at a timeline level than tenant level.
pub(crate) struct TenantMetrics {
    /// How long did tenants take to go from construction to active state?
    pub(crate) activation: Histogram,
    /// How long did tenants take to load remote metadata on startup/attach?
    pub(crate) preload: Histogram,
    /// How long did tenants take to initialize once remote metadata was loaded?
    pub(crate) attach: Histogram,

    /// How many tenants are included in the initial startup of the pageserver?
    pub(crate) startup_scheduled: IntCounter,
    /// How many tenants have completed warm-up, or were activated on demand
    /// during initial startup?
    pub(crate) startup_complete: IntCounter,
}
729 :
730 624 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
731 624 : TenantMetrics {
732 624 : activation: register_histogram!(
733 624 : "pageserver_tenant_activation_seconds",
734 624 : "Time taken by tenants to activate, in seconds",
735 624 : CRITICAL_OP_BUCKETS.into()
736 624 : )
737 624 : .expect("Failed to register metric"),
738 624 : preload: register_histogram!(
739 624 : "pageserver_tenant_preload_seconds",
740 624 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
741 624 : CRITICAL_OP_BUCKETS.into()
742 624 : )
743 624 : .expect("Failed to register metric"),
744 624 : attach: register_histogram!(
745 624 : "pageserver_tenant_attach_seconds",
746 624 : "Time taken by tenants to intialize, after remote metadata is already loaded",
747 624 : CRITICAL_OP_BUCKETS.into()
748 624 : )
749 624 : .expect("Failed to register metric"),
750 624 : startup_scheduled: register_int_counter!(
751 624 : "pageserver_tenant_startup_scheduled",
752 624 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
753 624 : ).expect("Failed to register metric"),
754 624 : startup_complete: register_int_counter!(
755 624 : "pageserver_tenant_startup_complete",
756 624 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
757 624 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
758 624 : tenants: such cases will lead to this metric never reaching the scheduled count."
759 624 : ).expect("Failed to register metric"),
760 624 : }
761 624 : });
762 :
/// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub(crate) struct EvictionsWithLowResidenceDuration {
    /// Value of the `residence_duration_data_source` label.
    data_source: &'static str,
    /// Evictions observed with a residence duration below this are counted.
    threshold: Duration,
    /// The resolved counter series; `None` once it has been removed from the
    /// counter vector.
    counter: Option<IntCounter>,
}
770 :
/// Holds the data source and threshold needed to build an
/// [`EvictionsWithLowResidenceDuration`] once the tenant, shard and timeline
/// ids are known.
pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
    data_source: &'static str,
    threshold: Duration,
}
775 :
776 : impl EvictionsWithLowResidenceDurationBuilder {
777 1598 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
778 1598 : Self {
779 1598 : data_source,
780 1598 : threshold,
781 1598 : }
782 1598 : }
783 :
784 1598 : fn build(
785 1598 : &self,
786 1598 : tenant_id: &str,
787 1598 : shard_id: &str,
788 1598 : timeline_id: &str,
789 1598 : ) -> EvictionsWithLowResidenceDuration {
790 1598 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
791 1598 : .get_metric_with_label_values(&[
792 1598 : tenant_id,
793 1598 : shard_id,
794 1598 : timeline_id,
795 1598 : self.data_source,
796 1598 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
797 1598 : ])
798 1598 : .unwrap();
799 1598 : EvictionsWithLowResidenceDuration {
800 1598 : data_source: self.data_source,
801 1598 : threshold: self.threshold,
802 1598 : counter: Some(counter),
803 1598 : }
804 1598 : }
805 : }
806 :
807 : impl EvictionsWithLowResidenceDuration {
808 1950 : fn threshold_label_value(threshold: Duration) -> String {
809 1950 : format!("{}", threshold.as_secs())
810 1950 : }
811 :
812 2552 : pub fn observe(&self, observed_value: Duration) {
813 2552 : if observed_value < self.threshold {
814 2552 : self.counter
815 2552 : .as_ref()
816 2552 : .expect("nobody calls this function after `remove_from_vec`")
817 2552 : .inc();
818 2552 : }
819 2552 : }
820 :
821 61 : pub fn change_threshold(
822 61 : &mut self,
823 61 : tenant_id: &str,
824 61 : shard_id: &str,
825 61 : timeline_id: &str,
826 61 : new_threshold: Duration,
827 61 : ) {
828 61 : if new_threshold == self.threshold {
829 53 : return;
830 8 : }
831 8 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
832 8 : self.data_source,
833 8 : new_threshold,
834 8 : )
835 8 : .build(tenant_id, shard_id, timeline_id);
836 8 : std::mem::swap(self, &mut with_new);
837 8 : with_new.remove(tenant_id, shard_id, timeline_id);
838 61 : }
839 :
840 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
841 352 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
842 352 : let Some(_counter) = self.counter.take() else {
843 0 : return;
844 : };
845 :
846 352 : let threshold = Self::threshold_label_value(self.threshold);
847 352 :
848 352 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
849 352 : tenant_id,
850 352 : shard_id,
851 352 : timeline_id,
852 352 : self.data_source,
853 352 : &threshold,
854 352 : ]);
855 352 :
856 352 : match removed {
857 0 : Err(e) => {
858 0 : // this has been hit in staging as
859 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
860 0 : // because we can be in the drop path already, don't risk:
861 0 : // - "double-panic => illegal instruction" or
862 0 : // - future "drop panick => abort"
863 0 : //
864 0 : // so just nag: (the error has the labels)
865 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
866 : }
867 : Ok(()) => {
868 : // to help identify cases where we double-remove the same values, let's log all
869 : // deletions?
870 352 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
871 : }
872 : }
873 352 : }
874 : }
875 :
// Metrics collected on disk IO operations
//
// Histogram bucket upper bounds, expressed in seconds.
// Roughly logarithmic scale: each bound is about 30x the previous one.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000030, // 30 usec
    0.001000, // 1000 usec
    0.030,    // 30 ms
    1.000,    // 1000 ms
    30.000,   // 30000 ms
];
886 :
887 : /// VirtualFile fs operation variants.
888 : ///
889 : /// Operations:
890 : /// - open ([`std::fs::OpenOptions::open`])
891 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
892 : /// - close-by-replace (close by replacement algorithm)
893 : /// - read (`read_at`)
894 : /// - write (`write_at`)
895 : /// - seek (modify internal position or file length query)
896 : /// - fsync ([`std::fs::File::sync_all`])
897 : /// - metadata ([`std::fs::File::metadata`])
898 : #[derive(
899 6363 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
900 : )]
901 : pub(crate) enum StorageIoOperation {
902 : Open,
903 : OpenAfterReplace,
904 : Close,
905 : CloseByReplace,
906 : Read,
907 : Write,
908 : Seek,
909 : Fsync,
910 : Metadata,
911 : }
912 :
913 : impl StorageIoOperation {
914 6363 : pub fn as_str(&self) -> &'static str {
915 6363 : match self {
916 707 : StorageIoOperation::Open => "open",
917 707 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
918 707 : StorageIoOperation::Close => "close",
919 707 : StorageIoOperation::CloseByReplace => "close-by-replace",
920 707 : StorageIoOperation::Read => "read",
921 707 : StorageIoOperation::Write => "write",
922 707 : StorageIoOperation::Seek => "seek",
923 707 : StorageIoOperation::Fsync => "fsync",
924 707 : StorageIoOperation::Metadata => "metadata",
925 : }
926 6363 : }
927 : }
928 :
929 : /// Tracks time taken by fs operations near VirtualFile.
930 0 : #[derive(Debug)]
931 : pub(crate) struct StorageIoTime {
932 : metrics: [Histogram; StorageIoOperation::COUNT],
933 : }
934 :
935 : impl StorageIoTime {
936 707 : fn new() -> Self {
937 707 : let storage_io_histogram_vec = register_histogram_vec!(
938 707 : "pageserver_io_operations_seconds",
939 707 : "Time spent in IO operations",
940 707 : &["operation"],
941 707 : STORAGE_IO_TIME_BUCKETS.into()
942 707 : )
943 707 : .expect("failed to define a metric");
944 6363 : let metrics = std::array::from_fn(|i| {
945 6363 : let op = StorageIoOperation::from_repr(i).unwrap();
946 6363 : storage_io_histogram_vec
947 6363 : .get_metric_with_label_values(&[op.as_str()])
948 6363 : .unwrap()
949 6363 : });
950 707 : Self { metrics }
951 707 : }
952 :
953 14741041 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
954 14741041 : &self.metrics[op as usize]
955 14741041 : }
956 : }
957 :
/// Process-wide [`StorageIoTime`] instance, constructed lazily on first access.
pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);

/// The operation names `"read"` and `"write"`.
// NOTE(review): presumably the `operation` label values used with `STORAGE_IO_SIZE`;
// confirm at the use sites.
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
961 :
962 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
963 707 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
964 707 : register_int_gauge_vec!(
965 707 : "pageserver_io_operations_bytes_total",
966 707 : "Total amount of bytes read/written in IO operations",
967 707 : &["operation", "tenant_id", "shard_id", "timeline_id"]
968 707 : )
969 707 : .expect("failed to define a metric")
970 707 : });
971 :
972 : #[cfg(not(test))]
973 : pub(crate) mod virtual_file_descriptor_cache {
974 : use super::*;
975 :
976 624 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
977 624 : register_uint_gauge!(
978 624 : "pageserver_virtual_file_descriptor_cache_size_max",
979 624 : "Maximum number of open file descriptors in the cache."
980 624 : )
981 624 : .unwrap()
982 624 : });
983 :
984 : // SIZE_CURRENT: derive it like so:
985 : // ```
986 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
987 : // -ignoring(operation)
988 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
989 : // ```
990 : }
991 :
992 : #[cfg(not(test))]
993 : pub(crate) mod virtual_file_io_engine {
994 : use super::*;
995 :
996 624 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
997 624 : register_uint_gauge_vec!(
998 624 : "pageserver_virtual_file_io_engine_kind",
999 624 : "The configured io engine for VirtualFile",
1000 624 : &["kind"],
1001 624 : )
1002 624 : .unwrap()
1003 624 : });
1004 : }
1005 :
1006 0 : #[derive(Debug)]
1007 : struct GlobalAndPerTimelineHistogram {
1008 : global: Histogram,
1009 : per_tenant_timeline: Histogram,
1010 : }
1011 :
1012 : impl GlobalAndPerTimelineHistogram {
1013 4509147 : fn observe(&self, value: f64) {
1014 4509147 : self.global.observe(value);
1015 4509147 : self.per_tenant_timeline.observe(value);
1016 4509147 : }
1017 : }
1018 :
1019 : struct GlobalAndPerTimelineHistogramTimer<'a> {
1020 : h: &'a GlobalAndPerTimelineHistogram,
1021 : start: std::time::Instant,
1022 : }
1023 :
1024 : impl<'a> Drop for GlobalAndPerTimelineHistogramTimer<'a> {
1025 4509147 : fn drop(&mut self) {
1026 4509147 : let elapsed = self.start.elapsed();
1027 4509147 : self.h.observe(elapsed.as_secs_f64());
1028 4509147 : }
1029 : }
1030 :
/// Kinds of smgr queries whose handling time is tracked.
///
/// The `snake_case` string form (e.g. `get_page_at_lsn`) is used as the
/// `smgr_query_type` label value, and the discriminant doubles as the index
/// into `SmgrQueryTimePerTimeline::metrics`, so variant order matters.
#[derive(
    Debug,
    Clone,
    Copy,
    IntoStaticStr,
    strum_macros::EnumCount,
    strum_macros::EnumIter,
    strum_macros::FromRepr,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
    GetRelExists,
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
    GetSlruSegment,
}
1048 :
/// Per-timeline smgr query timing metrics: one global/per-timeline histogram
/// pair for every [`SmgrQueryType`], indexed by the variant's discriminant.
#[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline {
    metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
}
1053 :
1054 654 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1055 654 : register_histogram_vec!(
1056 654 : "pageserver_smgr_query_seconds",
1057 654 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1058 654 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1059 654 : CRITICAL_OP_BUCKETS.into(),
1060 654 : )
1061 654 : .expect("failed to define a metric")
1062 654 : });
1063 :
1064 654 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1065 654 : [
1066 654 : 1,
1067 654 : 10,
1068 654 : 20,
1069 654 : 40,
1070 654 : 60,
1071 654 : 80,
1072 654 : 100,
1073 654 : 200,
1074 654 : 300,
1075 654 : 400,
1076 654 : 500,
1077 654 : 600,
1078 654 : 700,
1079 654 : 800,
1080 654 : 900,
1081 654 : 1_000, // 1ms
1082 654 : 2_000,
1083 654 : 4_000,
1084 654 : 6_000,
1085 654 : 8_000,
1086 654 : 10_000, // 10ms
1087 654 : 20_000,
1088 654 : 40_000,
1089 654 : 60_000,
1090 654 : 80_000,
1091 654 : 100_000,
1092 654 : 200_000,
1093 654 : 400_000,
1094 654 : 600_000,
1095 654 : 800_000,
1096 654 : 1_000_000, // 1s
1097 654 : 2_000_000,
1098 654 : 4_000_000,
1099 654 : 6_000_000,
1100 654 : 8_000_000,
1101 654 : 10_000_000, // 10s
1102 654 : 20_000_000,
1103 654 : 50_000_000,
1104 654 : 100_000_000,
1105 654 : 200_000_000,
1106 654 : 1_000_000_000, // 1000s
1107 654 : ]
1108 654 : .into_iter()
1109 654 : .map(Duration::from_micros)
1110 26814 : .map(|d| d.as_secs_f64())
1111 654 : .collect()
1112 654 : });
1113 :
1114 654 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1115 654 : register_histogram_vec!(
1116 654 : "pageserver_smgr_query_seconds_global",
1117 654 : "Time spent on smgr query handling, aggregated by query type.",
1118 654 : &["smgr_query_type"],
1119 654 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1120 654 : )
1121 654 : .expect("failed to define a metric")
1122 654 : });
1123 :
1124 : impl SmgrQueryTimePerTimeline {
1125 1600 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1126 1600 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1127 1600 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1128 1600 : let timeline_id = timeline_id.to_string();
1129 8000 : let metrics = std::array::from_fn(|i| {
1130 8000 : let op = SmgrQueryType::from_repr(i).unwrap();
1131 8000 : let global = SMGR_QUERY_TIME_GLOBAL
1132 8000 : .get_metric_with_label_values(&[op.into()])
1133 8000 : .unwrap();
1134 8000 : let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1135 8000 : .get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id])
1136 8000 : .unwrap();
1137 8000 : GlobalAndPerTimelineHistogram {
1138 8000 : global,
1139 8000 : per_tenant_timeline,
1140 8000 : }
1141 8000 : });
1142 1600 : Self { metrics }
1143 1600 : }
1144 4509147 : pub(crate) fn start_timer(&self, op: SmgrQueryType) -> impl Drop + '_ {
1145 4509147 : let metric = &self.metrics[op as usize];
1146 4509147 : GlobalAndPerTimelineHistogramTimer {
1147 4509147 : h: metric,
1148 4509147 : start: std::time::Instant::now(),
1149 4509147 : }
1150 4509147 : }
1151 : }
1152 :
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    // Regression test, we used hard-coded string constants before using an enum.
    // Pins the label value produced for every query type.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let cases: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        for (op, expected_label) in cases {
            let label: &'static str = op.into();
            assert_eq!(label, expected_label);
        }
    }

    // A started-and-dropped timer must add exactly one sample to the fresh
    // per-timeline histograms and at least one to the shared global ones.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in &ops {
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // Sums the sample counts over all query types: (global, per-timeline).
            let sample_counts = || {
                let mut global = 0u64;
                let mut per_tenant_timeline = 0u64;
                for query_type in &ops {
                    let pair = &metrics.metrics[*query_type as usize];
                    global += pair.global.get_sample_count();
                    per_tenant_timeline += pair.per_tenant_timeline.get_sample_count();
                }
                (global, per_tenant_timeline)
            };

            let (global_before, per_timeline_before) = sample_counts();
            assert_eq!(per_timeline_before, 0);

            drop(metrics.start_timer(*op));

            let (global_after, per_timeline_after) = sample_counts();
            assert_eq!(per_timeline_after, 1);
            assert!(global_after > global_before);
        }
    }
}
1216 :
1217 : // keep in sync with control plane Go code so that we can validate
1218 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1219 357 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1220 357 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1221 357 : [
1222 357 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1223 357 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1224 357 : ]
1225 9996 : .map(|ms| (ms as f64) / 1000.0)
1226 357 : });
1227 :
1228 : pub(crate) struct BasebackupQueryTime(HistogramVec);
1229 357 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1230 357 : BasebackupQueryTime({
1231 357 : register_histogram_vec!(
1232 357 : "pageserver_basebackup_query_seconds",
1233 357 : "Histogram of basebackup queries durations, by result type",
1234 357 : &["result"],
1235 357 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1236 357 : )
1237 357 : .expect("failed to define a metric")
1238 357 : })
1239 357 : });
1240 :
1241 : impl DurationResultObserver for BasebackupQueryTime {
1242 628 : fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration) {
1243 628 : let label_value = if res.is_ok() { "ok" } else { "error" };
1244 628 : let metric = self.0.get_metric_with_label_values(&[label_value]).unwrap();
1245 628 : metric.observe(duration.as_secs_f64());
1246 628 : }
1247 : }
1248 :
1249 458 : pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
1250 458 : register_int_gauge_vec!(
1251 458 : "pageserver_live_connections",
1252 458 : "Number of live network connections",
1253 458 : &["pageserver_connection_kind"]
1254 458 : )
1255 458 : .expect("failed to define a metric")
1256 458 : });
1257 :
1258 : // remote storage metrics
1259 :
1260 : /// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
1261 649 : static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
1262 649 : register_int_gauge_vec!(
1263 649 : "pageserver_remote_timeline_client_calls_unfinished",
1264 649 : "Number of ongoing calls to remote timeline client. \
1265 649 : Used to populate pageserver_remote_timeline_client_calls_started. \
1266 649 : This metric is not useful for sampling from Prometheus, but useful in tests.",
1267 649 : &[
1268 649 : "tenant_id",
1269 649 : "shard_id",
1270 649 : "timeline_id",
1271 649 : "file_kind",
1272 649 : "op_kind"
1273 649 : ],
1274 649 : )
1275 649 : .expect("failed to define a metric")
1276 649 : });
1277 :
1278 649 : static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new(|| {
1279 649 : register_histogram_vec!(
1280 649 : "pageserver_remote_timeline_client_calls_started",
1281 649 : "When calling a remote timeline client method, we record the current value \
1282 649 : of the calls_unfinished gauge in this histogram. Plot the histogram \
1283 649 : over time in a heatmap to visualize how many operations were ongoing \
1284 649 : at a given instant. It gives you a better idea of the queue depth \
1285 649 : than plotting the gauge directly, since operations may complete faster \
1286 649 : than the sampling interval.",
1287 649 : &["file_kind", "op_kind"],
1288 649 : // The calls_unfinished gauge is an integer gauge, hence we have integer buckets.
1289 649 : vec![0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 15.0, 20.0, 40.0, 60.0, 80.0, 100.0, 500.0],
1290 649 : )
1291 649 : .expect("failed to define a metric")
1292 649 : });
1293 :
1294 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1295 506 : Lazy::new(|| {
1296 506 : register_int_counter_vec!(
1297 506 : "pageserver_remote_timeline_client_bytes_started",
1298 506 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1299 506 : The increment happens when the operation is scheduled.",
1300 506 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1301 506 : )
1302 506 : .expect("failed to define a metric")
1303 506 : });
1304 :
1305 506 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1306 506 : register_int_counter_vec!(
1307 506 : "pageserver_remote_timeline_client_bytes_finished",
1308 506 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1309 506 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1310 506 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1311 506 : )
1312 506 : .expect("failed to define a metric")
1313 506 : });
1314 :
1315 : pub(crate) struct TenantManagerMetrics {
1316 : pub(crate) tenant_slots: UIntGauge,
1317 : pub(crate) tenant_slot_writes: IntCounter,
1318 : pub(crate) unexpected_errors: IntCounter,
1319 : }
1320 :
1321 626 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1322 626 : TenantManagerMetrics {
1323 626 : tenant_slots: register_uint_gauge!(
1324 626 : "pageserver_tenant_manager_slots",
1325 626 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1326 626 : )
1327 626 : .expect("failed to define a metric"),
1328 626 : tenant_slot_writes: register_int_counter!(
1329 626 : "pageserver_tenant_manager_slot_writes",
1330 626 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1331 626 : )
1332 626 : .expect("failed to define a metric"),
1333 626 : unexpected_errors: register_int_counter!(
1334 626 : "pageserver_tenant_manager_unexpected_errors_total",
1335 626 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1336 626 : )
1337 626 : .expect("failed to define a metric"),
1338 626 : }
1339 626 : });
1340 :
1341 : pub(crate) struct DeletionQueueMetrics {
1342 : pub(crate) keys_submitted: IntCounter,
1343 : pub(crate) keys_dropped: IntCounter,
1344 : pub(crate) keys_executed: IntCounter,
1345 : pub(crate) keys_validated: IntCounter,
1346 : pub(crate) dropped_lsn_updates: IntCounter,
1347 : pub(crate) unexpected_errors: IntCounter,
1348 : pub(crate) remote_errors: IntCounterVec,
1349 : }
1350 637 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1351 637 : DeletionQueueMetrics{
1352 637 :
1353 637 : keys_submitted: register_int_counter!(
1354 637 : "pageserver_deletion_queue_submitted_total",
1355 637 : "Number of objects submitted for deletion"
1356 637 : )
1357 637 : .expect("failed to define a metric"),
1358 637 :
1359 637 : keys_dropped: register_int_counter!(
1360 637 : "pageserver_deletion_queue_dropped_total",
1361 637 : "Number of object deletions dropped due to stale generation."
1362 637 : )
1363 637 : .expect("failed to define a metric"),
1364 637 :
1365 637 : keys_executed: register_int_counter!(
1366 637 : "pageserver_deletion_queue_executed_total",
1367 637 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1368 637 : )
1369 637 : .expect("failed to define a metric"),
1370 637 :
1371 637 : keys_validated: register_int_counter!(
1372 637 : "pageserver_deletion_queue_validated_total",
1373 637 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1374 637 : )
1375 637 : .expect("failed to define a metric"),
1376 637 :
1377 637 : dropped_lsn_updates: register_int_counter!(
1378 637 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1379 637 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1380 637 : )
1381 637 : .expect("failed to define a metric"),
1382 637 : unexpected_errors: register_int_counter!(
1383 637 : "pageserver_deletion_queue_unexpected_errors_total",
1384 637 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1385 637 : )
1386 637 : .expect("failed to define a metric"),
1387 637 : remote_errors: register_int_counter_vec!(
1388 637 : "pageserver_deletion_queue_remote_errors_total",
1389 637 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1390 637 : &["op_kind"],
1391 637 : )
1392 637 : .expect("failed to define a metric")
1393 637 : }
1394 637 : });
1395 :
1396 : pub(crate) struct WalIngestMetrics {
1397 : pub(crate) records_received: IntCounter,
1398 : pub(crate) records_committed: IntCounter,
1399 : pub(crate) records_filtered: IntCounter,
1400 : }
1401 :
1402 496 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
1403 496 : records_received: register_int_counter!(
1404 496 : "pageserver_wal_ingest_records_received",
1405 496 : "Number of WAL records received from safekeepers"
1406 496 : )
1407 496 : .expect("failed to define a metric"),
1408 496 : records_committed: register_int_counter!(
1409 496 : "pageserver_wal_ingest_records_committed",
1410 496 : "Number of WAL records which resulted in writes to pageserver storage"
1411 496 : )
1412 496 : .expect("failed to define a metric"),
1413 496 : records_filtered: register_int_counter!(
1414 496 : "pageserver_wal_ingest_records_filtered",
1415 496 : "Number of WAL records filtered out due to sharding"
1416 496 : )
1417 496 : .expect("failed to define a metric"),
1418 496 : });
1419 : pub(crate) struct SecondaryModeMetrics {
1420 : pub(crate) upload_heatmap: IntCounter,
1421 : pub(crate) upload_heatmap_errors: IntCounter,
1422 : pub(crate) upload_heatmap_duration: Histogram,
1423 : pub(crate) download_heatmap: IntCounter,
1424 : pub(crate) download_layer: IntCounter,
1425 : }
1426 12 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
1427 12 : upload_heatmap: register_int_counter!(
1428 12 : "pageserver_secondary_upload_heatmap",
1429 12 : "Number of heatmaps written to remote storage by attached tenants"
1430 12 : )
1431 12 : .expect("failed to define a metric"),
1432 12 : upload_heatmap_errors: register_int_counter!(
1433 12 : "pageserver_secondary_upload_heatmap_errors",
1434 12 : "Failures writing heatmap to remote storage"
1435 12 : )
1436 12 : .expect("failed to define a metric"),
1437 12 : upload_heatmap_duration: register_histogram!(
1438 12 : "pageserver_secondary_upload_heatmap_duration",
1439 12 : "Time to build and upload a heatmap, including any waiting inside the S3 client"
1440 12 : )
1441 12 : .expect("failed to define a metric"),
1442 12 : download_heatmap: register_int_counter!(
1443 12 : "pageserver_secondary_download_heatmap",
1444 12 : "Number of downloads of heatmaps by secondary mode locations"
1445 12 : )
1446 12 : .expect("failed to define a metric"),
1447 12 : download_layer: register_int_counter!(
1448 12 : "pageserver_secondary_download_layer",
1449 12 : "Number of downloads of layers by secondary mode locations"
1450 12 : )
1451 12 : .expect("failed to define a metric"),
1452 12 : });
1453 :
/// Kind of remote storage operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase name of the operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1469 :
/// Kind of file a remote storage operation acts on.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase name of the file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1483 :
1484 649 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1485 649 : register_histogram_vec!(
1486 649 : "pageserver_remote_operation_seconds",
1487 649 : "Time spent on remote storage operations. \
1488 649 : Grouped by tenant, timeline, operation_kind and status. \
1489 649 : Does not account for time spent waiting in remote timeline client's queues.",
1490 649 : &["file_kind", "op_kind", "status"]
1491 649 : )
1492 649 : .expect("failed to define a metric")
1493 649 : });
1494 :
1495 572 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1496 572 : register_int_counter_vec!(
1497 572 : "pageserver_tenant_task_events",
1498 572 : "Number of task start/stop/fail events.",
1499 572 : &["event"],
1500 572 : )
1501 572 : .expect("Failed to register tenant_task_events metric")
1502 572 : });
1503 :
1504 503 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
1505 1006 : register_int_counter_pair_vec!(
1506 1006 : "pageserver_background_loop_semaphore_wait_start_count",
1507 1006 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
1508 1006 : "pageserver_background_loop_semaphore_wait_finish_count",
1509 1006 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
1510 1006 : &["task"],
1511 1006 : )
1512 503 : .unwrap()
1513 503 : });
1514 :
1515 624 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
1516 624 : register_int_counter_vec!(
1517 624 : "pageserver_background_loop_period_overrun_count",
1518 624 : "Incremented whenever warn_when_period_overrun() logs a warning.",
1519 624 : &["task", "period"],
1520 624 : )
1521 624 : .expect("failed to define a metric")
1522 624 : });
1523 :
1524 : // walreceiver metrics
1525 :
1526 624 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
1527 624 : register_int_counter!(
1528 624 : "pageserver_walreceiver_started_connections_total",
1529 624 : "Number of started walreceiver connections"
1530 624 : )
1531 624 : .expect("failed to define a metric")
1532 624 : });
1533 :
1534 624 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
1535 624 : register_int_gauge!(
1536 624 : "pageserver_walreceiver_active_managers",
1537 624 : "Number of active walreceiver managers"
1538 624 : )
1539 624 : .expect("failed to define a metric")
1540 624 : });
1541 :
1542 430 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
1543 430 : register_int_counter_vec!(
1544 430 : "pageserver_walreceiver_switches_total",
1545 430 : "Number of walreceiver manager change_connection calls",
1546 430 : &["reason"]
1547 430 : )
1548 430 : .expect("failed to define a metric")
1549 430 : });
1550 :
1551 624 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
1552 624 : register_int_counter!(
1553 624 : "pageserver_walreceiver_broker_updates_total",
1554 624 : "Number of received broker updates in walreceiver"
1555 624 : )
1556 624 : .expect("failed to define a metric")
1557 624 : });
1558 :
1559 626 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1560 626 : register_int_counter_vec!(
1561 626 : "pageserver_walreceiver_candidates_events_total",
1562 626 : "Number of walreceiver candidate events",
1563 626 : &["event"]
1564 626 : )
1565 626 : .expect("failed to define a metric")
1566 626 : });
1567 :
/// Child of [`WALRECEIVER_CANDIDATES_EVENTS`] with `event="add"`, resolved once
/// on first access.
pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));

/// Child of [`WALRECEIVER_CANDIDATES_EVENTS`] with `event="remove"`, resolved
/// once on first access.
pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
1573 :
1574 : // Metrics collected on WAL redo operations
1575 : //
1576 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
1577 : // for access to the postgres process ('wait') since there is only one for
1578 : // each tenant.
1579 :
/// Expands to a `Vec` of histogram bucket bounds (in seconds) for WAL redo timings.
///
/// Time buckets are small because we want to be able to measure the
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
            1.000_000,
        ]
    };
}
1596 :
/// Expands to a `Vec` of histogram bucket bounds for record counts (0 to 500).
///
/// While we're at it, also measure the amount of records replayed in each
/// operation. We have a global 'total replayed' counter, but that's not
/// as useful as 'what is the skew for how many records we replay in one
/// operation'.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
    };
}
1606 :
/// Expands to a `Vec` of histogram bucket bounds for byte sizes, from 24 up to
/// 32768.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
        ]
    };
}
1617 :
1618 630 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
1619 630 : register_histogram!(
1620 630 : "pageserver_wal_redo_seconds",
1621 630 : "Time spent on WAL redo",
1622 630 : redo_histogram_time_buckets!()
1623 630 : )
1624 630 : .expect("failed to define a metric")
1625 630 : });
1626 :
1627 630 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1628 630 : register_histogram!(
1629 630 : "pageserver_wal_redo_records_histogram",
1630 630 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
1631 630 : redo_histogram_count_buckets!(),
1632 630 : )
1633 630 : .expect("failed to define a metric")
1634 630 : });
1635 :
1636 630 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1637 630 : register_histogram!(
1638 630 : "pageserver_wal_redo_bytes_histogram",
1639 630 : "Histogram of number of records replayed per redo sent to Postgres",
1640 630 : redo_bytes_histogram_count_buckets!(),
1641 630 : )
1642 630 : .expect("failed to define a metric")
1643 630 : });
1644 :
1645 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
1646 400 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
1647 400 : register_int_counter!(
1648 400 : "pageserver_replayed_wal_records_total",
1649 400 : "Number of WAL records replayed in WAL redo process"
1650 400 : )
1651 400 : .unwrap()
1652 400 : });
1653 :
1654 : #[rustfmt::skip]
1655 630 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1656 630 : register_histogram!(
1657 630 : "pageserver_wal_redo_process_launch_duration",
1658 630 : "Histogram of the duration of successful WalRedoProcess::launch calls",
1659 630 : vec![
1660 630 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
1661 630 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
1662 630 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
1663 630 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
1664 630 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
1665 630 : ],
1666 630 : )
1667 630 : .expect("failed to define a metric")
1668 630 : });
1669 :
1670 : pub(crate) struct WalRedoProcessCounters {
1671 : pub(crate) started: IntCounter,
1672 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
1673 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
1674 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
1675 : }
1676 :
/// Why a WAL redo process was stopped.
///
/// Each variant is converted to a `&'static str` (via `IntoStaticStr`) and used
/// as the `cause` label value of `pageserver_wal_redo_process_stopped_total`;
/// the `enum_map::Enum` derive lets it key
/// `WalRedoProcessCounters::killed_by_cause`.
#[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
pub(crate) enum WalRedoKillCause {
    WalRedoProcessDrop,
    NoLeakChildDrop,
    Startup,
}
1683 :
1684 : impl Default for WalRedoProcessCounters {
1685 400 : fn default() -> Self {
1686 400 : let started = register_int_counter!(
1687 400 : "pageserver_wal_redo_process_started_total",
1688 400 : "Number of WAL redo processes started",
1689 400 : )
1690 400 : .unwrap();
1691 400 :
1692 400 : let killed = register_int_counter_vec!(
1693 400 : "pageserver_wal_redo_process_stopped_total",
1694 400 : "Number of WAL redo processes stopped",
1695 400 : &["cause"],
1696 400 : )
1697 400 : .unwrap();
1698 400 :
1699 400 : let active_stderr_logger_tasks_started = register_int_counter!(
1700 400 : "pageserver_walredo_stderr_logger_tasks_started_total",
1701 400 : "Number of active walredo stderr logger tasks that have started",
1702 400 : )
1703 400 : .unwrap();
1704 400 :
1705 400 : let active_stderr_logger_tasks_finished = register_int_counter!(
1706 400 : "pageserver_walredo_stderr_logger_tasks_finished_total",
1707 400 : "Number of active walredo stderr logger tasks that have finished",
1708 400 : )
1709 400 : .unwrap();
1710 400 :
1711 400 : Self {
1712 400 : started,
1713 1200 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
1714 1200 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
1715 1200 : let cause_str: &'static str = cause.into();
1716 1200 : killed.with_label_values(&[cause_str])
1717 1200 : })),
1718 400 : active_stderr_logger_tasks_started,
1719 400 : active_stderr_logger_tasks_finished,
1720 400 : }
1721 400 : }
1722 : }
1723 :
1724 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
1725 : Lazy::new(WalRedoProcessCounters::default);
1726 :
1727 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
1728 : pub(crate) struct StorageTimeMetricsTimer {
1729 : metrics: StorageTimeMetrics,
1730 : start: Instant,
1731 : }
1732 :
1733 : impl StorageTimeMetricsTimer {
1734 10355 : fn new(metrics: StorageTimeMetrics) -> Self {
1735 10355 : Self {
1736 10355 : metrics,
1737 10355 : start: Instant::now(),
1738 10355 : }
1739 10355 : }
1740 :
1741 : /// Record the time from creation to now.
1742 10313 : pub fn stop_and_record(self) {
1743 10313 : let duration = self.start.elapsed().as_secs_f64();
1744 10313 : self.metrics.timeline_sum.inc_by(duration);
1745 10313 : self.metrics.timeline_count.inc();
1746 10313 : self.metrics.global_histogram.observe(duration);
1747 10313 : }
1748 : }
1749 :
1750 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
1751 : /// timeline total sum and count.
1752 10355 : #[derive(Clone, Debug)]
1753 : pub(crate) struct StorageTimeMetrics {
1754 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
1755 : timeline_sum: Counter,
1756 : /// Number of oeprations, per operation, tenant_id and timeline_id
1757 : timeline_count: IntCounter,
1758 : /// Global histogram having only the "operation" label.
1759 : global_histogram: Histogram,
1760 : }
1761 :
1762 : impl StorageTimeMetrics {
1763 11130 : pub fn new(
1764 11130 : operation: StorageTimeOperation,
1765 11130 : tenant_id: &str,
1766 11130 : shard_id: &str,
1767 11130 : timeline_id: &str,
1768 11130 : ) -> Self {
1769 11130 : let operation: &'static str = operation.into();
1770 11130 :
1771 11130 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
1772 11130 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1773 11130 : .unwrap();
1774 11130 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
1775 11130 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1776 11130 : .unwrap();
1777 11130 : let global_histogram = STORAGE_TIME_GLOBAL
1778 11130 : .get_metric_with_label_values(&[operation])
1779 11130 : .unwrap();
1780 11130 :
1781 11130 : StorageTimeMetrics {
1782 11130 : timeline_sum,
1783 11130 : timeline_count,
1784 11130 : global_histogram,
1785 11130 : }
1786 11130 : }
1787 :
1788 : /// Starts timing a new operation.
1789 : ///
1790 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
1791 10355 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
1792 10355 : StorageTimeMetricsTimer::new(self.clone())
1793 10355 : }
1794 : }
1795 :
1796 0 : #[derive(Debug)]
1797 : pub(crate) struct TimelineMetrics {
1798 : tenant_id: String,
1799 : shard_id: String,
1800 : timeline_id: String,
1801 : pub flush_time_histo: StorageTimeMetrics,
1802 : pub compact_time_histo: StorageTimeMetrics,
1803 : pub create_images_time_histo: StorageTimeMetrics,
1804 : pub logical_size_histo: StorageTimeMetrics,
1805 : pub imitate_logical_size_histo: StorageTimeMetrics,
1806 : pub load_layer_map_histo: StorageTimeMetrics,
1807 : pub garbage_collect_histo: StorageTimeMetrics,
1808 : pub last_record_gauge: IntGauge,
1809 : resident_physical_size_gauge: UIntGauge,
1810 : /// copy of LayeredTimeline.current_logical_size
1811 : pub current_logical_size_gauge: UIntGauge,
1812 : pub num_persistent_files_created: IntCounter,
1813 : pub persistent_bytes_written: IntCounter,
1814 : pub evictions: IntCounter,
1815 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
1816 : }
1817 :
1818 : impl TimelineMetrics {
1819 1590 : pub fn new(
1820 1590 : tenant_shard_id: &TenantShardId,
1821 1590 : timeline_id: &TimelineId,
1822 1590 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
1823 1590 : ) -> Self {
1824 1590 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1825 1590 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
1826 1590 : let timeline_id = timeline_id.to_string();
1827 1590 : let flush_time_histo = StorageTimeMetrics::new(
1828 1590 : StorageTimeOperation::LayerFlush,
1829 1590 : &tenant_id,
1830 1590 : &shard_id,
1831 1590 : &timeline_id,
1832 1590 : );
1833 1590 : let compact_time_histo = StorageTimeMetrics::new(
1834 1590 : StorageTimeOperation::Compact,
1835 1590 : &tenant_id,
1836 1590 : &shard_id,
1837 1590 : &timeline_id,
1838 1590 : );
1839 1590 : let create_images_time_histo = StorageTimeMetrics::new(
1840 1590 : StorageTimeOperation::CreateImages,
1841 1590 : &tenant_id,
1842 1590 : &shard_id,
1843 1590 : &timeline_id,
1844 1590 : );
1845 1590 : let logical_size_histo = StorageTimeMetrics::new(
1846 1590 : StorageTimeOperation::LogicalSize,
1847 1590 : &tenant_id,
1848 1590 : &shard_id,
1849 1590 : &timeline_id,
1850 1590 : );
1851 1590 : let imitate_logical_size_histo = StorageTimeMetrics::new(
1852 1590 : StorageTimeOperation::ImitateLogicalSize,
1853 1590 : &tenant_id,
1854 1590 : &shard_id,
1855 1590 : &timeline_id,
1856 1590 : );
1857 1590 : let load_layer_map_histo = StorageTimeMetrics::new(
1858 1590 : StorageTimeOperation::LoadLayerMap,
1859 1590 : &tenant_id,
1860 1590 : &shard_id,
1861 1590 : &timeline_id,
1862 1590 : );
1863 1590 : let garbage_collect_histo = StorageTimeMetrics::new(
1864 1590 : StorageTimeOperation::Gc,
1865 1590 : &tenant_id,
1866 1590 : &shard_id,
1867 1590 : &timeline_id,
1868 1590 : );
1869 1590 : let last_record_gauge = LAST_RECORD_LSN
1870 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1871 1590 : .unwrap();
1872 1590 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
1873 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1874 1590 : .unwrap();
1875 1590 : // TODO: we shouldn't expose this metric
1876 1590 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
1877 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1878 1590 : .unwrap();
1879 1590 : let num_persistent_files_created = NUM_PERSISTENT_FILES_CREATED
1880 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1881 1590 : .unwrap();
1882 1590 : let persistent_bytes_written = PERSISTENT_BYTES_WRITTEN
1883 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1884 1590 : .unwrap();
1885 1590 : let evictions = EVICTIONS
1886 1590 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1887 1590 : .unwrap();
1888 1590 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
1889 1590 : .build(&tenant_id, &shard_id, &timeline_id);
1890 1590 :
1891 1590 : TimelineMetrics {
1892 1590 : tenant_id,
1893 1590 : shard_id,
1894 1590 : timeline_id,
1895 1590 : flush_time_histo,
1896 1590 : compact_time_histo,
1897 1590 : create_images_time_histo,
1898 1590 : logical_size_histo,
1899 1590 : imitate_logical_size_histo,
1900 1590 : garbage_collect_histo,
1901 1590 : load_layer_map_histo,
1902 1590 : last_record_gauge,
1903 1590 : resident_physical_size_gauge,
1904 1590 : current_logical_size_gauge,
1905 1590 : num_persistent_files_created,
1906 1590 : persistent_bytes_written,
1907 1590 : evictions,
1908 1590 : evictions_with_low_residence_duration: std::sync::RwLock::new(
1909 1590 : evictions_with_low_residence_duration,
1910 1590 : ),
1911 1590 : }
1912 1590 : }
1913 :
1914 22363 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
1915 22363 : self.resident_physical_size_add(sz);
1916 22363 : self.num_persistent_files_created.inc_by(1);
1917 22363 : self.persistent_bytes_written.inc_by(sz);
1918 22363 : }
1919 :
1920 7769 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
1921 7769 : self.resident_physical_size_gauge.sub(sz);
1922 7769 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
1923 7769 : }
1924 :
1925 44418 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
1926 44418 : self.resident_physical_size_gauge.add(sz);
1927 44418 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
1928 44418 : }
1929 :
1930 356 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
1931 356 : self.resident_physical_size_gauge.get()
1932 356 : }
1933 : }
1934 :
1935 : impl Drop for TimelineMetrics {
1936 344 : fn drop(&mut self) {
1937 344 : let tenant_id = &self.tenant_id;
1938 344 : let timeline_id = &self.timeline_id;
1939 344 : let shard_id = &self.shard_id;
1940 344 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1941 344 : {
1942 344 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
1943 344 : let _ =
1944 344 : RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1945 344 : }
1946 344 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1947 344 : let _ =
1948 344 : NUM_PERSISTENT_FILES_CREATED.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1949 344 : let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1950 344 : let _ = EVICTIONS.remove_label_values(&[tenant_id, &shard_id, timeline_id]);
1951 344 :
1952 344 : self.evictions_with_low_residence_duration
1953 344 : .write()
1954 344 : .unwrap()
1955 344 : .remove(tenant_id, shard_id, timeline_id);
1956 :
1957 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
1958 : // removed at the end of it. The idea is to have the metrics outlive the
1959 : // entity during which they're observed, e.g., the smgr metrics shall
1960 : // outlive an individual smgr connection, but not the timeline.
1961 :
1962 3096 : for op in StorageTimeOperation::VARIANTS {
1963 2752 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
1964 2752 : op,
1965 2752 : tenant_id,
1966 2752 : shard_id,
1967 2752 : timeline_id,
1968 2752 : ]);
1969 2752 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
1970 2752 : op,
1971 2752 : tenant_id,
1972 2752 : shard_id,
1973 2752 : timeline_id,
1974 2752 : ]);
1975 2752 : }
1976 :
1977 1032 : for op in STORAGE_IO_SIZE_OPERATIONS {
1978 688 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
1979 688 : }
1980 :
1981 2064 : for op in SmgrQueryType::iter() {
1982 1720 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
1983 1720 : op.into(),
1984 1720 : tenant_id,
1985 1720 : shard_id,
1986 1720 : timeline_id,
1987 1720 : ]);
1988 1720 : }
1989 344 : }
1990 : }
1991 :
1992 316 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
1993 316 : // Only shard zero deals in synthetic sizes
1994 316 : if tenant_shard_id.is_zero() {
1995 302 : let tid = tenant_shard_id.tenant_id.to_string();
1996 302 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
1997 302 : }
1998 :
1999 : // we leave the BROKEN_TENANTS_SET entry if any
2000 316 : }
2001 :
2002 : use futures::Future;
2003 : use pin_project_lite::pin_project;
2004 : use std::collections::HashMap;
2005 : use std::pin::Pin;
2006 : use std::sync::{Arc, Mutex};
2007 : use std::task::{Context, Poll};
2008 : use std::time::{Duration, Instant};
2009 :
2010 : use crate::context::{PageContentKind, RequestContext};
2011 : use crate::task_mgr::TaskKind;
2012 :
2013 : /// Maintain a per timeline gauge in addition to the global gauge.
2014 : struct PerTimelineRemotePhysicalSizeGauge {
2015 : last_set: u64,
2016 : gauge: UIntGauge,
2017 : }
2018 :
2019 : impl PerTimelineRemotePhysicalSizeGauge {
2020 1590 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2021 1590 : Self {
2022 1590 : last_set: per_timeline_gauge.get(),
2023 1590 : gauge: per_timeline_gauge,
2024 1590 : }
2025 1590 : }
2026 7634 : fn set(&mut self, sz: u64) {
2027 7634 : self.gauge.set(sz);
2028 7634 : if sz < self.last_set {
2029 85 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
2030 7549 : } else {
2031 7549 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
2032 7549 : };
2033 7634 : self.last_set = sz;
2034 7634 : }
2035 12 : fn get(&self) -> u64 {
2036 12 : self.gauge.get()
2037 12 : }
2038 : }
2039 :
2040 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2041 344 : fn drop(&mut self) {
2042 344 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
2043 344 : }
2044 : }
2045 :
2046 : pub(crate) struct RemoteTimelineClientMetrics {
2047 : tenant_id: String,
2048 : shard_id: String,
2049 : timeline_id: String,
2050 : remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
2051 : calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
2052 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2053 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2054 : }
2055 :
2056 : impl RemoteTimelineClientMetrics {
2057 1603 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2058 1603 : RemoteTimelineClientMetrics {
2059 1603 : tenant_id: tenant_shard_id.tenant_id.to_string(),
2060 1603 : shard_id: format!("{}", tenant_shard_id.shard_slug()),
2061 1603 : timeline_id: timeline_id.to_string(),
2062 1603 : calls_unfinished_gauge: Mutex::new(HashMap::default()),
2063 1603 : bytes_started_counter: Mutex::new(HashMap::default()),
2064 1603 : bytes_finished_counter: Mutex::new(HashMap::default()),
2065 1603 : remote_physical_size_gauge: Mutex::new(None),
2066 1603 : }
2067 1603 : }
2068 :
2069 7634 : pub(crate) fn remote_physical_size_set(&self, sz: u64) {
2070 7634 : let mut guard = self.remote_physical_size_gauge.lock().unwrap();
2071 7634 : let gauge = guard.get_or_insert_with(|| {
2072 1590 : PerTimelineRemotePhysicalSizeGauge::new(
2073 1590 : REMOTE_PHYSICAL_SIZE
2074 1590 : .get_metric_with_label_values(&[
2075 1590 : &self.tenant_id,
2076 1590 : &self.shard_id,
2077 1590 : &self.timeline_id,
2078 1590 : ])
2079 1590 : .unwrap(),
2080 1590 : )
2081 7634 : });
2082 7634 : gauge.set(sz);
2083 7634 : }
2084 :
2085 12 : pub(crate) fn remote_physical_size_get(&self) -> u64 {
2086 12 : let guard = self.remote_physical_size_gauge.lock().unwrap();
2087 12 : guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
2088 12 : }
2089 :
2090 40521 : pub fn remote_operation_time(
2091 40521 : &self,
2092 40521 : file_kind: &RemoteOpFileKind,
2093 40521 : op_kind: &RemoteOpKind,
2094 40521 : status: &'static str,
2095 40521 : ) -> Histogram {
2096 40521 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2097 40521 : REMOTE_OPERATION_TIME
2098 40521 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2099 40521 : .unwrap()
2100 40521 : }
2101 :
2102 76382 : fn calls_unfinished_gauge(
2103 76382 : &self,
2104 76382 : file_kind: &RemoteOpFileKind,
2105 76382 : op_kind: &RemoteOpKind,
2106 76382 : ) -> IntGauge {
2107 76382 : let mut guard = self.calls_unfinished_gauge.lock().unwrap();
2108 76382 : let key = (file_kind.as_str(), op_kind.as_str());
2109 76382 : let metric = guard.entry(key).or_insert_with(move || {
2110 3378 : REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
2111 3378 : .get_metric_with_label_values(&[
2112 3378 : &self.tenant_id,
2113 3378 : &self.shard_id,
2114 3378 : &self.timeline_id,
2115 3378 : key.0,
2116 3378 : key.1,
2117 3378 : ])
2118 3378 : .unwrap()
2119 76382 : });
2120 76382 : metric.clone()
2121 76382 : }
2122 :
2123 44266 : fn calls_started_hist(
2124 44266 : &self,
2125 44266 : file_kind: &RemoteOpFileKind,
2126 44266 : op_kind: &RemoteOpKind,
2127 44266 : ) -> Histogram {
2128 44266 : let key = (file_kind.as_str(), op_kind.as_str());
2129 44266 : REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST
2130 44266 : .get_metric_with_label_values(&[key.0, key.1])
2131 44266 : .unwrap()
2132 44266 : }
2133 :
2134 22399 : fn bytes_started_counter(
2135 22399 : &self,
2136 22399 : file_kind: &RemoteOpFileKind,
2137 22399 : op_kind: &RemoteOpKind,
2138 22399 : ) -> IntCounter {
2139 22399 : let mut guard = self.bytes_started_counter.lock().unwrap();
2140 22399 : let key = (file_kind.as_str(), op_kind.as_str());
2141 22399 : let metric = guard.entry(key).or_insert_with(move || {
2142 953 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2143 953 : .get_metric_with_label_values(&[
2144 953 : &self.tenant_id,
2145 953 : &self.shard_id,
2146 953 : &self.timeline_id,
2147 953 : key.0,
2148 953 : key.1,
2149 953 : ])
2150 953 : .unwrap()
2151 22399 : });
2152 22399 : metric.clone()
2153 22399 : }
2154 :
2155 42814 : fn bytes_finished_counter(
2156 42814 : &self,
2157 42814 : file_kind: &RemoteOpFileKind,
2158 42814 : op_kind: &RemoteOpKind,
2159 42814 : ) -> IntCounter {
2160 42814 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2161 42814 : let key = (file_kind.as_str(), op_kind.as_str());
2162 42814 : let metric = guard.entry(key).or_insert_with(move || {
2163 953 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2164 953 : .get_metric_with_label_values(&[
2165 953 : &self.tenant_id,
2166 953 : &self.shard_id,
2167 953 : &self.timeline_id,
2168 953 : key.0,
2169 953 : key.1,
2170 953 : ])
2171 953 : .unwrap()
2172 42814 : });
2173 42814 : metric.clone()
2174 42814 : }
2175 : }
2176 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Test helper: value of the cached bytes-started counter for the given
    /// file/operation kind, or `None` if that counter was never created.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let counters = self.bytes_started_counter.lock().unwrap();
        let counter = counters.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }

    /// Test helper: value of the cached bytes-finished counter for the given
    /// file/operation kind, or `None` if that counter was never created.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let counters = self.bytes_finished_counter.lock().unwrap();
        let counter = counters.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }
}
2199 :
2200 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2201 : #[must_use]
2202 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2203 : /// Decremented on drop.
2204 : calls_unfinished_metric: Option<IntGauge>,
2205 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2206 : bytes_finished: Option<(IntCounter, u64)>,
2207 : }
2208 :
2209 : impl RemoteTimelineClientCallMetricGuard {
2210 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2211 : /// The caller vouches to do the metric updates manually.
2212 34173 : pub fn will_decrement_manually(mut self) {
2213 34173 : let RemoteTimelineClientCallMetricGuard {
2214 34173 : calls_unfinished_metric,
2215 34173 : bytes_finished,
2216 34173 : } = &mut self;
2217 34173 : calls_unfinished_metric.take();
2218 34173 : bytes_finished.take();
2219 34173 : }
2220 : }
2221 :
2222 : impl Drop for RemoteTimelineClientCallMetricGuard {
2223 44266 : fn drop(&mut self) {
2224 44266 : let RemoteTimelineClientCallMetricGuard {
2225 44266 : calls_unfinished_metric,
2226 44266 : bytes_finished,
2227 44266 : } = self;
2228 44266 : if let Some(guard) = calls_unfinished_metric.take() {
2229 10093 : guard.dec();
2230 34173 : }
2231 44266 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2232 0 : bytes_finished_metric.inc_by(*value);
2233 44266 : }
2234 44266 : }
2235 : }
2236 :
/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
/// track the byte size of this call in applicable metric(s).
///
/// Passed to both `call_begin` and `call_end` of [`RemoteTimelineClientMetrics`].
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// Do not account for this call's byte size in any metrics.
    /// The `reason` field is there to make the call sites self-documenting
    /// about why they don't need the metric.
    DontTrackSize { reason: &'static str },
    /// Track the byte size of the call in applicable metric(s).
    Bytes(u64),
}
2247 :
2248 : impl RemoteTimelineClientMetrics {
2249 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2250 : ///
2251 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2252 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2253 : /// is more suitable.
2254 : /// Never do both.
2255 44266 : pub(crate) fn call_begin(
2256 44266 : &self,
2257 44266 : file_kind: &RemoteOpFileKind,
2258 44266 : op_kind: &RemoteOpKind,
2259 44266 : size: RemoteTimelineClientMetricsCallTrackSize,
2260 44266 : ) -> RemoteTimelineClientCallMetricGuard {
2261 44266 : let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
2262 44266 : self.calls_started_hist(file_kind, op_kind)
2263 44266 : .observe(calls_unfinished_metric.get() as f64);
2264 44266 : calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric
2265 :
2266 44266 : let bytes_finished = match size {
2267 21867 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2268 21867 : // nothing to do
2269 21867 : None
2270 : }
2271 22399 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2272 22399 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2273 22399 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2274 22399 : Some((finished_counter, size))
2275 : }
2276 : };
2277 44266 : RemoteTimelineClientCallMetricGuard {
2278 44266 : calls_unfinished_metric: Some(calls_unfinished_metric),
2279 44266 : bytes_finished,
2280 44266 : }
2281 44266 : }
2282 :
2283 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2284 : /// Using the guard object is generally preferable.
2285 : /// See [`call_begin`](Self::call_begin) for more context.
2286 32116 : pub(crate) fn call_end(
2287 32116 : &self,
2288 32116 : file_kind: &RemoteOpFileKind,
2289 32116 : op_kind: &RemoteOpKind,
2290 32116 : size: RemoteTimelineClientMetricsCallTrackSize,
2291 32116 : ) {
2292 32116 : let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
2293 : debug_assert!(
2294 32116 : calls_unfinished_metric.get() > 0,
2295 0 : "begin and end should cancel out"
2296 : );
2297 32116 : calls_unfinished_metric.dec();
2298 32116 : match size {
2299 11701 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2300 20415 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2301 20415 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2302 20415 : }
2303 : }
2304 32116 : }
2305 : }
2306 :
2307 : impl Drop for RemoteTimelineClientMetrics {
2308 357 : fn drop(&mut self) {
2309 357 : let RemoteTimelineClientMetrics {
2310 357 : tenant_id,
2311 357 : shard_id,
2312 357 : timeline_id,
2313 357 : remote_physical_size_gauge,
2314 357 : calls_unfinished_gauge,
2315 357 : bytes_started_counter,
2316 357 : bytes_finished_counter,
2317 357 : } = self;
2318 877 : for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
2319 877 : let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
2320 877 : tenant_id,
2321 877 : shard_id,
2322 877 : timeline_id,
2323 877 : a,
2324 877 : b,
2325 877 : ]);
2326 877 : }
2327 357 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2328 222 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2329 222 : tenant_id,
2330 222 : shard_id,
2331 222 : timeline_id,
2332 222 : a,
2333 222 : b,
2334 222 : ]);
2335 222 : }
2336 357 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2337 222 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2338 222 : tenant_id,
2339 222 : shard_id,
2340 222 : timeline_id,
2341 222 : a,
2342 222 : b,
2343 222 : ]);
2344 222 : }
2345 357 : {
2346 357 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2347 357 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2348 357 : }
2349 357 : }
2350 : }
2351 :
2352 : /// Wrapper future that measures the time spent by a remote storage operation,
2353 : /// and records the time and success/failure as a prometheus metric.
2354 : pub(crate) trait MeasureRemoteOp: Sized {
2355 40534 : fn measure_remote_op(
2356 40534 : self,
2357 40534 : file_kind: RemoteOpFileKind,
2358 40534 : op: RemoteOpKind,
2359 40534 : metrics: Arc<RemoteTimelineClientMetrics>,
2360 40534 : ) -> MeasuredRemoteOp<Self> {
2361 40534 : let start = Instant::now();
2362 40534 : MeasuredRemoteOp {
2363 40534 : inner: self,
2364 40534 : file_kind,
2365 40534 : op,
2366 40534 : start,
2367 40534 : metrics,
2368 40534 : }
2369 40534 : }
2370 : }
2371 :
2372 : impl<T: Sized> MeasureRemoteOp for T {}
2373 :
2374 : pin_project! {
2375 : pub(crate) struct MeasuredRemoteOp<F>
2376 : {
2377 : #[pin]
2378 : inner: F,
2379 : file_kind: RemoteOpFileKind,
2380 : op: RemoteOpKind,
2381 : start: Instant,
2382 : metrics: Arc<RemoteTimelineClientMetrics>,
2383 : }
2384 : }
2385 :
2386 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
2387 : type Output = Result<O, E>;
2388 :
2389 1616252 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
2390 1616252 : let this = self.project();
2391 1616252 : let poll_result = this.inner.poll(cx);
2392 1616252 : if let Poll::Ready(ref res) = poll_result {
2393 40521 : let duration = this.start.elapsed();
2394 40521 : let status = if res.is_ok() { &"success" } else { &"failure" };
2395 40521 : this.metrics
2396 40521 : .remote_operation_time(this.file_kind, this.op, status)
2397 40521 : .observe(duration.as_secs_f64());
2398 1575731 : }
2399 1616252 : poll_result
2400 1616252 : }
2401 : }
2402 :
2403 : pub mod tokio_epoll_uring {
2404 : use metrics::UIntGauge;
2405 :
2406 : pub struct Collector {
2407 : descs: Vec<metrics::core::Desc>,
2408 : systems_created: UIntGauge,
2409 : systems_destroyed: UIntGauge,
2410 : }
2411 :
2412 : const NMETRICS: usize = 2;
2413 :
2414 : impl metrics::core::Collector for Collector {
2415 624 : fn desc(&self) -> Vec<&metrics::core::Desc> {
2416 624 : self.descs.iter().collect()
2417 624 : }
2418 :
2419 1258 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
2420 1258 : let mut mfs = Vec::with_capacity(NMETRICS);
2421 1258 : let tokio_epoll_uring::metrics::Metrics {
2422 1258 : systems_created,
2423 1258 : systems_destroyed,
2424 1258 : } = tokio_epoll_uring::metrics::global();
2425 1258 : self.systems_created.set(systems_created);
2426 1258 : mfs.extend(self.systems_created.collect());
2427 1258 : self.systems_destroyed.set(systems_destroyed);
2428 1258 : mfs.extend(self.systems_destroyed.collect());
2429 1258 : mfs
2430 1258 : }
2431 : }
2432 :
2433 : impl Collector {
2434 : #[allow(clippy::new_without_default)]
2435 624 : pub fn new() -> Self {
2436 624 : let mut descs = Vec::new();
2437 624 :
2438 624 : let systems_created = UIntGauge::new(
2439 624 : "pageserver_tokio_epoll_uring_systems_created",
2440 624 : "counter of tokio-epoll-uring systems that were created",
2441 624 : )
2442 624 : .unwrap();
2443 624 : descs.extend(
2444 624 : metrics::core::Collector::desc(&systems_created)
2445 624 : .into_iter()
2446 624 : .cloned(),
2447 624 : );
2448 624 :
2449 624 : let systems_destroyed = UIntGauge::new(
2450 624 : "pageserver_tokio_epoll_uring_systems_destroyed",
2451 624 : "counter of tokio-epoll-uring systems that were destroyed",
2452 624 : )
2453 624 : .unwrap();
2454 624 : descs.extend(
2455 624 : metrics::core::Collector::desc(&systems_destroyed)
2456 624 : .into_iter()
2457 624 : .cloned(),
2458 624 : );
2459 624 :
2460 624 : Self {
2461 624 : descs,
2462 624 : systems_created,
2463 624 : systems_destroyed,
2464 624 : }
2465 624 : }
2466 : }
2467 : }
2468 :
2469 624 : pub fn preinitialize_metrics() {
2470 624 : // Python tests need these and on some we do alerting.
2471 624 : //
2472 624 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
2473 624 : // order:
2474 624 : // - global metrics reside in a Lazy<PageserverMetrics>
2475 624 : // - access via crate::metrics::PS_METRICS.materialized_page_cache_hit.inc()
2476 624 : // - could move the statics into TimelineMetrics::new()?
2477 624 :
2478 624 : // counters
2479 624 : [
2480 624 : &MATERIALIZED_PAGE_CACHE_HIT,
2481 624 : &MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
2482 624 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
2483 624 : &WALRECEIVER_STARTED_CONNECTIONS,
2484 624 : &WALRECEIVER_BROKER_UPDATES,
2485 624 : &WALRECEIVER_CANDIDATES_ADDED,
2486 624 : &WALRECEIVER_CANDIDATES_REMOVED,
2487 624 : ]
2488 624 : .into_iter()
2489 4368 : .for_each(|c| {
2490 4368 : Lazy::force(c);
2491 4368 : });
2492 624 :
2493 624 : // Deletion queue stats
2494 624 : Lazy::force(&DELETION_QUEUE);
2495 624 :
2496 624 : // Tenant stats
2497 624 : Lazy::force(&TENANT);
2498 624 :
2499 624 : // Tenant manager stats
2500 624 : Lazy::force(&TENANT_MANAGER);
2501 624 :
2502 624 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
2503 624 :
2504 624 : // countervecs
2505 624 : [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
2506 624 : .into_iter()
2507 624 : .for_each(|c| {
2508 624 : Lazy::force(c);
2509 624 : });
2510 624 :
2511 624 : // gauges
2512 624 : WALRECEIVER_ACTIVE_MANAGERS.get();
2513 624 :
2514 624 : // histograms
2515 624 : [
2516 624 : &READ_NUM_FS_LAYERS,
2517 624 : &WAIT_LSN_TIME,
2518 624 : &WAL_REDO_TIME,
2519 624 : &WAL_REDO_RECORDS_HISTOGRAM,
2520 624 : &WAL_REDO_BYTES_HISTOGRAM,
2521 624 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
2522 624 : ]
2523 624 : .into_iter()
2524 3744 : .for_each(|h| {
2525 3744 : Lazy::force(h);
2526 3744 : });
2527 624 :
2528 624 : // Custom
2529 624 : Lazy::force(&RECONSTRUCT_TIME);
2530 624 : }
|