Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::shard::TenantShardId;
11 : use strum::{EnumCount, IntoEnumIterator, VariantNames};
12 : use strum_macros::{EnumVariantNames, IntoStaticStr};
13 : use tracing::warn;
14 : use utils::id::TimelineId;
15 :
/// Prometheus histogram buckets (in seconds) for operations in the critical
/// path, i.e. operations that directly affect the latency of user queries.
///
/// There is one bucket per decade from 1 us up to 100 s: this resolves the
/// microsecond and millisecond range where most latencies fall, while still
/// reaching far enough up to distinguish "bad" from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    // microsecond range: 1 us, 10 us, 100 us
    0.000_001,
    0.000_010,
    0.000_100,
    // millisecond range: 1 ms, 10 ms, 100 ms
    0.001_000,
    0.010_000,
    0.100_000,
    // second range: 1 s, 10 s, 100 s
    1.0,
    10.0,
    100.0,
];
28 :
/// Kinds of operations on the storage repository for which metrics are
/// collected.
///
/// Each variant carries an explicit `serialize` string (space-separated words),
/// alongside the enum-wide `serialize_all = "kebab_case"` setting.
// NOTE(review): presumably the serialized string is what ends up as the
// `operation` label value of the storage-time metrics — confirm at the use site.
#[derive(Debug, EnumVariantNames, IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "layer flush")]
    LayerFlush,

    #[strum(serialize = "compact")]
    Compact,

    #[strum(serialize = "create images")]
    CreateImages,

    #[strum(serialize = "logical size")]
    LogicalSize,

    #[strum(serialize = "imitate logical size")]
    ImitateLogicalSize,

    #[strum(serialize = "load layer map")]
    LoadLayerMap,

    #[strum(serialize = "gc")]
    Gc,

    #[strum(serialize = "create tenant")]
    CreateTenant,
}
57 :
58 100 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
59 100 : register_counter_vec!(
60 100 : "pageserver_storage_operations_seconds_sum",
61 100 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
62 100 : &["operation", "tenant_id", "shard_id", "timeline_id"],
63 100 : )
64 100 : .expect("failed to define a metric")
65 100 : });
66 :
67 100 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
68 100 : register_int_counter_vec!(
69 100 : "pageserver_storage_operations_seconds_count",
70 100 : "Count of storage operations with operation, tenant and timeline dimensions",
71 100 : &["operation", "tenant_id", "shard_id", "timeline_id"],
72 100 : )
73 100 : .expect("failed to define a metric")
74 100 : });
75 :
/// Histogram buckets (in seconds) for background operations like compaction,
/// GC and size calculation: one bucket per decade from 10 ms to 1000 s.
const STORAGE_OP_BUCKETS: &[f64] = &[
    0.010,
    0.100,
    1.0,
    10.0,
    100.0,
    1000.0,
];
78 :
79 100 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
80 100 : register_histogram_vec!(
81 100 : "pageserver_storage_operations_seconds_global",
82 100 : "Time spent on storage operations",
83 100 : &["operation"],
84 100 : STORAGE_OP_BUCKETS.into(),
85 100 : )
86 100 : .expect("failed to define a metric")
87 100 : });
88 :
89 98 : pub(crate) static READ_NUM_FS_LAYERS: Lazy<Histogram> = Lazy::new(|| {
90 98 : register_histogram!(
91 98 : "pageserver_read_num_fs_layers",
92 98 : "Number of persistent layers accessed for processing a read request, including those in the cache",
93 98 : vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0, 50.0, 100.0],
94 98 : )
95 98 : .expect("failed to define a metric")
96 98 : });
97 :
/// Pair of histograms for page reconstruction time, one per result kind.
///
/// Populated by [`RECONSTRUCT_TIME`] from the `ok` / `err` values of the
/// `result` label of `pageserver_getpage_reconstruct_seconds`.
pub(crate) struct ReconstructTimeMetrics {
    /// Histogram for the `result="ok"` series.
    ok: Histogram,
    /// Histogram for the `result="err"` series.
    err: Histogram,
}
104 :
105 98 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
106 98 : let inner = register_histogram_vec!(
107 98 : "pageserver_getpage_reconstruct_seconds",
108 98 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
109 98 : &["result"],
110 98 : CRITICAL_OP_BUCKETS.into(),
111 98 : )
112 98 : .expect("failed to define a metric");
113 98 : ReconstructTimeMetrics {
114 98 : ok: inner.get_metric_with_label_values(&["ok"]).unwrap(),
115 98 : err: inner.get_metric_with_label_values(&["err"]).unwrap(),
116 98 : }
117 98 : });
118 :
119 : impl ReconstructTimeMetrics {
120 502723 : pub(crate) fn for_result<T, E>(&self, result: &Result<T, E>) -> &Histogram {
121 502723 : match result {
122 502723 : Ok(_) => &self.ok,
123 0 : Err(_) => &self.err,
124 : }
125 502723 : }
126 : }
127 :
128 0 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT_DIRECT: Lazy<IntCounter> = Lazy::new(|| {
129 0 : register_int_counter!(
130 0 : "pageserver_materialized_cache_hits_direct_total",
131 0 : "Number of cache hits from materialized page cache without redo",
132 0 : )
133 0 : .expect("failed to define a metric")
134 0 : });
135 :
136 98 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<Histogram> = Lazy::new(|| {
137 98 : register_histogram!(
138 98 : "pageserver_getpage_get_reconstruct_data_seconds",
139 98 : "Time spent in get_reconstruct_value_data",
140 98 : CRITICAL_OP_BUCKETS.into(),
141 98 : )
142 98 : .expect("failed to define a metric")
143 98 : });
144 :
145 0 : pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
146 0 : register_int_counter!(
147 0 : "pageserver_materialized_cache_hits_total",
148 0 : "Number of cache hits from materialized page cache",
149 0 : )
150 0 : .expect("failed to define a metric")
151 0 : });
152 :
153 : pub(crate) struct GetVectoredLatency {
154 : map: EnumMap<TaskKind, Option<Histogram>>,
155 : }
156 :
157 : impl GetVectoredLatency {
158 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
159 : // cardinality of the metric.
160 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
161 :
162 568 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
163 568 : self.map[task_kind].as_ref()
164 568 : }
165 : }
166 :
167 94 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
168 94 : let inner = register_histogram_vec!(
169 94 : "pageserver_get_vectored_seconds",
170 94 : "Time spent in get_vectored, excluding time spent in timeline_get_throttle.",
171 94 : &["task_kind"],
172 94 : CRITICAL_OP_BUCKETS.into(),
173 94 : )
174 94 : .expect("failed to define a metric");
175 94 :
176 94 : GetVectoredLatency {
177 2444 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
178 2444 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
179 2444 :
180 2444 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
181 188 : let task_kind = task_kind.into();
182 188 : Some(inner.with_label_values(&[task_kind]))
183 : } else {
184 2256 : None
185 : }
186 2444 : })),
187 94 : }
188 94 : });
189 :
/// Page cache read counters for one (task kind, page content kind)
/// combination, as stored in [`PageCacheMetrics`].
pub(crate) struct PageCacheMetricsForTaskKind {
    /// Read accesses with key kind `materialized_page`.
    pub read_accesses_materialized_page: IntCounter,
    /// Read accesses with key kind `immutable`.
    pub read_accesses_immutable: IntCounter,

    /// Read hits with key kind `immutable` (hit kind `-`).
    pub read_hits_immutable: IntCounter,
    /// Read hits with key kind `materialized_page` and hit kind `exact`.
    pub read_hits_materialized_page_exact: IntCounter,
    /// Read hits with key kind `materialized_page` and hit kind `older_lsn`.
    pub read_hits_materialized_page_older_lsn: IntCounter,
}

/// All page cache read counters, indexed first by [`TaskKind`] and then by
/// [`PageContentKind`].
pub(crate) struct PageCacheMetrics {
    map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
}
202 :
203 100 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
204 100 : register_int_counter_vec!(
205 100 : "pageserver_page_cache_read_hits_total",
206 100 : "Number of read accesses to the page cache that hit",
207 100 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
208 100 : )
209 100 : .expect("failed to define a metric")
210 100 : });
211 :
212 100 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
213 100 : register_int_counter_vec!(
214 100 : "pageserver_page_cache_read_accesses_total",
215 100 : "Number of read accesses to the page cache",
216 100 : &["task_kind", "key_kind", "content_kind"]
217 100 : )
218 100 : .expect("failed to define a metric")
219 100 : });
220 :
221 100 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
222 2600 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
223 2600 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
224 2600 : let task_kind: &'static str = task_kind.into();
225 15600 : EnumMap::from_array(std::array::from_fn(|content_kind| {
226 15600 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
227 15600 : let content_kind: &'static str = content_kind.into();
228 15600 : PageCacheMetricsForTaskKind {
229 15600 : read_accesses_materialized_page: {
230 15600 : PAGE_CACHE_READ_ACCESSES
231 15600 : .get_metric_with_label_values(&[
232 15600 : task_kind,
233 15600 : "materialized_page",
234 15600 : content_kind,
235 15600 : ])
236 15600 : .unwrap()
237 15600 : },
238 15600 :
239 15600 : read_accesses_immutable: {
240 15600 : PAGE_CACHE_READ_ACCESSES
241 15600 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
242 15600 : .unwrap()
243 15600 : },
244 15600 :
245 15600 : read_hits_immutable: {
246 15600 : PAGE_CACHE_READ_HITS
247 15600 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
248 15600 : .unwrap()
249 15600 : },
250 15600 :
251 15600 : read_hits_materialized_page_exact: {
252 15600 : PAGE_CACHE_READ_HITS
253 15600 : .get_metric_with_label_values(&[
254 15600 : task_kind,
255 15600 : "materialized_page",
256 15600 : content_kind,
257 15600 : "exact",
258 15600 : ])
259 15600 : .unwrap()
260 15600 : },
261 15600 :
262 15600 : read_hits_materialized_page_older_lsn: {
263 15600 : PAGE_CACHE_READ_HITS
264 15600 : .get_metric_with_label_values(&[
265 15600 : task_kind,
266 15600 : "materialized_page",
267 15600 : content_kind,
268 15600 : "older_lsn",
269 15600 : ])
270 15600 : .unwrap()
271 15600 : },
272 15600 : }
273 15600 : }))
274 2600 : })),
275 100 : });
276 :
277 : impl PageCacheMetrics {
278 14336347 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
279 14336347 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
280 14336347 : }
281 : }
282 :
/// Gauges describing the size of the page cache, in bytes.
pub(crate) struct PageCacheSizeMetrics {
    /// `pageserver_page_cache_size_max_bytes`: maximum size of the page cache.
    pub max_bytes: UIntGauge,

    /// `pageserver_page_cache_size_current_bytes` for key kind `immutable`.
    pub current_bytes_immutable: UIntGauge,
    /// `pageserver_page_cache_size_current_bytes` for key kind `materialized_page`.
    pub current_bytes_materialized_page: UIntGauge,
}
289 :
290 100 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
291 100 : register_uint_gauge_vec!(
292 100 : "pageserver_page_cache_size_current_bytes",
293 100 : "Current size of the page cache in bytes, by key kind",
294 100 : &["key_kind"]
295 100 : )
296 100 : .expect("failed to define a metric")
297 100 : });
298 :
299 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
300 100 : Lazy::new(|| PageCacheSizeMetrics {
301 100 : max_bytes: {
302 100 : register_uint_gauge!(
303 100 : "pageserver_page_cache_size_max_bytes",
304 100 : "Maximum size of the page cache in bytes"
305 100 : )
306 100 : .expect("failed to define a metric")
307 100 : },
308 100 : current_bytes_immutable: {
309 100 : PAGE_CACHE_SIZE_CURRENT_BYTES
310 100 : .get_metric_with_label_values(&["immutable"])
311 100 : .unwrap()
312 100 : },
313 100 : current_bytes_materialized_page: {
314 100 : PAGE_CACHE_SIZE_CURRENT_BYTES
315 100 : .get_metric_with_label_values(&["materialized_page"])
316 100 : .unwrap()
317 100 : },
318 100 : });
319 :
320 : pub(crate) mod page_cache_eviction_metrics {
321 : use std::num::NonZeroUsize;
322 :
323 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
324 : use once_cell::sync::Lazy;
325 :
326 : #[derive(Clone, Copy)]
327 : pub(crate) enum Outcome {
328 : FoundSlotUnused { iters: NonZeroUsize },
329 : FoundSlotEvicted { iters: NonZeroUsize },
330 : ItersExceeded { iters: NonZeroUsize },
331 : }
332 :
333 40 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
334 40 : register_int_counter_vec!(
335 40 : "pageserver_page_cache_find_victim_iters_total",
336 40 : "Counter for the number of iterations in the find_victim loop",
337 40 : &["outcome"],
338 40 : )
339 40 : .expect("failed to define a metric")
340 40 : });
341 :
342 40 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
343 40 : register_int_counter_vec!(
344 40 : "pageserver_page_cache_find_victim_calls",
345 40 : "Incremented at the end of each find_victim() call.\
346 40 : Filter by outcome to get e.g., eviction rate.",
347 40 : &["outcome"]
348 40 : )
349 40 : .unwrap()
350 40 : });
351 :
352 155978 : pub(crate) fn observe(outcome: Outcome) {
353 155978 : macro_rules! dry {
354 155978 : ($label:literal, $iters:expr) => {{
355 155978 : static LABEL: &'static str = $label;
356 155978 : static ITERS_TOTAL: Lazy<IntCounter> =
357 155978 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
358 155978 : static CALLS: Lazy<IntCounter> =
359 155978 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
360 155978 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
361 155978 : CALLS.inc();
362 155978 : }};
363 155978 : }
364 155978 : match outcome {
365 1220 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
366 154758 : Outcome::FoundSlotEvicted { iters } => {
367 154758 : dry!("found_evicted", iters)
368 : }
369 0 : Outcome::ItersExceeded { iters } => {
370 0 : dry!("err_iters_exceeded", iters);
371 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
372 0 : }
373 : }
374 155978 : }
375 : }
376 :
377 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
378 0 : register_int_counter_vec!(
379 0 : "page_cache_errors_total",
380 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
381 0 : &["error_kind"]
382 0 : )
383 0 : .expect("failed to define a metric")
384 0 : });
385 :
/// Kinds of page cache errors counted by `page_cache_errors_inc`; the
/// kebab-case variant name is used as the `error_kind` label value.
#[derive(IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum PageCacheErrorKind {
    // NOTE(review): presumably a timeout while acquiring a pinned slot — confirm at the call site.
    AcquirePinnedSlotTimeout,
    /// Reported when `find_victim` ends with `Outcome::ItersExceeded`.
    EvictIterLimit,
}
392 :
393 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
394 0 : PAGE_CACHE_ERRORS
395 0 : .get_metric_with_label_values(&[error_kind.into()])
396 0 : .unwrap()
397 0 : .inc();
398 0 : }
399 :
400 12 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
401 12 : register_histogram!(
402 12 : "pageserver_wait_lsn_seconds",
403 12 : "Time spent waiting for WAL to arrive",
404 12 : CRITICAL_OP_BUCKETS.into(),
405 12 : )
406 12 : .expect("failed to define a metric")
407 12 : });
408 :
409 100 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
410 100 : register_int_gauge_vec!(
411 100 : "pageserver_last_record_lsn",
412 100 : "Last record LSN grouped by timeline",
413 100 : &["tenant_id", "shard_id", "timeline_id"]
414 100 : )
415 100 : .expect("failed to define a metric")
416 100 : });
417 :
418 100 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
419 100 : register_uint_gauge_vec!(
420 100 : "pageserver_resident_physical_size",
421 100 : "The size of the layer files present in the pageserver's filesystem.",
422 100 : &["tenant_id", "shard_id", "timeline_id"]
423 100 : )
424 100 : .expect("failed to define a metric")
425 100 : });
426 :
427 98 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
428 98 : register_uint_gauge!(
429 98 : "pageserver_resident_physical_size_global",
430 98 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
431 98 : )
432 98 : .expect("failed to define a metric")
433 98 : });
434 :
435 100 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
436 100 : register_uint_gauge_vec!(
437 100 : "pageserver_remote_physical_size",
438 100 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
439 100 : // Corollary: If any files are missing from the index part, they won't be included here.
440 100 : &["tenant_id", "shard_id", "timeline_id"]
441 100 : )
442 100 : .expect("failed to define a metric")
443 100 : });
444 :
445 100 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
446 100 : register_uint_gauge!(
447 100 : "pageserver_remote_physical_size_global",
448 100 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
449 100 : )
450 100 : .expect("failed to define a metric")
451 100 : });
452 :
453 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
454 4 : register_int_counter!(
455 4 : "pageserver_remote_ondemand_downloaded_layers_total",
456 4 : "Total on-demand downloaded layers"
457 4 : )
458 4 : .unwrap()
459 4 : });
460 :
461 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
462 4 : register_int_counter!(
463 4 : "pageserver_remote_ondemand_downloaded_bytes_total",
464 4 : "Total bytes of layers on-demand downloaded",
465 4 : )
466 4 : .unwrap()
467 4 : });
468 :
469 100 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
470 100 : register_uint_gauge_vec!(
471 100 : "pageserver_current_logical_size",
472 100 : "Current logical size grouped by timeline",
473 100 : &["tenant_id", "shard_id", "timeline_id"]
474 100 : )
475 100 : .expect("failed to define current logical size metric")
476 100 : });
477 :
478 : pub(crate) mod initial_logical_size {
479 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
480 : use once_cell::sync::Lazy;
481 :
482 : pub(crate) struct StartCalculation(IntCounterVec);
483 100 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
484 100 : StartCalculation(
485 100 : register_int_counter_vec!(
486 100 : "pageserver_initial_logical_size_start_calculation",
487 100 : "Incremented each time we start an initial logical size calculation attempt. \
488 100 : The `circumstances` label provides some additional details.",
489 100 : &["attempt", "circumstances"]
490 100 : )
491 100 : .unwrap(),
492 100 : )
493 100 : });
494 :
495 : struct DropCalculation {
496 : first: IntCounter,
497 : retry: IntCounter,
498 : }
499 :
500 100 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
501 100 : let vec = register_int_counter_vec!(
502 100 : "pageserver_initial_logical_size_drop_calculation",
503 100 : "Incremented each time we abort a started size calculation attmpt.",
504 100 : &["attempt"]
505 100 : )
506 100 : .unwrap();
507 100 : DropCalculation {
508 100 : first: vec.with_label_values(&["first"]),
509 100 : retry: vec.with_label_values(&["retry"]),
510 100 : }
511 100 : });
512 :
513 : pub(crate) struct Calculated {
514 : pub(crate) births: IntCounter,
515 : pub(crate) deaths: IntCounter,
516 : }
517 :
518 100 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
519 100 : births: register_int_counter!(
520 100 : "pageserver_initial_logical_size_finish_calculation",
521 100 : "Incremented every time we finish calculation of initial logical size.\
522 100 : If everything is working well, this should happen at most once per Timeline object."
523 100 : )
524 100 : .unwrap(),
525 100 : deaths: register_int_counter!(
526 100 : "pageserver_initial_logical_size_drop_finished_calculation",
527 100 : "Incremented when we drop a finished initial logical size calculation result.\
528 100 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
529 100 : )
530 100 : .unwrap(),
531 100 : });
532 :
533 : pub(crate) struct OngoingCalculationGuard {
534 : inc_drop_calculation: Option<IntCounter>,
535 : }
536 :
537 100 : #[derive(strum_macros::IntoStaticStr)]
538 : pub(crate) enum StartCircumstances {
539 : EmptyInitial,
540 : SkippedConcurrencyLimiter,
541 : AfterBackgroundTasksRateLimit,
542 : }
543 :
544 : impl StartCalculation {
545 100 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
546 100 : let circumstances_label: &'static str = circumstances.into();
547 100 : self.0
548 100 : .with_label_values(&["first", circumstances_label])
549 100 : .inc();
550 100 : OngoingCalculationGuard {
551 100 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
552 100 : }
553 100 : }
554 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
555 0 : let circumstances_label: &'static str = circumstances.into();
556 0 : self.0
557 0 : .with_label_values(&["retry", circumstances_label])
558 0 : .inc();
559 0 : OngoingCalculationGuard {
560 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
561 0 : }
562 0 : }
563 : }
564 :
565 : impl Drop for OngoingCalculationGuard {
566 100 : fn drop(&mut self) {
567 100 : if let Some(counter) = self.inc_drop_calculation.take() {
568 0 : counter.inc();
569 100 : }
570 100 : }
571 : }
572 :
573 : impl OngoingCalculationGuard {
574 100 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
575 100 : drop(self.inc_drop_calculation.take());
576 100 : CALCULATED.births.inc();
577 100 : FinishedCalculationGuard {
578 100 : inc_on_drop: CALCULATED.deaths.clone(),
579 100 : }
580 100 : }
581 : }
582 :
583 : pub(crate) struct FinishedCalculationGuard {
584 : inc_on_drop: IntCounter,
585 : }
586 :
587 : impl Drop for FinishedCalculationGuard {
588 6 : fn drop(&mut self) {
589 6 : self.inc_on_drop.inc();
590 6 : }
591 : }
592 :
593 : // context: https://github.com/neondatabase/neon/issues/5963
594 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
595 0 : Lazy::new(|| {
596 0 : register_int_counter!(
597 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
598 0 : "Counter for the following event: walreceiver calls\
599 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
600 0 : )
601 0 : .unwrap()
602 0 : });
603 : }
604 :
605 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
606 0 : register_uint_gauge_vec!(
607 0 : "pageserver_directory_entries_count",
608 0 : "Sum of the entries in pageserver-stored directory listings",
609 0 : &["tenant_id", "shard_id", "timeline_id"]
610 0 : )
611 0 : .expect("failed to define a metric")
612 0 : });
613 :
614 99 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
615 99 : register_uint_gauge_vec!(
616 99 : "pageserver_tenant_states_count",
617 99 : "Count of tenants per state",
618 99 : &["state"]
619 99 : )
620 99 : .expect("Failed to register pageserver_tenant_states_count metric")
621 99 : });
622 :
623 : /// A set of broken tenants.
624 : ///
625 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
626 : /// tenant.
627 4 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
628 4 : register_uint_gauge_vec!(
629 4 : "pageserver_broken_tenants_count",
630 4 : "Set of broken tenants",
631 4 : &["tenant_id", "shard_id"]
632 4 : )
633 4 : .expect("Failed to register pageserver_tenant_states_count metric")
634 4 : });
635 :
636 6 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
637 6 : register_uint_gauge_vec!(
638 6 : "pageserver_tenant_synthetic_cached_size_bytes",
639 6 : "Synthetic size of each tenant in bytes",
640 6 : &["tenant_id"]
641 6 : )
642 6 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
643 6 : });
644 :
645 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
646 0 : register_histogram_vec!(
647 0 : "pageserver_eviction_iteration_duration_seconds_global",
648 0 : "Time spent on a single eviction iteration",
649 0 : &["period_secs", "threshold_secs"],
650 0 : STORAGE_OP_BUCKETS.into(),
651 0 : )
652 0 : .expect("failed to define a metric")
653 0 : });
654 :
655 100 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
656 100 : register_int_counter_vec!(
657 100 : "pageserver_evictions",
658 100 : "Number of layers evicted from the pageserver",
659 100 : &["tenant_id", "shard_id", "timeline_id"]
660 100 : )
661 100 : .expect("failed to define a metric")
662 100 : });
663 :
664 100 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
665 100 : register_int_counter_vec!(
666 100 : "pageserver_evictions_with_low_residence_duration",
667 100 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
668 100 : Residence duration is determined using the `residence_duration_data_source`.",
669 100 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
670 100 : )
671 100 : .expect("failed to define a metric")
672 100 : });
673 :
674 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
675 0 : register_int_counter!(
676 0 : "pageserver_unexpected_ondemand_downloads_count",
677 0 : "Number of unexpected on-demand downloads. \
678 0 : We log more context for each increment, so, forgo any labels in this metric.",
679 0 : )
680 0 : .expect("failed to define a metric")
681 0 : });
682 :
683 : /// How long did we take to start up? Broken down by labels to describe
684 : /// different phases of startup.
685 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
686 0 : register_gauge_vec!(
687 0 : "pageserver_startup_duration_seconds",
688 0 : "Time taken by phases of pageserver startup, in seconds",
689 0 : &["phase"]
690 0 : )
691 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
692 0 : });
693 :
694 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
695 0 : register_uint_gauge!(
696 0 : "pageserver_startup_is_loading",
697 0 : "1 while in initial startup load of tenants, 0 at other times"
698 0 : )
699 0 : .expect("Failed to register pageserver_startup_is_loading")
700 0 : });
701 :
702 98 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
703 98 : register_uint_gauge!(
704 98 : "pageserver_timeline_ephemeral_bytes",
705 98 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
706 98 : )
707 98 : .expect("Failed to register metric")
708 98 : });
709 :
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
/// like how long it took to load.
///
/// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
/// metrics are rather expensive, and fine-grained metrics usually make more sense
/// at the timeline level than at the tenant level.
pub(crate) struct TenantMetrics {
    /// How long did tenants take to go from construction to active state?
    pub(crate) activation: Histogram,
    /// How long did tenants take to load remote metadata on startup/attach?
    pub(crate) preload: Histogram,
    /// How long did tenants take to initialize, after remote metadata was loaded?
    pub(crate) attach: Histogram,

    /// How many tenants are included in the initial startup of the pageserver?
    pub(crate) startup_scheduled: IntCounter,
    /// How many tenants have completed warm-up, or were activated on demand
    /// during initial startup?
    pub(crate) startup_complete: IntCounter,
}
726 :
727 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
728 0 : TenantMetrics {
729 0 : activation: register_histogram!(
730 0 : "pageserver_tenant_activation_seconds",
731 0 : "Time taken by tenants to activate, in seconds",
732 0 : CRITICAL_OP_BUCKETS.into()
733 0 : )
734 0 : .expect("Failed to register metric"),
735 0 : preload: register_histogram!(
736 0 : "pageserver_tenant_preload_seconds",
737 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
738 0 : CRITICAL_OP_BUCKETS.into()
739 0 : )
740 0 : .expect("Failed to register metric"),
741 0 : attach: register_histogram!(
742 0 : "pageserver_tenant_attach_seconds",
743 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
744 0 : CRITICAL_OP_BUCKETS.into()
745 0 : )
746 0 : .expect("Failed to register metric"),
747 0 : startup_scheduled: register_int_counter!(
748 0 : "pageserver_tenant_startup_scheduled",
749 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
750 0 : ).expect("Failed to register metric"),
751 0 : startup_complete: register_int_counter!(
752 0 : "pageserver_tenant_startup_complete",
753 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
754 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
755 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
756 0 : ).expect("Failed to register metric"),
757 0 : }
758 0 : });
759 :
/// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub(crate) struct EvictionsWithLowResidenceDuration {
    /// Value of the `residence_duration_data_source` label.
    data_source: &'static str,
    /// Evictions observed with a residence duration below this are counted.
    threshold: Duration,
    /// The labelled counter; `None` once `remove` has taken it.
    counter: Option<IntCounter>,
}
767 :
768 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
769 : data_source: &'static str,
770 : threshold: Duration,
771 : }
772 :
773 : impl EvictionsWithLowResidenceDurationBuilder {
774 320 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
775 320 : Self {
776 320 : data_source,
777 320 : threshold,
778 320 : }
779 320 : }
780 :
781 320 : fn build(
782 320 : &self,
783 320 : tenant_id: &str,
784 320 : shard_id: &str,
785 320 : timeline_id: &str,
786 320 : ) -> EvictionsWithLowResidenceDuration {
787 320 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
788 320 : .get_metric_with_label_values(&[
789 320 : tenant_id,
790 320 : shard_id,
791 320 : timeline_id,
792 320 : self.data_source,
793 320 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
794 320 : ])
795 320 : .unwrap();
796 320 : EvictionsWithLowResidenceDuration {
797 320 : data_source: self.data_source,
798 320 : threshold: self.threshold,
799 320 : counter: Some(counter),
800 320 : }
801 320 : }
802 : }
803 :
804 : impl EvictionsWithLowResidenceDuration {
805 328 : fn threshold_label_value(threshold: Duration) -> String {
806 328 : format!("{}", threshold.as_secs())
807 328 : }
808 :
809 18 : pub fn observe(&self, observed_value: Duration) {
810 18 : if observed_value < self.threshold {
811 18 : self.counter
812 18 : .as_ref()
813 18 : .expect("nobody calls this function after `remove_from_vec`")
814 18 : .inc();
815 18 : }
816 18 : }
817 :
818 0 : pub fn change_threshold(
819 0 : &mut self,
820 0 : tenant_id: &str,
821 0 : shard_id: &str,
822 0 : timeline_id: &str,
823 0 : new_threshold: Duration,
824 0 : ) {
825 0 : if new_threshold == self.threshold {
826 0 : return;
827 0 : }
828 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
829 0 : self.data_source,
830 0 : new_threshold,
831 0 : )
832 0 : .build(tenant_id, shard_id, timeline_id);
833 0 : std::mem::swap(self, &mut with_new);
834 0 : with_new.remove(tenant_id, shard_id, timeline_id);
835 0 : }
836 :
837 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
838 8 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
839 8 : let Some(_counter) = self.counter.take() else {
840 0 : return;
841 : };
842 :
843 8 : let threshold = Self::threshold_label_value(self.threshold);
844 8 :
845 8 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
846 8 : tenant_id,
847 8 : shard_id,
848 8 : timeline_id,
849 8 : self.data_source,
850 8 : &threshold,
851 8 : ]);
852 8 :
853 8 : match removed {
854 0 : Err(e) => {
855 0 : // this has been hit in staging as
856 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
857 0 : // because we can be in the drop path already, don't risk:
858 0 : // - "double-panic => illegal instruction" or
859 0 : // - future "drop panick => abort"
860 0 : //
861 0 : // so just nag: (the error has the labels)
862 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
863 : }
864 : Ok(()) => {
865 : // to help identify cases where we double-remove the same values, let's log all
866 : // deletions?
867 8 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
868 : }
869 : }
870 8 : }
871 : }
872 :
// Metrics collected on disk IO operations
//
// Histogram bucket upper bounds, expressed in seconds.
// Roughly logarithmic scale: each bucket is about 30x the previous one.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000030, // 30 usec
    0.001000, // 1000 usec
    0.030, // 30 ms
    1.000, // 1000 ms
    30.000, // 30000 ms
];
883 :
/// VirtualFile fs operation variants.
///
/// Operations:
/// - open ([`std::fs::OpenOptions::open`])
/// - open-after-replace (presumably an open that follows a close-by-replace; TODO confirm)
/// - close (dropping [`crate::virtual_file::VirtualFile`])
/// - close-by-replace (close by replacement algorithm)
/// - read (`read_at`)
/// - write (`write_at`)
/// - seek (modify internal position or file length query)
/// - fsync ([`std::fs::File::sync_all`])
/// - metadata ([`std::fs::File::metadata`])
///
/// The discriminants are used as array indices by `StorageIoTime`, so the variants
/// must stay a dense `0..COUNT` range.
#[derive(
    Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
)]
pub(crate) enum StorageIoOperation {
    Open,
    OpenAfterReplace,
    Close,
    CloseByReplace,
    Read,
    Write,
    Seek,
    Fsync,
    Metadata,
}
909 :
910 : impl StorageIoOperation {
911 1080 : pub fn as_str(&self) -> &'static str {
912 1080 : match self {
913 120 : StorageIoOperation::Open => "open",
914 120 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
915 120 : StorageIoOperation::Close => "close",
916 120 : StorageIoOperation::CloseByReplace => "close-by-replace",
917 120 : StorageIoOperation::Read => "read",
918 120 : StorageIoOperation::Write => "write",
919 120 : StorageIoOperation::Seek => "seek",
920 120 : StorageIoOperation::Fsync => "fsync",
921 120 : StorageIoOperation::Metadata => "metadata",
922 : }
923 1080 : }
924 : }
925 :
/// Tracks time taken by fs operations near VirtualFile.
///
/// Holds one histogram per [`StorageIoOperation`] variant, indexed by the variant's
/// discriminant.
#[derive(Debug)]
pub(crate) struct StorageIoTime {
    metrics: [Histogram; StorageIoOperation::COUNT],
}
931 :
932 : impl StorageIoTime {
933 120 : fn new() -> Self {
934 120 : let storage_io_histogram_vec = register_histogram_vec!(
935 120 : "pageserver_io_operations_seconds",
936 120 : "Time spent in IO operations",
937 120 : &["operation"],
938 120 : STORAGE_IO_TIME_BUCKETS.into()
939 120 : )
940 120 : .expect("failed to define a metric");
941 1080 : let metrics = std::array::from_fn(|i| {
942 1080 : let op = StorageIoOperation::from_repr(i).unwrap();
943 1080 : storage_io_histogram_vec
944 1080 : .get_metric_with_label_values(&[op.as_str()])
945 1080 : .unwrap()
946 1080 : });
947 120 : Self { metrics }
948 120 : }
949 :
950 874640 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
951 874640 : &self.metrics[op as usize]
952 874640 : }
953 : }
954 :
/// Global [`StorageIoTime`] instance, constructed via `StorageIoTime::new` on first access.
pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
956 :
// Operation names tracked for IO sizes.
// NOTE(review): presumably the `operation` label values of `STORAGE_IO_SIZE`; confirm at the use sites.
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
958 :
// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
//
// `pageserver_io_operations_bytes_total`: bytes read/written in IO operations, labelled by
// operation, tenant, shard and timeline. Registered lazily on first access; panics if
// registration fails.
pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_io_operations_bytes_total",
        "Total amount of bytes read/written in IO operations",
        &["operation", "tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});
968 :
// Metrics about the VirtualFile descriptor cache. Not compiled into test builds.
#[cfg(not(test))]
pub(crate) mod virtual_file_descriptor_cache {
    use super::*;

    /// `pageserver_virtual_file_descriptor_cache_size_max`: maximum number of open file
    /// descriptors in the cache.
    pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
        register_uint_gauge!(
            "pageserver_virtual_file_descriptor_cache_size_max",
            "Maximum number of open file descriptors in the cache."
        )
        .unwrap()
    });

    // SIZE_CURRENT: derive it like so:
    // ```
    // sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
    // -ignoring(operation)
    // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
    // ```
}
988 :
// Metrics about the VirtualFile io engine. Not compiled into test builds.
#[cfg(not(test))]
pub(crate) mod virtual_file_io_engine {
    use super::*;

    /// `pageserver_virtual_file_io_engine_kind`: the configured io engine for VirtualFile,
    /// with the engine identified by the `kind` label.
    pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
        register_uint_gauge_vec!(
            "pageserver_virtual_file_io_engine_kind",
            "The configured io engine for VirtualFile",
            &["kind"],
        )
        .unwrap()
    });
}
1002 :
/// A pair of histograms that always receive the same observations: one shared
/// ("global") and one scoped to a single tenant/timeline.
#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
    global: Histogram,
    per_tenant_timeline: Histogram,
}
1008 :
1009 : impl GlobalAndPerTimelineHistogram {
1010 10 : fn observe(&self, value: f64) {
1011 10 : self.global.observe(value);
1012 10 : self.per_tenant_timeline.observe(value);
1013 10 : }
1014 : }
1015 :
/// Guard that, when dropped, observes the time elapsed since `start` (minus the time
/// `ctx` reports as spent throttled) into `h`.
struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
    // Histogram pair that receives the observation on drop.
    h: &'a GlobalAndPerTimelineHistogram,
    // Request context whose `micros_spent_throttled` is deducted from the elapsed time.
    ctx: &'c RequestContext,
    // When the timer was started.
    start: std::time::Instant,
    // Query type; selects the rate limiter used when logging a deduction failure.
    op: SmgrQueryType,
}
1022 :
1023 : impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
1024 10 : fn drop(&mut self) {
1025 10 : let elapsed = self.start.elapsed();
1026 10 : let ex_throttled = self
1027 10 : .ctx
1028 10 : .micros_spent_throttled
1029 10 : .close_and_checked_sub_from(elapsed);
1030 10 : let ex_throttled = match ex_throttled {
1031 10 : Ok(res) => res,
1032 0 : Err(error) => {
1033 0 : use utils::rate_limit::RateLimit;
1034 0 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1035 0 : Lazy::new(|| {
1036 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1037 0 : RateLimit::new(Duration::from_secs(10))
1038 0 : })))
1039 0 : });
1040 0 : let mut guard = LOGGED.lock().unwrap();
1041 0 : let rate_limit = &mut guard[self.op];
1042 0 : rate_limit.call(|| {
1043 0 : warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit");
1044 0 : });
1045 0 : elapsed
1046 : }
1047 : };
1048 10 : self.h.observe(ex_throttled.as_secs_f64());
1049 10 : }
1050 : }
1051 :
/// Kinds of smgr queries that are timed.
///
/// The `snake_case` serialization of each variant (e.g. `get_page_at_lsn`) is used as the
/// `smgr_query_type` label value, and the discriminant is used as an array index by
/// [`SmgrQueryTimePerTimeline`].
#[derive(
    Debug,
    Clone,
    Copy,
    IntoStaticStr,
    strum_macros::EnumCount,
    strum_macros::EnumIter,
    strum_macros::FromRepr,
    enum_map::Enum,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
    GetRelExists,
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
    GetSlruSegment,
}
1070 :
/// Smgr query timing histograms for one timeline: one global/per-timeline histogram pair
/// per [`SmgrQueryType`], indexed by the variant's discriminant.
#[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline {
    metrics: [GlobalAndPerTimelineHistogram; SmgrQueryType::COUNT],
}
1075 :
1076 102 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1077 102 : register_histogram_vec!(
1078 102 : "pageserver_smgr_query_seconds",
1079 102 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1080 102 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1081 102 : CRITICAL_OP_BUCKETS.into(),
1082 102 : )
1083 102 : .expect("failed to define a metric")
1084 102 : });
1085 :
1086 102 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1087 102 : [
1088 102 : 1,
1089 102 : 10,
1090 102 : 20,
1091 102 : 40,
1092 102 : 60,
1093 102 : 80,
1094 102 : 100,
1095 102 : 200,
1096 102 : 300,
1097 102 : 400,
1098 102 : 500,
1099 102 : 600,
1100 102 : 700,
1101 102 : 800,
1102 102 : 900,
1103 102 : 1_000, // 1ms
1104 102 : 2_000,
1105 102 : 4_000,
1106 102 : 6_000,
1107 102 : 8_000,
1108 102 : 10_000, // 10ms
1109 102 : 20_000,
1110 102 : 40_000,
1111 102 : 60_000,
1112 102 : 80_000,
1113 102 : 100_000,
1114 102 : 200_000,
1115 102 : 400_000,
1116 102 : 600_000,
1117 102 : 800_000,
1118 102 : 1_000_000, // 1s
1119 102 : 2_000_000,
1120 102 : 4_000_000,
1121 102 : 6_000_000,
1122 102 : 8_000_000,
1123 102 : 10_000_000, // 10s
1124 102 : 20_000_000,
1125 102 : 50_000_000,
1126 102 : 100_000_000,
1127 102 : 200_000_000,
1128 102 : 1_000_000_000, // 1000s
1129 102 : ]
1130 102 : .into_iter()
1131 102 : .map(Duration::from_micros)
1132 4182 : .map(|d| d.as_secs_f64())
1133 102 : .collect()
1134 102 : });
1135 :
/// `pageserver_smgr_query_seconds_global`: smgr query handling time, labelled by query
/// type only. Uses the buckets from `SMGR_QUERY_TIME_GLOBAL_BUCKETS`.
static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_smgr_query_seconds_global",
        "Time spent on smgr query handling, aggregated by query type.",
        &["smgr_query_type"],
        SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
    )
    .expect("failed to define a metric")
});
1145 :
1146 : impl SmgrQueryTimePerTimeline {
1147 330 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1148 330 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1149 330 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1150 330 : let timeline_id = timeline_id.to_string();
1151 1650 : let metrics = std::array::from_fn(|i| {
1152 1650 : let op = SmgrQueryType::from_repr(i).unwrap();
1153 1650 : let global = SMGR_QUERY_TIME_GLOBAL
1154 1650 : .get_metric_with_label_values(&[op.into()])
1155 1650 : .unwrap();
1156 1650 : let per_tenant_timeline = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1157 1650 : .get_metric_with_label_values(&[op.into(), &tenant_id, &shard_slug, &timeline_id])
1158 1650 : .unwrap();
1159 1650 : GlobalAndPerTimelineHistogram {
1160 1650 : global,
1161 1650 : per_tenant_timeline,
1162 1650 : }
1163 1650 : });
1164 330 : Self { metrics }
1165 330 : }
1166 10 : pub(crate) fn start_timer<'c: 'a, 'a>(
1167 10 : &'a self,
1168 10 : op: SmgrQueryType,
1169 10 : ctx: &'c RequestContext,
1170 10 : ) -> impl Drop + '_ {
1171 10 : let metric = &self.metrics[op as usize];
1172 10 : let start = Instant::now();
1173 10 : match ctx.micros_spent_throttled.open() {
1174 10 : Ok(()) => (),
1175 0 : Err(error) => {
1176 0 : use utils::rate_limit::RateLimit;
1177 0 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1178 0 : Lazy::new(|| {
1179 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1180 0 : RateLimit::new(Duration::from_secs(10))
1181 0 : })))
1182 0 : });
1183 0 : let mut guard = LOGGED.lock().unwrap();
1184 0 : let rate_limit = &mut guard[op];
1185 0 : rate_limit.call(|| {
1186 0 : warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1187 0 : });
1188 0 : }
1189 : }
1190 10 : GlobalAndPerTimelineHistogramTimer {
1191 10 : h: metric,
1192 10 : ctx,
1193 10 : start,
1194 10 : op,
1195 10 : }
1196 10 : }
1197 : }
1198 :
// Unit tests for `SmgrQueryType` label names and `SmgrQueryTimePerTimeline` timers.
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    use crate::{
        context::{DownloadBehavior, RequestContext},
        task_mgr::TaskKind,
    };

    // Regression test, we used hard-coded string constants before using an enum.
    // Pins the label string produced for every `SmgrQueryType` variant.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let expect: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        for (op, expect) in expect {
            let actual: &'static str = op.into();
            assert_eq!(actual, expect);
        }
    }

    // For each query type: starting and dropping a timer must add exactly one sample to
    // the per-timeline histograms and at least one to the global ones.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in &ops {
            // Fresh ids per iteration, so the per-timeline histograms start out empty.
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // Sums the sample counts over all query types, for both histogram kinds.
            let get_counts = || {
                let global: u64 = ops
                    .iter()
                    .map(|op| metrics.metrics[*op as usize].global.get_sample_count())
                    .sum();
                let per_tenant_timeline: u64 = ops
                    .iter()
                    .map(|op| {
                        metrics.metrics[*op as usize]
                            .per_tenant_timeline
                            .get_sample_count()
                    })
                    .sum();
                (global, per_tenant_timeline)
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
            let timer = metrics.start_timer(*op, &ctx);
            drop(timer);

            let (post_global, post_per_tenant_timeline) = get_counts();
            assert_eq!(post_per_tenant_timeline, 1);
            // The global histograms are shared, so only a relative check is made here.
            assert!(post_global > pre_global);
        }
    }
}
1268 :
1269 : // keep in sync with control plane Go code so that we can validate
1270 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1271 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1272 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1273 0 : [
1274 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1275 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1276 0 : ]
1277 0 : .map(|ms| (ms as f64) / 1000.0)
1278 0 : });
1279 :
/// Newtype around the basebackup query duration histogram vec.
pub(crate) struct BasebackupQueryTime(HistogramVec);
1281 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1282 0 : BasebackupQueryTime({
1283 0 : register_histogram_vec!(
1284 0 : "pageserver_basebackup_query_seconds",
1285 0 : "Histogram of basebackup queries durations, by result type",
1286 0 : &["result"],
1287 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1288 0 : )
1289 0 : .expect("failed to define a metric")
1290 0 : })
1291 0 : });
1292 :
/// An in-progress basebackup timing, created by `BasebackupQueryTime::start_recording`
/// and finished by calling `observe` with the query result.
pub(crate) struct BasebackupQueryTimeOngoingRecording<'a, 'c> {
    // Metric that receives the observation.
    parent: &'a BasebackupQueryTime,
    // Request context whose `micros_spent_throttled` is deducted from the elapsed time.
    ctx: &'c RequestContext,
    // When the recording was started.
    start: std::time::Instant,
}
1298 :
1299 : impl BasebackupQueryTime {
1300 0 : pub(crate) fn start_recording<'c: 'a, 'a>(
1301 0 : &'a self,
1302 0 : ctx: &'c RequestContext,
1303 0 : ) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
1304 0 : let start = Instant::now();
1305 0 : match ctx.micros_spent_throttled.open() {
1306 0 : Ok(()) => (),
1307 0 : Err(error) => {
1308 0 : use utils::rate_limit::RateLimit;
1309 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
1310 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1311 0 : let mut rate_limit = LOGGED.lock().unwrap();
1312 0 : rate_limit.call(|| {
1313 0 : warn!(error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1314 0 : });
1315 0 : }
1316 : }
1317 0 : BasebackupQueryTimeOngoingRecording {
1318 0 : parent: self,
1319 0 : ctx,
1320 0 : start,
1321 0 : }
1322 0 : }
1323 : }
1324 :
1325 : impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
1326 0 : pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
1327 0 : let elapsed = self.start.elapsed();
1328 0 : let ex_throttled = self
1329 0 : .ctx
1330 0 : .micros_spent_throttled
1331 0 : .close_and_checked_sub_from(elapsed);
1332 0 : let ex_throttled = match ex_throttled {
1333 0 : Ok(ex_throttled) => ex_throttled,
1334 0 : Err(error) => {
1335 0 : use utils::rate_limit::RateLimit;
1336 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
1337 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1338 0 : let mut rate_limit = LOGGED.lock().unwrap();
1339 0 : rate_limit.call(|| {
1340 0 : warn!(error, "error deducting time spent throttled; this message is logged at a global rate limit");
1341 0 : });
1342 0 : elapsed
1343 : }
1344 : };
1345 0 : let label_value = if res.is_ok() { "ok" } else { "error" };
1346 0 : let metric = self
1347 0 : .parent
1348 0 : .0
1349 0 : .get_metric_with_label_values(&[label_value])
1350 0 : .unwrap();
1351 0 : metric.observe(ex_throttled.as_secs_f64());
1352 0 : }
1353 : }
1354 :
/// `pageserver_live_connections`: number of live network connections, labelled by
/// `pageserver_connection_kind`.
pub(crate) static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_live_connections",
        "Number of live network connections",
        &["pageserver_connection_kind"]
    )
    .expect("failed to define a metric")
});
1363 :
1364 : // remote storage metrics
1365 :
1366 96 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1367 192 : register_int_counter_pair_vec!(
1368 192 : "pageserver_remote_timeline_client_calls_started",
1369 192 : "Number of started calls to remote timeline client.",
1370 192 : "pageserver_remote_timeline_client_calls_finished",
1371 192 : "Number of finshed calls to remote timeline client.",
1372 192 : &[
1373 192 : "tenant_id",
1374 192 : "shard_id",
1375 192 : "timeline_id",
1376 192 : "file_kind",
1377 192 : "op_kind"
1378 192 : ],
1379 192 : )
1380 96 : .unwrap()
1381 96 : });
1382 :
/// `pageserver_remote_timeline_client_bytes_started`: bytes associated with remote timeline
/// client operations, counted at scheduling time. Labelled by tenant, shard, timeline,
/// file kind and operation kind.
static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
    Lazy::new(|| {
        register_int_counter_vec!(
        "pageserver_remote_timeline_client_bytes_started",
        "Incremented by the number of bytes associated with a remote timeline client operation. \
         The increment happens when the operation is scheduled.",
        &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
    )
        .expect("failed to define a metric")
    });
1393 :
/// `pageserver_remote_timeline_client_bytes_finished`: bytes associated with remote timeline
/// client operations, counted when the operation finishes regardless of outcome. Labelled
/// by tenant, shard, timeline, file kind and operation kind.
static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_remote_timeline_client_bytes_finished",
        "Incremented by the number of bytes associated with a remote timeline client operation. \
         The increment happens when the operation finishes (regardless of success/failure/shutdown).",
        &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
    )
    .expect("failed to define a metric")
});
1403 :
/// Metrics of the tenant manager.
pub(crate) struct TenantManagerMetrics {
    /// Number of tenant slots that currently exist.
    pub(crate) tenant_slots: UIntGauge,
    /// Number of writes to a tenant slot.
    pub(crate) tenant_slot_writes: IntCounter,
    /// Number of unexpected conditions encountered.
    pub(crate) unexpected_errors: IntCounter,
}
1409 :
/// Global [`TenantManagerMetrics`]; all three metrics are registered on first access.
/// Panics if any registration fails.
pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
    TenantManagerMetrics {
    tenant_slots: register_uint_gauge!(
        "pageserver_tenant_manager_slots",
        "How many slots currently exist, including all attached, secondary and in-progress operations",
    )
    .expect("failed to define a metric"),
    tenant_slot_writes: register_int_counter!(
        "pageserver_tenant_manager_slot_writes",
        "Writes to a tenant slot, including all of create/attach/detach/delete"
    )
    .expect("failed to define a metric"),
    unexpected_errors: register_int_counter!(
        "pageserver_tenant_manager_unexpected_errors_total",
        "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
    )
    .expect("failed to define a metric"),
}
});
1429 :
/// Metrics of the deletion queue.
pub(crate) struct DeletionQueueMetrics {
    /// Objects submitted for deletion.
    pub(crate) keys_submitted: IntCounter,
    /// Object deletions dropped due to stale generation.
    pub(crate) keys_dropped: IntCounter,
    /// Objects actually deleted.
    pub(crate) keys_executed: IntCounter,
    /// Keys validated for deletion.
    pub(crate) keys_validated: IntCounter,
    /// Updates to remote_consistent_lsn dropped due to stale generation number.
    pub(crate) dropped_lsn_updates: IntCounter,
    /// Unexpected conditions that may stall the queue.
    pub(crate) unexpected_errors: IntCounter,
    /// Retryable remote I/O errors while executing deletions, by `op_kind`.
    pub(crate) remote_errors: IntCounterVec,
}
1439 22 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1440 22 : DeletionQueueMetrics{
1441 22 :
1442 22 : keys_submitted: register_int_counter!(
1443 22 : "pageserver_deletion_queue_submitted_total",
1444 22 : "Number of objects submitted for deletion"
1445 22 : )
1446 22 : .expect("failed to define a metric"),
1447 22 :
1448 22 : keys_dropped: register_int_counter!(
1449 22 : "pageserver_deletion_queue_dropped_total",
1450 22 : "Number of object deletions dropped due to stale generation."
1451 22 : )
1452 22 : .expect("failed to define a metric"),
1453 22 :
1454 22 : keys_executed: register_int_counter!(
1455 22 : "pageserver_deletion_queue_executed_total",
1456 22 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1457 22 : )
1458 22 : .expect("failed to define a metric"),
1459 22 :
1460 22 : keys_validated: register_int_counter!(
1461 22 : "pageserver_deletion_queue_validated_total",
1462 22 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1463 22 : )
1464 22 : .expect("failed to define a metric"),
1465 22 :
1466 22 : dropped_lsn_updates: register_int_counter!(
1467 22 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1468 22 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1469 22 : )
1470 22 : .expect("failed to define a metric"),
1471 22 : unexpected_errors: register_int_counter!(
1472 22 : "pageserver_deletion_queue_unexpected_errors_total",
1473 22 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1474 22 : )
1475 22 : .expect("failed to define a metric"),
1476 22 : remote_errors: register_int_counter_vec!(
1477 22 : "pageserver_deletion_queue_remote_errors_total",
1478 22 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1479 22 : &["op_kind"],
1480 22 : )
1481 22 : .expect("failed to define a metric")
1482 22 : }
1483 22 : });
1484 :
/// Metrics of WAL ingestion.
pub(crate) struct WalIngestMetrics {
    /// Bytes of WAL ingested from safekeepers.
    pub(crate) bytes_received: IntCounter,
    /// WAL records received from safekeepers.
    pub(crate) records_received: IntCounter,
    /// WAL records which resulted in writes to pageserver storage.
    pub(crate) records_committed: IntCounter,
    /// WAL records filtered out due to sharding.
    pub(crate) records_filtered: IntCounter,
}
1491 :
1492 2 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
1493 2 : bytes_received: register_int_counter!(
1494 2 : "pageserver_wal_ingest_bytes_received",
1495 2 : "Bytes of WAL ingested from safekeepers",
1496 2 : )
1497 2 : .unwrap(),
1498 2 : records_received: register_int_counter!(
1499 2 : "pageserver_wal_ingest_records_received",
1500 2 : "Number of WAL records received from safekeepers"
1501 2 : )
1502 2 : .expect("failed to define a metric"),
1503 2 : records_committed: register_int_counter!(
1504 2 : "pageserver_wal_ingest_records_committed",
1505 2 : "Number of WAL records which resulted in writes to pageserver storage"
1506 2 : )
1507 2 : .expect("failed to define a metric"),
1508 2 : records_filtered: register_int_counter!(
1509 2 : "pageserver_wal_ingest_records_filtered",
1510 2 : "Number of WAL records filtered out due to sharding"
1511 2 : )
1512 2 : .expect("failed to define a metric"),
1513 2 : });
/// Metrics of secondary mode (heatmap upload/download and layer download).
pub(crate) struct SecondaryModeMetrics {
    /// Heatmaps written to remote storage by attached tenants.
    pub(crate) upload_heatmap: IntCounter,
    /// Failures writing a heatmap to remote storage.
    pub(crate) upload_heatmap_errors: IntCounter,
    /// Time to build and upload a heatmap.
    pub(crate) upload_heatmap_duration: Histogram,
    /// Heatmap downloads by secondary mode locations.
    pub(crate) download_heatmap: IntCounter,
    /// Layer downloads by secondary mode locations.
    pub(crate) download_layer: IntCounter,
}
/// Global [`SecondaryModeMetrics`]; all metrics are registered on first access.
/// Panics if any registration fails.
pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
    SecondaryModeMetrics {
    upload_heatmap: register_int_counter!(
        "pageserver_secondary_upload_heatmap",
        "Number of heatmaps written to remote storage by attached tenants"
    )
    .expect("failed to define a metric"),
    upload_heatmap_errors: register_int_counter!(
        "pageserver_secondary_upload_heatmap_errors",
        "Failures writing heatmap to remote storage"
    )
    .expect("failed to define a metric"),
    // No explicit buckets are passed for this histogram.
    upload_heatmap_duration: register_histogram!(
        "pageserver_secondary_upload_heatmap_duration",
        "Time to build and upload a heatmap, including any waiting inside the S3 client"
    )
    .expect("failed to define a metric"),
    download_heatmap: register_int_counter!(
        "pageserver_secondary_download_heatmap",
        "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
    )
    .expect("failed to define a metric"),
    download_layer: register_int_counter!(
        "pageserver_secondary_download_layer",
        "Number of downloads of layers by secondary mode locations"
    )
    .expect("failed to define a metric"),
}
});
1550 :
/// Kind of operation performed against remote storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase name of this operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1566 :
/// Kind of file a remote storage operation acts on.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase name of this file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1580 :
1581 94 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1582 94 : register_histogram_vec!(
1583 94 : "pageserver_remote_operation_seconds",
1584 94 : "Time spent on remote storage operations. \
1585 94 : Grouped by tenant, timeline, operation_kind and status. \
1586 94 : Does not account for time spent waiting in remote timeline client's queues.",
1587 94 : &["file_kind", "op_kind", "status"]
1588 94 : )
1589 94 : .expect("failed to define a metric")
1590 94 : });
1591 :
/// `pageserver_tenant_task_events`: number of task start/stop/fail events, labelled by `event`.
pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_task_events",
        "Number of task start/stop/fail events.",
        &["event"],
    )
    .expect("Failed to register tenant_task_events metric")
});
1600 :
/// Started/finished counter pair for acquire calls on the background loop
/// concurrency-limiting semaphore, labelled by `task`.
///
/// Despite the `_GAUGE` suffix, this is a pair of counters, not a gauge.
pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
    register_int_counter_pair_vec!(
        "pageserver_background_loop_semaphore_wait_start_count",
        "Counter for background loop concurrency-limiting semaphore acquire calls started",
        "pageserver_background_loop_semaphore_wait_finish_count",
        "Counter for background loop concurrency-limiting semaphore acquire calls finished",
        &["task"],
    )
    .unwrap()
});
1611 :
/// `pageserver_background_loop_period_overrun_count`: incremented whenever
/// `warn_when_period_overrun()` logs a warning. Labelled by `task` and `period`.
pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_background_loop_period_overrun_count",
        "Incremented whenever warn_when_period_overrun() logs a warning.",
        &["task", "period"],
    )
    .expect("failed to define a metric")
});
1620 :
1621 : // walreceiver metrics
1622 :
/// `pageserver_walreceiver_started_connections_total`: number of started walreceiver connections.
pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_walreceiver_started_connections_total",
        "Number of started walreceiver connections"
    )
    .expect("failed to define a metric")
});
1630 :
/// `pageserver_walreceiver_active_managers`: number of active walreceiver managers.
pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
    register_int_gauge!(
        "pageserver_walreceiver_active_managers",
        "Number of active walreceiver managers"
    )
    .expect("failed to define a metric")
});
1638 :
/// `pageserver_walreceiver_switches_total`: number of walreceiver manager
/// `change_connection` calls, labelled by `reason`.
pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_walreceiver_switches_total",
        "Number of walreceiver manager change_connection calls",
        &["reason"]
    )
    .expect("failed to define a metric")
});
1647 :
/// `pageserver_walreceiver_broker_updates_total`: number of broker updates received in walreceiver.
pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_walreceiver_broker_updates_total",
        "Number of received broker updates in walreceiver"
    )
    .expect("failed to define a metric")
});
1655 :
/// `pageserver_walreceiver_candidates_events_total`: number of walreceiver candidate
/// events, labelled by `event`.
pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_walreceiver_candidates_events_total",
        "Number of walreceiver candidate events",
        &["event"]
    )
    .expect("failed to define a metric")
});
1664 :
/// The `event="add"` child of [`WALRECEIVER_CANDIDATES_EVENTS`].
pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
1667 :
/// The `event="remove"` child of [`WALRECEIVER_CANDIDATES_EVENTS`].
pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
1670 :
// Metrics collected on WAL redo operations
//
// We collect the time spent in actual WAL redo ('redo'), and time waiting
// for access to the postgres process ('wait') since there is only one for
// each tenant.

/// Time buckets are small because we want to be able to measure the
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
///
/// Expands to a `Vec<f64>` of bucket upper bounds in seconds, from 5us to 1s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
            0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
            1.000_000,
        ]
    };
}
1693 :
/// While we're at it, also measure the amount of records replayed in each
/// operation. We have a global 'total replayed' counter, but that's not
/// as useful as 'what is the skew for how many records we replay in one
/// operation'.
///
/// Expands to a `Vec<f64>` of bucket upper bounds (record counts), from 0 to 500.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
    };
}
1703 :
/// Expands to a `Vec<f64>` of bucket upper bounds in bytes, from 24 to 32768.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
            2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
        ]
    };
}
1714 :
/// `pageserver_wal_redo_seconds`: time spent on WAL redo, bucketed by
/// `redo_histogram_time_buckets!()`.
pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wal_redo_seconds",
        "Time spent on WAL redo",
        redo_histogram_time_buckets!()
    )
    .expect("failed to define a metric")
});
1723 :
/// `pageserver_wal_redo_records_histogram`: number of records replayed per redo,
/// bucketed by `redo_histogram_count_buckets!()`.
pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_wal_redo_records_histogram",
        "Histogram of number of records replayed per redo in the Postgres WAL redo process",
        redo_histogram_count_buckets!(),
    )
    .expect("failed to define a metric")
});
1732 :
1733 6 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1734 6 : register_histogram!(
1735 6 : "pageserver_wal_redo_bytes_histogram",
1736 6 : "Histogram of number of records replayed per redo sent to Postgres",
1737 6 : redo_bytes_histogram_count_buckets!(),
1738 6 : )
1739 6 : .expect("failed to define a metric")
1740 6 : });
1741 :
1742 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
1743 6 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
1744 6 : register_int_counter!(
1745 6 : "pageserver_replayed_wal_records_total",
1746 6 : "Number of WAL records replayed in WAL redo process"
1747 6 : )
1748 6 : .unwrap()
1749 6 : });
1750 :
1751 : #[rustfmt::skip]
1752 6 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1753 6 : register_histogram!(
1754 6 : "pageserver_wal_redo_process_launch_duration",
1755 6 : "Histogram of the duration of successful WalRedoProcess::launch calls",
1756 6 : vec![
1757 6 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
1758 6 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
1759 6 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
1760 6 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
1761 6 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
1762 6 : ],
1763 6 : )
1764 6 : .expect("failed to define a metric")
1765 6 : });
1766 :
1767 : pub(crate) struct WalRedoProcessCounters {
1768 : pub(crate) started: IntCounter,
1769 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
1770 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
1771 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
1772 : }
1773 :
1774 18 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
1775 : pub(crate) enum WalRedoKillCause {
1776 : WalRedoProcessDrop,
1777 : NoLeakChildDrop,
1778 : Startup,
1779 : }
1780 :
1781 : impl Default for WalRedoProcessCounters {
1782 6 : fn default() -> Self {
1783 6 : let started = register_int_counter!(
1784 6 : "pageserver_wal_redo_process_started_total",
1785 6 : "Number of WAL redo processes started",
1786 6 : )
1787 6 : .unwrap();
1788 6 :
1789 6 : let killed = register_int_counter_vec!(
1790 6 : "pageserver_wal_redo_process_stopped_total",
1791 6 : "Number of WAL redo processes stopped",
1792 6 : &["cause"],
1793 6 : )
1794 6 : .unwrap();
1795 6 :
1796 6 : let active_stderr_logger_tasks_started = register_int_counter!(
1797 6 : "pageserver_walredo_stderr_logger_tasks_started_total",
1798 6 : "Number of active walredo stderr logger tasks that have started",
1799 6 : )
1800 6 : .unwrap();
1801 6 :
1802 6 : let active_stderr_logger_tasks_finished = register_int_counter!(
1803 6 : "pageserver_walredo_stderr_logger_tasks_finished_total",
1804 6 : "Number of active walredo stderr logger tasks that have finished",
1805 6 : )
1806 6 : .unwrap();
1807 6 :
1808 6 : Self {
1809 6 : started,
1810 18 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
1811 18 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
1812 18 : let cause_str: &'static str = cause.into();
1813 18 : killed.with_label_values(&[cause_str])
1814 18 : })),
1815 6 : active_stderr_logger_tasks_started,
1816 6 : active_stderr_logger_tasks_finished,
1817 6 : }
1818 6 : }
1819 : }
1820 :
1821 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
1822 : Lazy::new(WalRedoProcessCounters::default);
1823 :
1824 : #[cfg(not(test))]
1825 : pub mod wal_redo {
1826 : use super::*;
1827 :
1828 0 : static PROCESS_KIND: Lazy<std::sync::Mutex<UIntGaugeVec>> = Lazy::new(|| {
1829 0 : std::sync::Mutex::new(
1830 0 : register_uint_gauge_vec!(
1831 0 : "pageserver_wal_redo_process_kind",
1832 0 : "The configured process kind for walredo",
1833 0 : &["kind"],
1834 0 : )
1835 0 : .unwrap(),
1836 0 : )
1837 0 : });
1838 :
1839 0 : pub fn set_process_kind_metric(kind: crate::walredo::ProcessKind) {
1840 0 : // use guard to avoid races around the next two steps
1841 0 : let guard = PROCESS_KIND.lock().unwrap();
1842 0 : guard.reset();
1843 0 : guard.with_label_values(&[&format!("{kind}")]).set(1);
1844 0 : }
1845 : }
1846 :
1847 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
1848 : pub(crate) struct StorageTimeMetricsTimer {
1849 : metrics: StorageTimeMetrics,
1850 : start: Instant,
1851 : }
1852 :
1853 : impl StorageTimeMetricsTimer {
1854 3016 : fn new(metrics: StorageTimeMetrics) -> Self {
1855 3016 : Self {
1856 3016 : metrics,
1857 3016 : start: Instant::now(),
1858 3016 : }
1859 3016 : }
1860 :
1861 : /// Record the time from creation to now.
1862 2318 : pub fn stop_and_record(self) {
1863 2318 : let duration = self.start.elapsed().as_secs_f64();
1864 2318 : self.metrics.timeline_sum.inc_by(duration);
1865 2318 : self.metrics.timeline_count.inc();
1866 2318 : self.metrics.global_histogram.observe(duration);
1867 2318 : }
1868 : }
1869 :
1870 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
1871 : /// timeline total sum and count.
1872 : #[derive(Clone, Debug)]
1873 : pub(crate) struct StorageTimeMetrics {
1874 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
1875 : timeline_sum: Counter,
1876 : /// Number of oeprations, per operation, tenant_id and timeline_id
1877 : timeline_count: IntCounter,
1878 : /// Global histogram having only the "operation" label.
1879 : global_histogram: Histogram,
1880 : }
1881 :
1882 : impl StorageTimeMetrics {
1883 2240 : pub fn new(
1884 2240 : operation: StorageTimeOperation,
1885 2240 : tenant_id: &str,
1886 2240 : shard_id: &str,
1887 2240 : timeline_id: &str,
1888 2240 : ) -> Self {
1889 2240 : let operation: &'static str = operation.into();
1890 2240 :
1891 2240 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
1892 2240 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1893 2240 : .unwrap();
1894 2240 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
1895 2240 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
1896 2240 : .unwrap();
1897 2240 : let global_histogram = STORAGE_TIME_GLOBAL
1898 2240 : .get_metric_with_label_values(&[operation])
1899 2240 : .unwrap();
1900 2240 :
1901 2240 : StorageTimeMetrics {
1902 2240 : timeline_sum,
1903 2240 : timeline_count,
1904 2240 : global_histogram,
1905 2240 : }
1906 2240 : }
1907 :
1908 : /// Starts timing a new operation.
1909 : ///
1910 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
1911 3016 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
1912 3016 : StorageTimeMetricsTimer::new(self.clone())
1913 3016 : }
1914 : }
1915 :
1916 : #[derive(Debug)]
1917 : pub(crate) struct TimelineMetrics {
1918 : tenant_id: String,
1919 : shard_id: String,
1920 : timeline_id: String,
1921 : pub flush_time_histo: StorageTimeMetrics,
1922 : pub compact_time_histo: StorageTimeMetrics,
1923 : pub create_images_time_histo: StorageTimeMetrics,
1924 : pub logical_size_histo: StorageTimeMetrics,
1925 : pub imitate_logical_size_histo: StorageTimeMetrics,
1926 : pub load_layer_map_histo: StorageTimeMetrics,
1927 : pub garbage_collect_histo: StorageTimeMetrics,
1928 : pub last_record_gauge: IntGauge,
1929 : resident_physical_size_gauge: UIntGauge,
1930 : /// copy of LayeredTimeline.current_logical_size
1931 : pub current_logical_size_gauge: UIntGauge,
1932 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
1933 : pub evictions: IntCounter,
1934 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
1935 : }
1936 :
1937 : impl TimelineMetrics {
1938 320 : pub fn new(
1939 320 : tenant_shard_id: &TenantShardId,
1940 320 : timeline_id_raw: &TimelineId,
1941 320 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
1942 320 : ) -> Self {
1943 320 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1944 320 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
1945 320 : let timeline_id = timeline_id_raw.to_string();
1946 320 : let flush_time_histo = StorageTimeMetrics::new(
1947 320 : StorageTimeOperation::LayerFlush,
1948 320 : &tenant_id,
1949 320 : &shard_id,
1950 320 : &timeline_id,
1951 320 : );
1952 320 : let compact_time_histo = StorageTimeMetrics::new(
1953 320 : StorageTimeOperation::Compact,
1954 320 : &tenant_id,
1955 320 : &shard_id,
1956 320 : &timeline_id,
1957 320 : );
1958 320 : let create_images_time_histo = StorageTimeMetrics::new(
1959 320 : StorageTimeOperation::CreateImages,
1960 320 : &tenant_id,
1961 320 : &shard_id,
1962 320 : &timeline_id,
1963 320 : );
1964 320 : let logical_size_histo = StorageTimeMetrics::new(
1965 320 : StorageTimeOperation::LogicalSize,
1966 320 : &tenant_id,
1967 320 : &shard_id,
1968 320 : &timeline_id,
1969 320 : );
1970 320 : let imitate_logical_size_histo = StorageTimeMetrics::new(
1971 320 : StorageTimeOperation::ImitateLogicalSize,
1972 320 : &tenant_id,
1973 320 : &shard_id,
1974 320 : &timeline_id,
1975 320 : );
1976 320 : let load_layer_map_histo = StorageTimeMetrics::new(
1977 320 : StorageTimeOperation::LoadLayerMap,
1978 320 : &tenant_id,
1979 320 : &shard_id,
1980 320 : &timeline_id,
1981 320 : );
1982 320 : let garbage_collect_histo = StorageTimeMetrics::new(
1983 320 : StorageTimeOperation::Gc,
1984 320 : &tenant_id,
1985 320 : &shard_id,
1986 320 : &timeline_id,
1987 320 : );
1988 320 : let last_record_gauge = LAST_RECORD_LSN
1989 320 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1990 320 : .unwrap();
1991 320 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
1992 320 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1993 320 : .unwrap();
1994 320 : // TODO: we shouldn't expose this metric
1995 320 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
1996 320 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
1997 320 : .unwrap();
1998 320 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
1999 320 : let directory_entries_count_gauge_closure = {
2000 320 : let tenant_shard_id = *tenant_shard_id;
2001 320 : let timeline_id_raw = *timeline_id_raw;
2002 0 : move || {
2003 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2004 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2005 0 : let timeline_id = timeline_id_raw.to_string();
2006 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2007 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2008 0 : .unwrap();
2009 0 : gauge
2010 0 : }
2011 : };
2012 320 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2013 320 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2014 320 : let evictions = EVICTIONS
2015 320 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2016 320 : .unwrap();
2017 320 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2018 320 : .build(&tenant_id, &shard_id, &timeline_id);
2019 320 :
2020 320 : TimelineMetrics {
2021 320 : tenant_id,
2022 320 : shard_id,
2023 320 : timeline_id,
2024 320 : flush_time_histo,
2025 320 : compact_time_histo,
2026 320 : create_images_time_histo,
2027 320 : logical_size_histo,
2028 320 : imitate_logical_size_histo,
2029 320 : garbage_collect_histo,
2030 320 : load_layer_map_histo,
2031 320 : last_record_gauge,
2032 320 : resident_physical_size_gauge,
2033 320 : current_logical_size_gauge,
2034 320 : directory_entries_count_gauge,
2035 320 : evictions,
2036 320 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2037 320 : evictions_with_low_residence_duration,
2038 320 : ),
2039 320 : }
2040 320 : }
2041 :
2042 948 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2043 948 : self.resident_physical_size_add(sz);
2044 948 : }
2045 :
2046 466 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2047 466 : self.resident_physical_size_gauge.sub(sz);
2048 466 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2049 466 : }
2050 :
2051 978 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2052 978 : self.resident_physical_size_gauge.add(sz);
2053 978 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2054 978 : }
2055 :
2056 8 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2057 8 : self.resident_physical_size_gauge.get()
2058 8 : }
2059 :
2060 8 : pub(crate) fn shutdown(&self) {
2061 8 : let tenant_id = &self.tenant_id;
2062 8 : let timeline_id = &self.timeline_id;
2063 8 : let shard_id = &self.shard_id;
2064 8 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2065 8 : {
2066 8 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2067 8 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2068 8 : }
2069 8 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2070 8 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2071 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2072 8 : }
2073 8 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2074 8 :
2075 8 : self.evictions_with_low_residence_duration
2076 8 : .write()
2077 8 : .unwrap()
2078 8 : .remove(tenant_id, shard_id, timeline_id);
2079 :
2080 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
2081 : // removed at the end of it. The idea is to have the metrics outlive the
2082 : // entity during which they're observed, e.g., the smgr metrics shall
2083 : // outlive an individual smgr connection, but not the timeline.
2084 :
2085 72 : for op in StorageTimeOperation::VARIANTS {
2086 64 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2087 64 : op,
2088 64 : tenant_id,
2089 64 : shard_id,
2090 64 : timeline_id,
2091 64 : ]);
2092 64 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2093 64 : op,
2094 64 : tenant_id,
2095 64 : shard_id,
2096 64 : timeline_id,
2097 64 : ]);
2098 64 : }
2099 :
2100 24 : for op in STORAGE_IO_SIZE_OPERATIONS {
2101 16 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2102 16 : }
2103 :
2104 48 : for op in SmgrQueryType::iter() {
2105 40 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2106 40 : op.into(),
2107 40 : tenant_id,
2108 40 : shard_id,
2109 40 : timeline_id,
2110 40 : ]);
2111 40 : }
2112 8 : }
2113 : }
2114 :
2115 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2116 6 : // Only shard zero deals in synthetic sizes
2117 6 : if tenant_shard_id.is_shard_zero() {
2118 6 : let tid = tenant_shard_id.tenant_id.to_string();
2119 6 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2120 6 : }
2121 :
2122 : // we leave the BROKEN_TENANTS_SET entry if any
2123 6 : }
2124 :
2125 : use futures::Future;
2126 : use pin_project_lite::pin_project;
2127 : use std::collections::HashMap;
2128 : use std::num::NonZeroUsize;
2129 : use std::pin::Pin;
2130 : use std::sync::{Arc, Mutex};
2131 : use std::task::{Context, Poll};
2132 : use std::time::{Duration, Instant};
2133 :
2134 : use crate::context::{PageContentKind, RequestContext};
2135 : use crate::task_mgr::TaskKind;
2136 :
2137 : /// Maintain a per timeline gauge in addition to the global gauge.
2138 : struct PerTimelineRemotePhysicalSizeGauge {
2139 : last_set: u64,
2140 : gauge: UIntGauge,
2141 : }
2142 :
2143 : impl PerTimelineRemotePhysicalSizeGauge {
2144 320 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2145 320 : Self {
2146 320 : last_set: per_timeline_gauge.get(),
2147 320 : gauge: per_timeline_gauge,
2148 320 : }
2149 320 : }
2150 1247 : fn set(&mut self, sz: u64) {
2151 1247 : self.gauge.set(sz);
2152 1247 : if sz < self.last_set {
2153 34 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set - sz);
2154 1213 : } else {
2155 1213 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - self.last_set);
2156 1213 : };
2157 1247 : self.last_set = sz;
2158 1247 : }
2159 2 : fn get(&self) -> u64 {
2160 2 : self.gauge.get()
2161 2 : }
2162 : }
2163 :
2164 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2165 8 : fn drop(&mut self) {
2166 8 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set);
2167 8 : }
2168 : }
2169 :
2170 : pub(crate) struct RemoteTimelineClientMetrics {
2171 : tenant_id: String,
2172 : shard_id: String,
2173 : timeline_id: String,
2174 : remote_physical_size_gauge: Mutex<Option<PerTimelineRemotePhysicalSizeGauge>>,
2175 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
2176 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2177 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2178 : }
2179 :
2180 : impl RemoteTimelineClientMetrics {
2181 330 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2182 330 : RemoteTimelineClientMetrics {
2183 330 : tenant_id: tenant_shard_id.tenant_id.to_string(),
2184 330 : shard_id: format!("{}", tenant_shard_id.shard_slug()),
2185 330 : timeline_id: timeline_id.to_string(),
2186 330 : calls: Mutex::new(HashMap::default()),
2187 330 : bytes_started_counter: Mutex::new(HashMap::default()),
2188 330 : bytes_finished_counter: Mutex::new(HashMap::default()),
2189 330 : remote_physical_size_gauge: Mutex::new(None),
2190 330 : }
2191 330 : }
2192 :
2193 1247 : pub(crate) fn remote_physical_size_set(&self, sz: u64) {
2194 1247 : let mut guard = self.remote_physical_size_gauge.lock().unwrap();
2195 1247 : let gauge = guard.get_or_insert_with(|| {
2196 320 : PerTimelineRemotePhysicalSizeGauge::new(
2197 320 : REMOTE_PHYSICAL_SIZE
2198 320 : .get_metric_with_label_values(&[
2199 320 : &self.tenant_id,
2200 320 : &self.shard_id,
2201 320 : &self.timeline_id,
2202 320 : ])
2203 320 : .unwrap(),
2204 320 : )
2205 1247 : });
2206 1247 : gauge.set(sz);
2207 1247 : }
2208 :
2209 2 : pub(crate) fn remote_physical_size_get(&self) -> u64 {
2210 2 : let guard = self.remote_physical_size_gauge.lock().unwrap();
2211 2 : guard.as_ref().map(|gauge| gauge.get()).unwrap_or(0)
2212 2 : }
2213 :
2214 1697 : pub fn remote_operation_time(
2215 1697 : &self,
2216 1697 : file_kind: &RemoteOpFileKind,
2217 1697 : op_kind: &RemoteOpKind,
2218 1697 : status: &'static str,
2219 1697 : ) -> Histogram {
2220 1697 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2221 1697 : REMOTE_OPERATION_TIME
2222 1697 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2223 1697 : .unwrap()
2224 1697 : }
2225 :
2226 4389 : fn calls_counter_pair(
2227 4389 : &self,
2228 4389 : file_kind: &RemoteOpFileKind,
2229 4389 : op_kind: &RemoteOpKind,
2230 4389 : ) -> IntCounterPair {
2231 4389 : let mut guard = self.calls.lock().unwrap();
2232 4389 : let key = (file_kind.as_str(), op_kind.as_str());
2233 4389 : let metric = guard.entry(key).or_insert_with(move || {
2234 554 : REMOTE_TIMELINE_CLIENT_CALLS
2235 554 : .get_metric_with_label_values(&[
2236 554 : &self.tenant_id,
2237 554 : &self.shard_id,
2238 554 : &self.timeline_id,
2239 554 : key.0,
2240 554 : key.1,
2241 554 : ])
2242 554 : .unwrap()
2243 4389 : });
2244 4389 : metric.clone()
2245 4389 : }
2246 :
2247 956 : fn bytes_started_counter(
2248 956 : &self,
2249 956 : file_kind: &RemoteOpFileKind,
2250 956 : op_kind: &RemoteOpKind,
2251 956 : ) -> IntCounter {
2252 956 : let mut guard = self.bytes_started_counter.lock().unwrap();
2253 956 : let key = (file_kind.as_str(), op_kind.as_str());
2254 956 : let metric = guard.entry(key).or_insert_with(move || {
2255 204 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2256 204 : .get_metric_with_label_values(&[
2257 204 : &self.tenant_id,
2258 204 : &self.shard_id,
2259 204 : &self.timeline_id,
2260 204 : key.0,
2261 204 : key.1,
2262 204 : ])
2263 204 : .unwrap()
2264 956 : });
2265 956 : metric.clone()
2266 956 : }
2267 :
2268 1700 : fn bytes_finished_counter(
2269 1700 : &self,
2270 1700 : file_kind: &RemoteOpFileKind,
2271 1700 : op_kind: &RemoteOpKind,
2272 1700 : ) -> IntCounter {
2273 1700 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2274 1700 : let key = (file_kind.as_str(), op_kind.as_str());
2275 1700 : let metric = guard.entry(key).or_insert_with(move || {
2276 204 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2277 204 : .get_metric_with_label_values(&[
2278 204 : &self.tenant_id,
2279 204 : &self.shard_id,
2280 204 : &self.timeline_id,
2281 204 : key.0,
2282 204 : key.1,
2283 204 : ])
2284 204 : .unwrap()
2285 1700 : });
2286 1700 : metric.clone()
2287 1700 : }
2288 : }
2289 :
/// Test-only accessors that read the cached byte counters without creating them.
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Value of the cached "bytes started" counter for the given kinds, or
    /// `None` if no such counter has been created yet.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_started_counter.lock().unwrap();
        let key = (file_kind.as_str(), op_kind.as_str());
        let counter = cache.get(&key)?;
        Some(counter.get())
    }

    /// Value of the cached "bytes finished" counter for the given kinds, or
    /// `None` if no such counter has been created yet.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_finished_counter.lock().unwrap();
        let key = (file_kind.as_str(), op_kind.as_str());
        let counter = cache.get(&key)?;
        Some(counter.get())
    }
}
2312 :
2313 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2314 : #[must_use]
2315 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2316 : /// Decremented on drop.
2317 : calls_counter_pair: Option<IntCounterPair>,
2318 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2319 : bytes_finished: Option<(IntCounter, u64)>,
2320 : }
2321 :
2322 : impl RemoteTimelineClientCallMetricGuard {
2323 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2324 : /// The caller vouches to do the metric updates manually.
2325 2362 : pub fn will_decrement_manually(mut self) {
2326 2362 : let RemoteTimelineClientCallMetricGuard {
2327 2362 : calls_counter_pair,
2328 2362 : bytes_finished,
2329 2362 : } = &mut self;
2330 2362 : calls_counter_pair.take();
2331 2362 : bytes_finished.take();
2332 2362 : }
2333 : }
2334 :
2335 : impl Drop for RemoteTimelineClientCallMetricGuard {
2336 2388 : fn drop(&mut self) {
2337 2388 : let RemoteTimelineClientCallMetricGuard {
2338 2388 : calls_counter_pair,
2339 2388 : bytes_finished,
2340 2388 : } = self;
2341 2388 : if let Some(guard) = calls_counter_pair.take() {
2342 26 : guard.dec();
2343 2362 : }
2344 2388 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2345 0 : bytes_finished_metric.inc_by(*value);
2346 2388 : }
2347 2388 : }
2348 : }
2349 :
/// Tells [`RemoteTimelineClientMetrics`] whether the byte size of a call
/// should be tracked in the applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// The call's byte size is not accounted for in any metric.
    /// `reason` is never read by the metrics code; it exists so that call
    /// sites document why they don't need the metric.
    DontTrackSize { reason: &'static str },
    /// The call's byte size is accounted for in the applicable metric(s).
    Bytes(u64),
}
2360 :
2361 : impl RemoteTimelineClientMetrics {
2362 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2363 : ///
2364 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2365 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2366 : /// is more suitable.
2367 : /// Never do both.
2368 2388 : pub(crate) fn call_begin(
2369 2388 : &self,
2370 2388 : file_kind: &RemoteOpFileKind,
2371 2388 : op_kind: &RemoteOpKind,
2372 2388 : size: RemoteTimelineClientMetricsCallTrackSize,
2373 2388 : ) -> RemoteTimelineClientCallMetricGuard {
2374 2388 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2375 2388 : calls_counter_pair.inc();
2376 :
2377 2388 : let bytes_finished = match size {
2378 1432 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2379 1432 : // nothing to do
2380 1432 : None
2381 : }
2382 956 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2383 956 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2384 956 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2385 956 : Some((finished_counter, size))
2386 : }
2387 : };
2388 2388 : RemoteTimelineClientCallMetricGuard {
2389 2388 : calls_counter_pair: Some(calls_counter_pair),
2390 2388 : bytes_finished,
2391 2388 : }
2392 2388 : }
2393 :
2394 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2395 : /// Using the guard object is generally preferable.
2396 : /// See [`call_begin`](Self::call_begin) for more context.
2397 2001 : pub(crate) fn call_end(
2398 2001 : &self,
2399 2001 : file_kind: &RemoteOpFileKind,
2400 2001 : op_kind: &RemoteOpKind,
2401 2001 : size: RemoteTimelineClientMetricsCallTrackSize,
2402 2001 : ) {
2403 2001 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2404 2001 : calls_counter_pair.dec();
2405 2001 : match size {
2406 1257 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2407 744 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2408 744 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2409 744 : }
2410 : }
2411 2001 : }
2412 : }
2413 :
2414 : impl Drop for RemoteTimelineClientMetrics {
2415 18 : fn drop(&mut self) {
2416 18 : let RemoteTimelineClientMetrics {
2417 18 : tenant_id,
2418 18 : shard_id,
2419 18 : timeline_id,
2420 18 : remote_physical_size_gauge,
2421 18 : calls,
2422 18 : bytes_started_counter,
2423 18 : bytes_finished_counter,
2424 18 : } = self;
2425 22 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
2426 22 : let mut res = [Ok(()), Ok(())];
2427 22 : REMOTE_TIMELINE_CLIENT_CALLS
2428 22 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
2429 22 : // don't care about results
2430 22 : }
2431 18 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2432 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2433 6 : tenant_id,
2434 6 : shard_id,
2435 6 : timeline_id,
2436 6 : a,
2437 6 : b,
2438 6 : ]);
2439 6 : }
2440 18 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2441 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2442 6 : tenant_id,
2443 6 : shard_id,
2444 6 : timeline_id,
2445 6 : a,
2446 6 : b,
2447 6 : ]);
2448 6 : }
2449 18 : {
2450 18 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2451 18 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2452 18 : }
2453 18 : }
2454 : }
2455 :
2456 : /// Wrapper future that measures the time spent by a remote storage operation,
2457 : /// and records the time and success/failure as a prometheus metric.
2458 : pub(crate) trait MeasureRemoteOp: Sized {
2459 1810 : fn measure_remote_op(
2460 1810 : self,
2461 1810 : file_kind: RemoteOpFileKind,
2462 1810 : op: RemoteOpKind,
2463 1810 : metrics: Arc<RemoteTimelineClientMetrics>,
2464 1810 : ) -> MeasuredRemoteOp<Self> {
2465 1810 : let start = Instant::now();
2466 1810 : MeasuredRemoteOp {
2467 1810 : inner: self,
2468 1810 : file_kind,
2469 1810 : op,
2470 1810 : start,
2471 1810 : metrics,
2472 1810 : }
2473 1810 : }
2474 : }
2475 :
2476 : impl<T: Sized> MeasureRemoteOp for T {}
2477 :
2478 : pin_project! {
2479 : pub(crate) struct MeasuredRemoteOp<F>
2480 : {
2481 : #[pin]
2482 : inner: F,
2483 : file_kind: RemoteOpFileKind,
2484 : op: RemoteOpKind,
2485 : start: Instant,
2486 : metrics: Arc<RemoteTimelineClientMetrics>,
2487 : }
2488 : }
2489 :
2490 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
2491 : type Output = Result<O, E>;
2492 :
2493 31325 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
2494 31325 : let this = self.project();
2495 31325 : let poll_result = this.inner.poll(cx);
2496 31325 : if let Poll::Ready(ref res) = poll_result {
2497 1697 : let duration = this.start.elapsed();
2498 1697 : let status = if res.is_ok() { &"success" } else { &"failure" };
2499 1697 : this.metrics
2500 1697 : .remote_operation_time(this.file_kind, this.op, status)
2501 1697 : .observe(duration.as_secs_f64());
2502 29628 : }
2503 31325 : poll_result
2504 31325 : }
2505 : }
2506 :
2507 : pub mod tokio_epoll_uring {
2508 : use metrics::{register_int_counter, UIntGauge};
2509 : use once_cell::sync::Lazy;
2510 :
2511 : pub struct Collector {
2512 : descs: Vec<metrics::core::Desc>,
2513 : systems_created: UIntGauge,
2514 : systems_destroyed: UIntGauge,
2515 : }
2516 :
2517 : impl metrics::core::Collector for Collector {
2518 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
2519 0 : self.descs.iter().collect()
2520 0 : }
2521 :
2522 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
2523 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
2524 0 : let tokio_epoll_uring::metrics::Metrics {
2525 0 : systems_created,
2526 0 : systems_destroyed,
2527 0 : } = tokio_epoll_uring::metrics::global();
2528 0 : self.systems_created.set(systems_created);
2529 0 : mfs.extend(self.systems_created.collect());
2530 0 : self.systems_destroyed.set(systems_destroyed);
2531 0 : mfs.extend(self.systems_destroyed.collect());
2532 0 : mfs
2533 0 : }
2534 : }
2535 :
2536 : impl Collector {
2537 : const NMETRICS: usize = 2;
2538 :
2539 : #[allow(clippy::new_without_default)]
2540 0 : pub fn new() -> Self {
2541 0 : let mut descs = Vec::new();
2542 0 :
2543 0 : let systems_created = UIntGauge::new(
2544 0 : "pageserver_tokio_epoll_uring_systems_created",
2545 0 : "counter of tokio-epoll-uring systems that were created",
2546 0 : )
2547 0 : .unwrap();
2548 0 : descs.extend(
2549 0 : metrics::core::Collector::desc(&systems_created)
2550 0 : .into_iter()
2551 0 : .cloned(),
2552 0 : );
2553 0 :
2554 0 : let systems_destroyed = UIntGauge::new(
2555 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
2556 0 : "counter of tokio-epoll-uring systems that were destroyed",
2557 0 : )
2558 0 : .unwrap();
2559 0 : descs.extend(
2560 0 : metrics::core::Collector::desc(&systems_destroyed)
2561 0 : .into_iter()
2562 0 : .cloned(),
2563 0 : );
2564 0 :
2565 0 : Self {
2566 0 : descs,
2567 0 : systems_created,
2568 0 : systems_destroyed,
2569 0 : }
2570 0 : }
2571 : }
2572 :
2573 61 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
2574 61 : register_int_counter!(
2575 61 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
2576 61 : "Number of times where thread_local_system creation spanned multiple executor threads",
2577 61 : )
2578 61 : .unwrap()
2579 61 : });
2580 :
/// Lazily registered counter
/// `pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count`:
/// per its help text, the number of times `thread_local_system` creation
/// failed and was retried after back-off.
///
/// Registration happens on first access; a registration error panics
/// (`.unwrap()`).
2581 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
2582 0 : register_int_counter!(
2583 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
2584 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
2585 0 : )
2586 0 : .unwrap()
2587 0 : });
2588 : }
2589 :
/// Global (not per-tenant) counters describing tenant throttling.
2590 : pub(crate) mod tenant_throttling {
2591 : use metrics::{register_int_counter_vec, IntCounter};
2592 : use once_cell::sync::Lazy;
2593 :
2594 : use crate::tenant::{self, throttle::Metric};
2595 :
/// The pair of counters updated for each observed throttling of the
/// `timeline_get` kind.
2596 : pub(crate) struct TimelineGet {
// Accumulated wait time, in microseconds.
2597 : wait_time: IntCounter,
// Number of observations recorded.
2598 : count: IntCounter,
2599 : }
2600 :
/// Lazily built [`TimelineGet`]: registers the two `kind`-labelled counter
/// vectors and resolves the children for `kind = "timeline_get"`.
///
/// A registration error panics (`.unwrap()`).
2601 102 : pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
// The counter vectors are statics local to this initializer; only the
// resolved `timeline_get` children are stored in the returned struct.
2602 102 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2603 102 : register_int_counter_vec!(
2604 102 : "pageserver_tenant_throttling_wait_usecs_sum_global",
2605 102 : "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
2606 102 : &["kind"]
2607 102 : )
2608 102 : .unwrap()
2609 102 : });
2610 102 :
2611 102 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2612 102 : register_int_counter_vec!(
2613 102 : "pageserver_tenant_throttling_count_global",
2614 102 : "Count of tenant throttlings, by kind of throttle.",
2615 102 : &["kind"]
2616 102 : )
2617 102 : .unwrap()
2618 102 : });
2619 102 :
// Value used for the `kind` label on both counters.
2620 102 : let kind = "timeline_get";
2621 102 : TimelineGet {
2622 102 : wait_time: WAIT_USECS.with_label_values(&[kind]),
2623 102 : count: WAIT_COUNT.with_label_values(&[kind]),
2624 102 : }
2625 102 : });
2626 :
/// Lets a `&'static TimelineGet` be used as a throttle `Metric`.
2627 : impl Metric for &'static TimelineGet {
/// Adds the observation's wait time (in microseconds) to `wait_time` and
/// increments `count` by one.
///
/// # Panics
///
/// Panics if the wait time in microseconds does not fit into a `u64`.
2628 : #[inline(always)]
2629 0 : fn observe_throttling(
2630 0 : &self,
2631 0 : tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
2632 0 : ) {
// `as_micros()` is converted with a checked `u64::try_from`; the counter takes `u64`.
2633 0 : let val = u64::try_from(wait_time.as_micros()).unwrap();
2634 0 : self.wait_time.inc_by(val);
2635 0 : self.count.inc();
2636 0 : }
2637 : }
2638 : }
2639 :
/// Metrics for disk-usage-based eviction.
2640 : pub(crate) mod disk_usage_based_eviction {
2641 : use super::*;
2642 :
/// Bundle of all disk-usage-based eviction metrics; built by `Default`.
2643 : pub(crate) struct Metrics {
// Histogram (seconds): time spent collecting layers from a tenant.
2644 : pub(crate) tenant_collection_time: Histogram,
// Histogram: amount of layers gathered from a tenant.
2645 : pub(crate) tenant_layer_count: Histogram,
// Counter: amount of layers collected.
2646 : pub(crate) layers_collected: IntCounter,
// Counter: amount of layers selected.
2647 : pub(crate) layers_selected: IntCounter,
// Counter: amount of layers successfully evicted.
2648 : pub(crate) layers_evicted: IntCounter,
2649 : }
2650 :
2651 : impl Default for Metrics {
/// Registers every metric in the bundle via the `register_*` macros.
///
/// # Panics
///
/// Panics if any registration returns an error (`.unwrap()`).
/// NOTE(review): presumably a second call would fail because the metric
/// names are already registered -- verify; in this module the only caller
/// is the `METRICS` lazy static.
2652 0 : fn default() -> Self {
// Explicit buckets from 1 ms to 10 s.
2653 0 : let tenant_collection_time = register_histogram!(
2654 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
2655 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
2656 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
2657 0 : )
2658 0 : .unwrap();
2659 0 :
// Explicit buckets from 5 to 50000 layers.
2660 0 : let tenant_layer_count = register_histogram!(
2661 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
2662 0 : "Amount of layers gathered from a tenant",
2663 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
2664 0 : )
2665 0 : .unwrap();
2666 0 :
2667 0 : let layers_collected = register_int_counter!(
2668 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
2669 0 : "Amount of layers collected"
2670 0 : )
2671 0 : .unwrap();
2672 0 :
2673 0 : let layers_selected = register_int_counter!(
2674 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
2675 0 : "Amount of layers selected"
2676 0 : )
2677 0 : .unwrap();
2678 0 :
2679 0 : let layers_evicted = register_int_counter!(
2680 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
2681 0 : "Amount of layers successfully evicted"
2682 0 : )
2683 0 : .unwrap();
2684 0 :
2685 0 : Self {
2686 0 : tenant_collection_time,
2687 0 : tenant_layer_count,
2688 0 : layers_collected,
2689 0 : layers_selected,
2690 0 : layers_evicted,
2691 0 : }
2692 0 : }
2693 : }
2694 :
/// Process-wide instance, built via `Metrics::default` on first access.
2695 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
2696 : }
2697 :
/// Lazily registered gauge vector
/// `pageserver_tokio_executor_thread_configured_count`, labelled by `setup`.
///
/// Written by `set_tokio_runtime_setup`. A registration error panics
/// (`.unwrap()`).
///
/// NOTE(review): the help string contains the typo "configued" and spans
/// two source lines, so the line break is part of the emitted help text.
/// Left untouched here because it is runtime text.
2698 96 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
2699 96 : register_uint_gauge_vec!(
2700 96 : "pageserver_tokio_executor_thread_configured_count",
2701 96 : "Total number of configued tokio executor threads in the process.
2702 96 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
2703 96 : &["setup"],
2704 96 : )
2705 96 : .unwrap()
2706 96 : });
2707 :
/// Records the tokio runtime setup in `TOKIO_EXECUTOR_THREAD_COUNT`: resets
/// the gauge vector, then sets the child labelled `setup` to `num_threads`.
///
/// # Panics
///
/// Panics if the internal mutex is poisoned, if the label lookup fails, or
/// if `num_threads` does not fit into a `u64`.
2708 96 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
// Function-local lock so that the reset-then-set sequence below is not
// interleaved between concurrent callers.
2709 96 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
2710 96 : let _guard = SERIALIZE.lock().unwrap();
// Presumably drops any previously set `setup` label values, leaving only
// the one written below -- confirm against the metrics crate's `reset`.
2711 96 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
2712 96 : TOKIO_EXECUTOR_THREAD_COUNT
2713 96 : .get_metric_with_label_values(&[setup])
2714 96 : .unwrap()
2715 96 : .set(u64::try_from(num_threads.get()).unwrap());
2716 96 : }
2717 :
/// Eagerly initializes a fixed set of lazily registered metrics so that they
/// exist before their first regular use.
///
/// Forces the listed `Lazy` statics and sets the tenant state gauge to 0 for
/// every `TenantState` variant name. Metrics not listed here are left to
/// initialize on first access.
2718 0 : pub fn preinitialize_metrics() {
2719 0 : // Python tests need these and on some we do alerting.
2720 0 : //
2721 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
2722 0 : // order:
2723 0 : // - global metrics reside in a Lazy<PageserverMetrics>
2724 0 : // - access via crate::metrics::PS_METRICS.materialized_page_cache_hit.inc()
2725 0 : // - could move the statics into TimelineMetrics::new()?
2726 0 :
2727 0 : // counters
2728 0 : [
2729 0 : &MATERIALIZED_PAGE_CACHE_HIT,
2730 0 : &MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
2731 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
2732 0 : &WALRECEIVER_STARTED_CONNECTIONS,
2733 0 : &WALRECEIVER_BROKER_UPDATES,
2734 0 : &WALRECEIVER_CANDIDATES_ADDED,
2735 0 : &WALRECEIVER_CANDIDATES_REMOVED,
2736 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
2737 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
2738 0 : ]
2739 0 : .into_iter()
2740 0 : .for_each(|c| {
2741 0 : Lazy::force(c);
2742 0 : });
2743 0 :
2744 0 : // Deletion queue stats
2745 0 : Lazy::force(&DELETION_QUEUE);
2746 0 :
2747 0 : // Tenant stats
2748 0 : Lazy::force(&TENANT);
2749 0 :
2750 0 : // Tenant manager stats
2751 0 : Lazy::force(&TENANT_MANAGER);
2752 0 :
// Metric bundles that are defined as their own lazily built structs.
2753 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
2754 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
2755 :
2756 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
2757 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
2758 0 : // values from last restart.
2759 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
2760 0 : }
2761 :
2762 : // countervecs
2763 0 : [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
2764 0 : .into_iter()
2765 0 : .for_each(|c| {
2766 0 : Lazy::force(c);
2767 0 : });
2768 0 :
2769 0 : // gauges
// The returned value is discarded; presumably the access alone is what
// triggers initialization of the static -- TODO confirm.
2770 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
2771 0 :
2772 0 : // histograms
2773 0 : [
2774 0 : &READ_NUM_FS_LAYERS,
2775 0 : &WAIT_LSN_TIME,
2776 0 : &WAL_REDO_TIME,
2777 0 : &WAL_REDO_RECORDS_HISTOGRAM,
2778 0 : &WAL_REDO_BYTES_HISTOGRAM,
2779 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
2780 0 : ]
2781 0 : .into_iter()
2782 0 : .for_each(|h| {
2783 0 : Lazy::force(h);
2784 0 : });
2785 0 :
2786 0 : // Custom
2787 0 : Lazy::force(&RECONSTRUCT_TIME);
2788 0 : Lazy::force(&tenant_throttling::TIMELINE_GET);
2789 0 : }
|