Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::shard::TenantShardId;
11 : use strum::{EnumCount, VariantNames};
12 : use strum_macros::{EnumVariantNames, IntoStaticStr};
13 : use tracing::warn;
14 : use utils::id::TimelineId;
15 :
/// Prometheus histogram buckets (in seconds) for operations in the critical
/// path, i.e. operations that directly affect the latency of user queries.
///
/// The buckets are the powers of ten from 1 microsecond up to 100 seconds: they
/// capture the majority of latencies in the microsecond and millisecond range
/// but also extend far enough up to distinguish "bad" from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    // 1 us, 10 us, 100 us
    1e-6, 1e-5, 1e-4,
    // 1 ms, 10 ms, 100 ms
    1e-3, 1e-2, 1e-1,
    // 1 s, 10 s, 100 s
    1e0, 1e1, 1e2,
];
28 :
29 : // Metrics collected on operations on the storage repository.
30 3072 : #[derive(Debug, EnumVariantNames, IntoStaticStr)]
31 : #[strum(serialize_all = "kebab_case")]
32 : pub(crate) enum StorageTimeOperation {
33 : #[strum(serialize = "layer flush")]
34 : LayerFlush,
35 :
36 : #[strum(serialize = "compact")]
37 : Compact,
38 :
39 : #[strum(serialize = "create images")]
40 : CreateImages,
41 :
42 : #[strum(serialize = "logical size")]
43 : LogicalSize,
44 :
45 : #[strum(serialize = "imitate logical size")]
46 : ImitateLogicalSize,
47 :
48 : #[strum(serialize = "load layer map")]
49 : LoadLayerMap,
50 :
51 : #[strum(serialize = "gc")]
52 : Gc,
53 :
54 : #[strum(serialize = "find gc cutoffs")]
55 : FindGcCutoffs,
56 : }
57 :
58 142 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
59 : register_counter_vec!(
60 : "pageserver_storage_operations_seconds_sum",
61 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
62 : &["operation", "tenant_id", "shard_id", "timeline_id"],
63 : )
64 142 : .expect("failed to define a metric")
65 142 : });
66 :
67 142 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
68 : register_int_counter_vec!(
69 : "pageserver_storage_operations_seconds_count",
70 : "Count of storage operations with operation, tenant and timeline dimensions",
71 : &["operation", "tenant_id", "shard_id", "timeline_id"],
72 : )
73 142 : .expect("failed to define a metric")
74 142 : });
75 :
/// Histogram buckets (in seconds) for background operations like compaction,
/// GC and size calculation: powers of ten from 10 ms to 1000 s.
const STORAGE_OP_BUCKETS: &[f64] = &[1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3];
78 :
79 142 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
80 : register_histogram_vec!(
81 : "pageserver_storage_operations_seconds_global",
82 : "Time spent on storage operations",
83 : &["operation"],
84 : STORAGE_OP_BUCKETS.into(),
85 : )
86 142 : .expect("failed to define a metric")
87 142 : });
88 :
89 140 : pub(crate) static READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
90 : register_histogram!(
91 : "pageserver_layers_visited_per_read_global",
92 : "Number of layers visited to reconstruct one key",
93 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
94 : )
95 140 : .expect("failed to define a metric")
96 140 : });
97 :
98 24 : pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
99 : register_histogram!(
100 : "pageserver_layers_visited_per_vectored_read_global",
101 : "Average number of layers visited to reconstruct one key",
102 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
103 : )
104 24 : .expect("failed to define a metric")
105 24 : });
106 :
107 : // Metrics collected on operations on the storage repository.
108 : #[derive(
109 560 : Clone, Copy, enum_map::Enum, strum_macros::EnumString, strum_macros::Display, IntoStaticStr,
110 : )]
111 : pub(crate) enum GetKind {
112 : Singular,
113 : Vectored,
114 : }
115 :
116 : pub(crate) struct ReconstructTimeMetrics {
117 : singular: Histogram,
118 : vectored: Histogram,
119 : }
120 :
121 140 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
122 140 : let inner = register_histogram_vec!(
123 : "pageserver_getpage_reconstruct_seconds",
124 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
125 : &["get_kind"],
126 : CRITICAL_OP_BUCKETS.into(),
127 : )
128 140 : .expect("failed to define a metric");
129 140 :
130 140 : ReconstructTimeMetrics {
131 140 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
132 140 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
133 140 : }
134 140 : });
135 :
136 : impl ReconstructTimeMetrics {
137 625914 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
138 625914 : match get_kind {
139 625844 : GetKind::Singular => &self.singular,
140 70 : GetKind::Vectored => &self.vectored,
141 : }
142 625914 : }
143 : }
144 :
145 : pub(crate) struct ReconstructDataTimeMetrics {
146 : singular: Histogram,
147 : vectored: Histogram,
148 : }
149 :
150 : impl ReconstructDataTimeMetrics {
151 626054 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
152 626054 : match get_kind {
153 625984 : GetKind::Singular => &self.singular,
154 70 : GetKind::Vectored => &self.vectored,
155 : }
156 626054 : }
157 : }
158 :
159 140 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<ReconstructDataTimeMetrics> = Lazy::new(|| {
160 140 : let inner = register_histogram_vec!(
161 : "pageserver_getpage_get_reconstruct_data_seconds",
162 : "Time spent in get_reconstruct_value_data",
163 : &["get_kind"],
164 : CRITICAL_OP_BUCKETS.into(),
165 : )
166 140 : .expect("failed to define a metric");
167 140 :
168 140 : ReconstructDataTimeMetrics {
169 140 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
170 140 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
171 140 : }
172 140 : });
173 :
174 : pub(crate) struct GetVectoredLatency {
175 : map: EnumMap<TaskKind, Option<Histogram>>,
176 : }
177 :
178 : #[allow(dead_code)]
179 : pub(crate) struct ScanLatency {
180 : map: EnumMap<TaskKind, Option<Histogram>>,
181 : }
182 :
183 : impl GetVectoredLatency {
184 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
185 : // cardinality of the metric.
186 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
187 :
188 964 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
189 964 : self.map[task_kind].as_ref()
190 964 : }
191 : }
192 :
193 : impl ScanLatency {
194 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
195 : // cardinality of the metric.
196 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
197 :
198 12 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
199 12 : self.map[task_kind].as_ref()
200 12 : }
201 : }
202 :
203 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
204 : parent: &'a Histogram,
205 : start: std::time::Instant,
206 : }
207 :
208 : impl<'a> ScanLatencyOngoingRecording<'a> {
209 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
210 0 : let start = Instant::now();
211 0 : ScanLatencyOngoingRecording { parent, start }
212 0 : }
213 :
214 0 : pub(crate) fn observe(self, throttled: Option<Duration>) {
215 0 : let elapsed = self.start.elapsed();
216 0 : let ex_throttled = if let Some(throttled) = throttled {
217 0 : elapsed.checked_sub(throttled)
218 : } else {
219 0 : Some(elapsed)
220 : };
221 0 : if let Some(ex_throttled) = ex_throttled {
222 0 : self.parent.observe(ex_throttled.as_secs_f64());
223 0 : } else {
224 0 : use utils::rate_limit::RateLimit;
225 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
226 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
227 0 : let mut rate_limit = LOGGED.lock().unwrap();
228 0 : rate_limit.call(|| {
229 0 : warn!("error deducting time spent throttled; this message is logged at a global rate limit");
230 0 : });
231 0 : }
232 0 : }
233 : }
234 :
235 134 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
236 134 : let inner = register_histogram_vec!(
237 : "pageserver_get_vectored_seconds",
238 : "Time spent in get_vectored, excluding time spent in timeline_get_throttle.",
239 : &["task_kind"],
240 : CRITICAL_OP_BUCKETS.into(),
241 : )
242 134 : .expect("failed to define a metric");
243 134 :
244 134 : GetVectoredLatency {
245 4020 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
246 4020 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
247 4020 :
248 4020 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
249 268 : let task_kind = task_kind.into();
250 268 : Some(inner.with_label_values(&[task_kind]))
251 : } else {
252 3752 : None
253 : }
254 4020 : })),
255 134 : }
256 134 : });
257 :
258 4 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
259 4 : let inner = register_histogram_vec!(
260 : "pageserver_scan_seconds",
261 : "Time spent in scan, excluding time spent in timeline_get_throttle.",
262 : &["task_kind"],
263 : CRITICAL_OP_BUCKETS.into(),
264 : )
265 4 : .expect("failed to define a metric");
266 4 :
267 4 : ScanLatency {
268 120 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
269 120 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
270 120 :
271 120 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
272 4 : let task_kind = task_kind.into();
273 4 : Some(inner.with_label_values(&[task_kind]))
274 : } else {
275 116 : None
276 : }
277 120 : })),
278 4 : }
279 4 : });
280 :
281 : pub(crate) struct PageCacheMetricsForTaskKind {
282 : pub read_accesses_immutable: IntCounter,
283 : pub read_hits_immutable: IntCounter,
284 : }
285 :
286 : pub(crate) struct PageCacheMetrics {
287 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
288 : }
289 :
290 76 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
291 : register_int_counter_vec!(
292 : "pageserver_page_cache_read_hits_total",
293 : "Number of read accesses to the page cache that hit",
294 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
295 : )
296 76 : .expect("failed to define a metric")
297 76 : });
298 :
299 76 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
300 : register_int_counter_vec!(
301 : "pageserver_page_cache_read_accesses_total",
302 : "Number of read accesses to the page cache",
303 : &["task_kind", "key_kind", "content_kind"]
304 : )
305 76 : .expect("failed to define a metric")
306 76 : });
307 :
308 76 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
309 2280 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
310 2280 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
311 2280 : let task_kind: &'static str = task_kind.into();
312 13680 : EnumMap::from_array(std::array::from_fn(|content_kind| {
313 13680 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
314 13680 : let content_kind: &'static str = content_kind.into();
315 13680 : PageCacheMetricsForTaskKind {
316 13680 : read_accesses_immutable: {
317 13680 : PAGE_CACHE_READ_ACCESSES
318 13680 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
319 13680 : .unwrap()
320 13680 : },
321 13680 :
322 13680 : read_hits_immutable: {
323 13680 : PAGE_CACHE_READ_HITS
324 13680 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
325 13680 : .unwrap()
326 13680 : },
327 13680 : }
328 13680 : }))
329 2280 : })),
330 76 : });
331 :
332 : impl PageCacheMetrics {
333 14189932 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
334 14189932 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
335 14189932 : }
336 : }
337 :
338 : pub(crate) struct PageCacheSizeMetrics {
339 : pub max_bytes: UIntGauge,
340 :
341 : pub current_bytes_immutable: UIntGauge,
342 : }
343 :
344 76 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
345 : register_uint_gauge_vec!(
346 : "pageserver_page_cache_size_current_bytes",
347 : "Current size of the page cache in bytes, by key kind",
348 : &["key_kind"]
349 : )
350 76 : .expect("failed to define a metric")
351 76 : });
352 :
353 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
354 76 : Lazy::new(|| PageCacheSizeMetrics {
355 : max_bytes: {
356 : register_uint_gauge!(
357 : "pageserver_page_cache_size_max_bytes",
358 : "Maximum size of the page cache in bytes"
359 : )
360 76 : .expect("failed to define a metric")
361 76 : },
362 76 : current_bytes_immutable: {
363 76 : PAGE_CACHE_SIZE_CURRENT_BYTES
364 76 : .get_metric_with_label_values(&["immutable"])
365 76 : .unwrap()
366 76 : },
367 76 : });
368 :
369 : pub(crate) mod page_cache_eviction_metrics {
370 : use std::num::NonZeroUsize;
371 :
372 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
373 : use once_cell::sync::Lazy;
374 :
375 : #[derive(Clone, Copy)]
376 : pub(crate) enum Outcome {
377 : FoundSlotUnused { iters: NonZeroUsize },
378 : FoundSlotEvicted { iters: NonZeroUsize },
379 : ItersExceeded { iters: NonZeroUsize },
380 : }
381 :
382 76 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
383 : register_int_counter_vec!(
384 : "pageserver_page_cache_find_victim_iters_total",
385 : "Counter for the number of iterations in the find_victim loop",
386 : &["outcome"],
387 : )
388 76 : .expect("failed to define a metric")
389 76 : });
390 :
391 76 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
392 : register_int_counter_vec!(
393 : "pageserver_page_cache_find_victim_calls",
394 : "Incremented at the end of each find_victim() call.\
395 : Filter by outcome to get e.g., eviction rate.",
396 : &["outcome"]
397 : )
398 76 : .unwrap()
399 76 : });
400 :
401 166987 : pub(crate) fn observe(outcome: Outcome) {
402 166987 : macro_rules! dry {
403 166987 : ($label:literal, $iters:expr) => {{
404 166987 : static LABEL: &'static str = $label;
405 166987 : static ITERS_TOTAL: Lazy<IntCounter> =
406 166987 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
407 166987 : static CALLS: Lazy<IntCounter> =
408 166987 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
409 166987 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
410 166987 : CALLS.inc();
411 166987 : }};
412 166987 : }
413 166987 : match outcome {
414 1900 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
415 165087 : Outcome::FoundSlotEvicted { iters } => {
416 165087 : dry!("found_evicted", iters)
417 : }
418 0 : Outcome::ItersExceeded { iters } => {
419 0 : dry!("err_iters_exceeded", iters);
420 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
421 0 : }
422 : }
423 166987 : }
424 : }
425 :
426 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
427 : register_int_counter_vec!(
428 : "page_cache_errors_total",
429 : "Number of timeouts while acquiring a pinned slot in the page cache",
430 : &["error_kind"]
431 : )
432 0 : .expect("failed to define a metric")
433 0 : });
434 :
435 0 : #[derive(IntoStaticStr)]
436 : #[strum(serialize_all = "kebab_case")]
437 : pub(crate) enum PageCacheErrorKind {
438 : AcquirePinnedSlotTimeout,
439 : EvictIterLimit,
440 : }
441 :
442 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
443 0 : PAGE_CACHE_ERRORS
444 0 : .get_metric_with_label_values(&[error_kind.into()])
445 0 : .unwrap()
446 0 : .inc();
447 0 : }
448 :
449 16 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
450 : register_histogram!(
451 : "pageserver_wait_lsn_seconds",
452 : "Time spent waiting for WAL to arrive",
453 : CRITICAL_OP_BUCKETS.into(),
454 : )
455 16 : .expect("failed to define a metric")
456 16 : });
457 :
458 142 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
459 : register_int_gauge_vec!(
460 : "pageserver_last_record_lsn",
461 : "Last record LSN grouped by timeline",
462 : &["tenant_id", "shard_id", "timeline_id"]
463 : )
464 142 : .expect("failed to define a metric")
465 142 : });
466 :
467 142 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
468 : register_uint_gauge_vec!(
469 : "pageserver_pitr_history_size",
470 : "Data written since PITR cutoff on this timeline",
471 : &["tenant_id", "shard_id", "timeline_id"]
472 : )
473 142 : .expect("failed to define a metric")
474 142 : });
475 :
476 142 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
477 : register_uint_gauge_vec!(
478 : "pageserver_archive_size",
479 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
480 : &["tenant_id", "shard_id", "timeline_id"]
481 : )
482 142 : .expect("failed to define a metric")
483 142 : });
484 :
485 142 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
486 : register_int_gauge_vec!(
487 : "pageserver_standby_horizon",
488 : "Standby apply LSN for which GC is hold off, by timeline.",
489 : &["tenant_id", "shard_id", "timeline_id"]
490 : )
491 142 : .expect("failed to define a metric")
492 142 : });
493 :
494 142 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
495 : register_uint_gauge_vec!(
496 : "pageserver_resident_physical_size",
497 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
498 : &["tenant_id", "shard_id", "timeline_id"]
499 : )
500 142 : .expect("failed to define a metric")
501 142 : });
502 :
503 138 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
504 : register_uint_gauge!(
505 : "pageserver_resident_physical_size_global",
506 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
507 : )
508 138 : .expect("failed to define a metric")
509 138 : });
510 :
511 142 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
512 : register_uint_gauge_vec!(
513 : "pageserver_remote_physical_size",
514 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
515 : // Corollary: If any files are missing from the index part, they won't be included here.
516 : &["tenant_id", "shard_id", "timeline_id"]
517 : )
518 142 : .expect("failed to define a metric")
519 142 : });
520 :
521 142 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
522 : register_uint_gauge!(
523 : "pageserver_remote_physical_size_global",
524 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
525 : )
526 142 : .expect("failed to define a metric")
527 142 : });
528 :
529 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
530 : register_int_counter!(
531 : "pageserver_remote_ondemand_downloaded_layers_total",
532 : "Total on-demand downloaded layers"
533 : )
534 4 : .unwrap()
535 4 : });
536 :
537 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
538 : register_int_counter!(
539 : "pageserver_remote_ondemand_downloaded_bytes_total",
540 : "Total bytes of layers on-demand downloaded",
541 : )
542 4 : .unwrap()
543 4 : });
544 :
545 142 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
546 : register_uint_gauge_vec!(
547 : "pageserver_current_logical_size",
548 : "Current logical size grouped by timeline",
549 : &["tenant_id", "shard_id", "timeline_id"]
550 : )
551 142 : .expect("failed to define current logical size metric")
552 142 : });
553 :
554 142 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
555 : register_int_gauge_vec!(
556 : "pageserver_aux_file_estimated_size",
557 : "The size of all aux files for a timeline in aux file v2 store.",
558 : &["tenant_id", "shard_id", "timeline_id"]
559 : )
560 142 : .expect("failed to define a metric")
561 142 : });
562 :
563 142 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
564 : register_uint_gauge_vec!(
565 : "pageserver_valid_lsn_lease_count",
566 : "The number of valid leases after refreshing gc info.",
567 : &["tenant_id", "shard_id", "timeline_id"],
568 : )
569 142 : .expect("failed to define a metric")
570 142 : });
571 :
572 : pub(crate) mod initial_logical_size {
573 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
574 : use once_cell::sync::Lazy;
575 :
576 : pub(crate) struct StartCalculation(IntCounterVec);
577 142 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
578 142 : StartCalculation(
579 142 : register_int_counter_vec!(
580 : "pageserver_initial_logical_size_start_calculation",
581 : "Incremented each time we start an initial logical size calculation attempt. \
582 : The `circumstances` label provides some additional details.",
583 : &["attempt", "circumstances"]
584 142 : )
585 142 : .unwrap(),
586 142 : )
587 142 : });
588 :
589 : struct DropCalculation {
590 : first: IntCounter,
591 : retry: IntCounter,
592 : }
593 :
594 142 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
595 142 : let vec = register_int_counter_vec!(
596 : "pageserver_initial_logical_size_drop_calculation",
597 : "Incremented each time we abort a started size calculation attmpt.",
598 : &["attempt"]
599 : )
600 142 : .unwrap();
601 142 : DropCalculation {
602 142 : first: vec.with_label_values(&["first"]),
603 142 : retry: vec.with_label_values(&["retry"]),
604 142 : }
605 142 : });
606 :
607 : pub(crate) struct Calculated {
608 : pub(crate) births: IntCounter,
609 : pub(crate) deaths: IntCounter,
610 : }
611 :
612 142 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
613 : births: register_int_counter!(
614 : "pageserver_initial_logical_size_finish_calculation",
615 : "Incremented every time we finish calculation of initial logical size.\
616 : If everything is working well, this should happen at most once per Timeline object."
617 : )
618 142 : .unwrap(),
619 : deaths: register_int_counter!(
620 : "pageserver_initial_logical_size_drop_finished_calculation",
621 : "Incremented when we drop a finished initial logical size calculation result.\
622 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
623 : )
624 142 : .unwrap(),
625 142 : });
626 :
627 : pub(crate) struct OngoingCalculationGuard {
628 : inc_drop_calculation: Option<IntCounter>,
629 : }
630 :
631 154 : #[derive(strum_macros::IntoStaticStr)]
632 : pub(crate) enum StartCircumstances {
633 : EmptyInitial,
634 : SkippedConcurrencyLimiter,
635 : AfterBackgroundTasksRateLimit,
636 : }
637 :
638 : impl StartCalculation {
639 154 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
640 154 : let circumstances_label: &'static str = circumstances.into();
641 154 : self.0
642 154 : .with_label_values(&["first", circumstances_label])
643 154 : .inc();
644 154 : OngoingCalculationGuard {
645 154 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
646 154 : }
647 154 : }
648 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
649 0 : let circumstances_label: &'static str = circumstances.into();
650 0 : self.0
651 0 : .with_label_values(&["retry", circumstances_label])
652 0 : .inc();
653 0 : OngoingCalculationGuard {
654 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
655 0 : }
656 0 : }
657 : }
658 :
659 : impl Drop for OngoingCalculationGuard {
660 154 : fn drop(&mut self) {
661 154 : if let Some(counter) = self.inc_drop_calculation.take() {
662 0 : counter.inc();
663 154 : }
664 154 : }
665 : }
666 :
667 : impl OngoingCalculationGuard {
668 154 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
669 154 : drop(self.inc_drop_calculation.take());
670 154 : CALCULATED.births.inc();
671 154 : FinishedCalculationGuard {
672 154 : inc_on_drop: CALCULATED.deaths.clone(),
673 154 : }
674 154 : }
675 : }
676 :
677 : pub(crate) struct FinishedCalculationGuard {
678 : inc_on_drop: IntCounter,
679 : }
680 :
681 : impl Drop for FinishedCalculationGuard {
682 6 : fn drop(&mut self) {
683 6 : self.inc_on_drop.inc();
684 6 : }
685 : }
686 :
687 : // context: https://github.com/neondatabase/neon/issues/5963
688 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
689 0 : Lazy::new(|| {
690 : register_int_counter!(
691 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
692 : "Counter for the following event: walreceiver calls\
693 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
694 : )
695 0 : .unwrap()
696 0 : });
697 : }
698 :
699 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
700 : register_uint_gauge_vec!(
701 : "pageserver_directory_entries_count",
702 : "Sum of the entries in pageserver-stored directory listings",
703 : &["tenant_id", "shard_id", "timeline_id"]
704 : )
705 0 : .expect("failed to define a metric")
706 0 : });
707 :
708 144 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
709 : register_uint_gauge_vec!(
710 : "pageserver_tenant_states_count",
711 : "Count of tenants per state",
712 : &["state"]
713 : )
714 144 : .expect("Failed to register pageserver_tenant_states_count metric")
715 144 : });
716 :
717 : /// A set of broken tenants.
718 : ///
719 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
720 : /// tenant.
721 12 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
722 : register_uint_gauge_vec!(
723 : "pageserver_broken_tenants_count",
724 : "Set of broken tenants",
725 : &["tenant_id", "shard_id"]
726 : )
727 12 : .expect("Failed to register pageserver_tenant_states_count metric")
728 12 : });
729 :
730 6 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
731 : register_uint_gauge_vec!(
732 : "pageserver_tenant_synthetic_cached_size_bytes",
733 : "Synthetic size of each tenant in bytes",
734 : &["tenant_id"]
735 : )
736 6 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
737 6 : });
738 :
739 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
740 : register_histogram_vec!(
741 : "pageserver_eviction_iteration_duration_seconds_global",
742 : "Time spent on a single eviction iteration",
743 : &["period_secs", "threshold_secs"],
744 : STORAGE_OP_BUCKETS.into(),
745 : )
746 0 : .expect("failed to define a metric")
747 0 : });
748 :
749 142 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
750 : register_int_counter_vec!(
751 : "pageserver_evictions",
752 : "Number of layers evicted from the pageserver",
753 : &["tenant_id", "shard_id", "timeline_id"]
754 : )
755 142 : .expect("failed to define a metric")
756 142 : });
757 :
758 142 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
759 : register_int_counter_vec!(
760 : "pageserver_evictions_with_low_residence_duration",
761 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
762 : Residence duration is determined using the `residence_duration_data_source`.",
763 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
764 : )
765 142 : .expect("failed to define a metric")
766 142 : });
767 :
768 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
769 : register_int_counter!(
770 : "pageserver_unexpected_ondemand_downloads_count",
771 : "Number of unexpected on-demand downloads. \
772 : We log more context for each increment, so, forgo any labels in this metric.",
773 : )
774 0 : .expect("failed to define a metric")
775 0 : });
776 :
777 : /// How long did we take to start up? Broken down by labels to describe
778 : /// different phases of startup.
779 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
780 : register_gauge_vec!(
781 : "pageserver_startup_duration_seconds",
782 : "Time taken by phases of pageserver startup, in seconds",
783 : &["phase"]
784 : )
785 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
786 0 : });
787 :
788 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
789 : register_uint_gauge!(
790 : "pageserver_startup_is_loading",
791 : "1 while in initial startup load of tenants, 0 at other times"
792 : )
793 0 : .expect("Failed to register pageserver_startup_is_loading")
794 0 : });
795 :
796 138 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
797 : register_uint_gauge!(
798 : "pageserver_timeline_ephemeral_bytes",
799 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
800 : )
801 138 : .expect("Failed to register metric")
802 138 : });
803 :
804 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
805 : /// like how long it took to load.
806 : ///
807 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
808 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
809 : /// at a timeline level than tenant level.
810 : pub(crate) struct TenantMetrics {
811 : /// How long did tenants take to go from construction to active state?
812 : pub(crate) activation: Histogram,
813 : pub(crate) preload: Histogram,
814 : pub(crate) attach: Histogram,
815 :
816 : /// How many tenants are included in the initial startup of the pagesrever?
817 : pub(crate) startup_scheduled: IntCounter,
818 : pub(crate) startup_complete: IntCounter,
819 : }
820 :
821 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
822 0 : TenantMetrics {
823 0 : activation: register_histogram!(
824 : "pageserver_tenant_activation_seconds",
825 : "Time taken by tenants to activate, in seconds",
826 : CRITICAL_OP_BUCKETS.into()
827 0 : )
828 0 : .expect("Failed to register metric"),
829 0 : preload: register_histogram!(
830 : "pageserver_tenant_preload_seconds",
831 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
832 : CRITICAL_OP_BUCKETS.into()
833 0 : )
834 0 : .expect("Failed to register metric"),
835 0 : attach: register_histogram!(
836 : "pageserver_tenant_attach_seconds",
837 : "Time taken by tenants to intialize, after remote metadata is already loaded",
838 : CRITICAL_OP_BUCKETS.into()
839 0 : )
840 0 : .expect("Failed to register metric"),
841 0 : startup_scheduled: register_int_counter!(
842 : "pageserver_tenant_startup_scheduled",
843 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
844 0 : ).expect("Failed to register metric"),
845 0 : startup_complete: register_int_counter!(
846 : "pageserver_tenant_startup_complete",
847 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
848 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
849 : tenants: such cases will lead to this metric never reaching the scheduled count."
850 0 : ).expect("Failed to register metric"),
851 0 : }
852 0 : });
853 :
854 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
855 : #[derive(Debug)]
856 : pub(crate) struct EvictionsWithLowResidenceDuration {
857 : data_source: &'static str,
858 : threshold: Duration,
859 : counter: Option<IntCounter>,
860 : }
861 :
862 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
863 : data_source: &'static str,
864 : threshold: Duration,
865 : }
866 :
867 : impl EvictionsWithLowResidenceDurationBuilder {
868 384 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
869 384 : Self {
870 384 : data_source,
871 384 : threshold,
872 384 : }
873 384 : }
874 :
875 384 : fn build(
876 384 : &self,
877 384 : tenant_id: &str,
878 384 : shard_id: &str,
879 384 : timeline_id: &str,
880 384 : ) -> EvictionsWithLowResidenceDuration {
881 384 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
882 384 : .get_metric_with_label_values(&[
883 384 : tenant_id,
884 384 : shard_id,
885 384 : timeline_id,
886 384 : self.data_source,
887 384 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
888 384 : ])
889 384 : .unwrap();
890 384 : EvictionsWithLowResidenceDuration {
891 384 : data_source: self.data_source,
892 384 : threshold: self.threshold,
893 384 : counter: Some(counter),
894 384 : }
895 384 : }
896 : }
897 :
898 : impl EvictionsWithLowResidenceDuration {
899 392 : fn threshold_label_value(threshold: Duration) -> String {
900 392 : format!("{}", threshold.as_secs())
901 392 : }
902 :
903 18 : pub fn observe(&self, observed_value: Duration) {
904 18 : if observed_value < self.threshold {
905 18 : self.counter
906 18 : .as_ref()
907 18 : .expect("nobody calls this function after `remove_from_vec`")
908 18 : .inc();
909 18 : }
910 18 : }
911 :
912 8 : pub fn change_threshold(
913 8 : &mut self,
914 8 : tenant_id: &str,
915 8 : shard_id: &str,
916 8 : timeline_id: &str,
917 8 : new_threshold: Duration,
918 8 : ) {
919 8 : if new_threshold == self.threshold {
920 8 : return;
921 0 : }
922 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
923 0 : self.data_source,
924 0 : new_threshold,
925 0 : )
926 0 : .build(tenant_id, shard_id, timeline_id);
927 0 : std::mem::swap(self, &mut with_new);
928 0 : with_new.remove(tenant_id, shard_id, timeline_id);
929 8 : }
930 :
931 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
932 8 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
933 8 : let Some(_counter) = self.counter.take() else {
934 0 : return;
935 : };
936 :
937 8 : let threshold = Self::threshold_label_value(self.threshold);
938 8 :
939 8 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
940 8 : tenant_id,
941 8 : shard_id,
942 8 : timeline_id,
943 8 : self.data_source,
944 8 : &threshold,
945 8 : ]);
946 8 :
947 8 : match removed {
948 0 : Err(e) => {
949 0 : // this has been hit in staging as
950 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
951 0 : // because we can be in the drop path already, don't risk:
952 0 : // - "double-panic => illegal instruction" or
953 0 : // - future "drop panick => abort"
954 0 : //
955 0 : // so just nag: (the error has the labels)
956 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
957 : }
958 : Ok(()) => {
959 : // to help identify cases where we double-remove the same values, let's log all
960 : // deletions?
961 8 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
962 : }
963 : }
964 8 : }
965 : }
966 :
// Metrics collected on disk IO operations
//
// Histogram bucket bounds in seconds, on a roughly logarithmic scale.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000_030, // 30 usec
    0.001,     // 1000 usec
    0.03,      // 30 ms
    1.0,       // 1000 ms
    30.0,      // 30000 ms
];
977 :
978 : /// VirtualFile fs operation variants.
979 : ///
980 : /// Operations:
981 : /// - open ([`std::fs::OpenOptions::open`])
982 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
983 : /// - close-by-replace (close by replacement algorithm)
984 : /// - read (`read_at`)
985 : /// - write (`write_at`)
986 : /// - seek (modify internal position or file length query)
987 : /// - fsync ([`std::fs::File::sync_all`])
988 : /// - metadata ([`std::fs::File::metadata`])
989 : #[derive(
990 1458 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
991 : )]
992 : pub(crate) enum StorageIoOperation {
993 : Open,
994 : OpenAfterReplace,
995 : Close,
996 : CloseByReplace,
997 : Read,
998 : Write,
999 : Seek,
1000 : Fsync,
1001 : Metadata,
1002 : }
1003 :
1004 : impl StorageIoOperation {
1005 1458 : pub fn as_str(&self) -> &'static str {
1006 1458 : match self {
1007 162 : StorageIoOperation::Open => "open",
1008 162 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1009 162 : StorageIoOperation::Close => "close",
1010 162 : StorageIoOperation::CloseByReplace => "close-by-replace",
1011 162 : StorageIoOperation::Read => "read",
1012 162 : StorageIoOperation::Write => "write",
1013 162 : StorageIoOperation::Seek => "seek",
1014 162 : StorageIoOperation::Fsync => "fsync",
1015 162 : StorageIoOperation::Metadata => "metadata",
1016 : }
1017 1458 : }
1018 : }
1019 :
1020 : /// Tracks time taken by fs operations near VirtualFile.
1021 : #[derive(Debug)]
1022 : pub(crate) struct StorageIoTime {
1023 : metrics: [Histogram; StorageIoOperation::COUNT],
1024 : }
1025 :
1026 : impl StorageIoTime {
1027 162 : fn new() -> Self {
1028 162 : let storage_io_histogram_vec = register_histogram_vec!(
1029 : "pageserver_io_operations_seconds",
1030 : "Time spent in IO operations",
1031 : &["operation"],
1032 : STORAGE_IO_TIME_BUCKETS.into()
1033 : )
1034 162 : .expect("failed to define a metric");
1035 1458 : let metrics = std::array::from_fn(|i| {
1036 1458 : let op = StorageIoOperation::from_repr(i).unwrap();
1037 1458 : storage_io_histogram_vec
1038 1458 : .get_metric_with_label_values(&[op.as_str()])
1039 1458 : .unwrap()
1040 1458 : });
1041 162 : Self { metrics }
1042 162 : }
1043 :
1044 1914666 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1045 1914666 : &self.metrics[op as usize]
1046 1914666 : }
1047 : }
1048 :
1049 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1050 :
/// Names of the two operations listed for IO sizes: reads and writes.
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
1052 :
1053 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1054 160 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1055 : register_int_gauge_vec!(
1056 : "pageserver_io_operations_bytes_total",
1057 : "Total amount of bytes read/written in IO operations",
1058 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1059 : )
1060 160 : .expect("failed to define a metric")
1061 160 : });
1062 :
1063 : #[cfg(not(test))]
1064 : pub(crate) mod virtual_file_descriptor_cache {
1065 : use super::*;
1066 :
1067 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1068 : register_uint_gauge!(
1069 : "pageserver_virtual_file_descriptor_cache_size_max",
1070 : "Maximum number of open file descriptors in the cache."
1071 : )
1072 0 : .unwrap()
1073 0 : });
1074 :
1075 : // SIZE_CURRENT: derive it like so:
1076 : // ```
1077 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
1078 : // -ignoring(operation)
1079 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
1080 : // ```
1081 : }
1082 :
1083 : #[cfg(not(test))]
1084 : pub(crate) mod virtual_file_io_engine {
1085 : use super::*;
1086 :
1087 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1088 : register_uint_gauge_vec!(
1089 : "pageserver_virtual_file_io_engine_kind",
1090 : "The configured io engine for VirtualFile",
1091 : &["kind"],
1092 : )
1093 0 : .unwrap()
1094 0 : });
1095 : }
1096 :
1097 : struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
1098 : global_metric: &'a Histogram,
1099 :
1100 : // Optional because not all op types are tracked per-timeline
1101 : timeline_metric: Option<&'a Histogram>,
1102 :
1103 : ctx: &'c RequestContext,
1104 : start: std::time::Instant,
1105 : op: SmgrQueryType,
1106 : }
1107 :
1108 : impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
1109 10 : fn drop(&mut self) {
1110 10 : let elapsed = self.start.elapsed();
1111 10 : let ex_throttled = self
1112 10 : .ctx
1113 10 : .micros_spent_throttled
1114 10 : .close_and_checked_sub_from(elapsed);
1115 10 : let ex_throttled = match ex_throttled {
1116 10 : Ok(res) => res,
1117 0 : Err(error) => {
1118 0 : use utils::rate_limit::RateLimit;
1119 0 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1120 0 : Lazy::new(|| {
1121 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1122 0 : RateLimit::new(Duration::from_secs(10))
1123 0 : })))
1124 0 : });
1125 0 : let mut guard = LOGGED.lock().unwrap();
1126 0 : let rate_limit = &mut guard[self.op];
1127 0 : rate_limit.call(|| {
1128 0 : warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit");
1129 0 : });
1130 0 : elapsed
1131 : }
1132 : };
1133 10 : self.global_metric.observe(ex_throttled.as_secs_f64());
1134 10 : if let Some(timeline_metric) = self.timeline_metric {
1135 2 : timeline_metric.observe(ex_throttled.as_secs_f64());
1136 8 : }
1137 10 : }
1138 : }
1139 :
1140 : #[derive(
1141 : Debug,
1142 : Clone,
1143 : Copy,
1144 2382 : IntoStaticStr,
1145 : strum_macros::EnumCount,
1146 24 : strum_macros::EnumIter,
1147 1970 : strum_macros::FromRepr,
1148 : enum_map::Enum,
1149 : )]
1150 : #[strum(serialize_all = "snake_case")]
1151 : pub enum SmgrQueryType {
1152 : GetRelExists,
1153 : GetRelSize,
1154 : GetPageAtLsn,
1155 : GetDbSize,
1156 : GetSlruSegment,
1157 : }
1158 :
1159 : #[derive(Debug)]
1160 : pub(crate) struct SmgrQueryTimePerTimeline {
1161 : global_metrics: [Histogram; SmgrQueryType::COUNT],
1162 : per_timeline_getpage: Histogram,
1163 : }
1164 :
1165 144 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1166 : register_histogram_vec!(
1167 : "pageserver_smgr_query_seconds",
1168 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1169 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1170 : CRITICAL_OP_BUCKETS.into(),
1171 : )
1172 144 : .expect("failed to define a metric")
1173 144 : });
1174 :
1175 144 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1176 144 : [
1177 144 : 1,
1178 144 : 10,
1179 144 : 20,
1180 144 : 40,
1181 144 : 60,
1182 144 : 80,
1183 144 : 100,
1184 144 : 200,
1185 144 : 300,
1186 144 : 400,
1187 144 : 500,
1188 144 : 600,
1189 144 : 700,
1190 144 : 800,
1191 144 : 900,
1192 144 : 1_000, // 1ms
1193 144 : 2_000,
1194 144 : 4_000,
1195 144 : 6_000,
1196 144 : 8_000,
1197 144 : 10_000, // 10ms
1198 144 : 20_000,
1199 144 : 40_000,
1200 144 : 60_000,
1201 144 : 80_000,
1202 144 : 100_000,
1203 144 : 200_000,
1204 144 : 400_000,
1205 144 : 600_000,
1206 144 : 800_000,
1207 144 : 1_000_000, // 1s
1208 144 : 2_000_000,
1209 144 : 4_000_000,
1210 144 : 6_000_000,
1211 144 : 8_000_000,
1212 144 : 10_000_000, // 10s
1213 144 : 20_000_000,
1214 144 : 50_000_000,
1215 144 : 100_000_000,
1216 144 : 200_000_000,
1217 144 : 1_000_000_000, // 1000s
1218 144 : ]
1219 144 : .into_iter()
1220 144 : .map(Duration::from_micros)
1221 5904 : .map(|d| d.as_secs_f64())
1222 144 : .collect()
1223 144 : });
1224 :
1225 144 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1226 : register_histogram_vec!(
1227 : "pageserver_smgr_query_seconds_global",
1228 : "Time spent on smgr query handling, aggregated by query type.",
1229 : &["smgr_query_type"],
1230 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1231 : )
1232 144 : .expect("failed to define a metric")
1233 144 : });
1234 :
1235 : impl SmgrQueryTimePerTimeline {
1236 394 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1237 394 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1238 394 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1239 394 : let timeline_id = timeline_id.to_string();
1240 1970 : let global_metrics = std::array::from_fn(|i| {
1241 1970 : let op = SmgrQueryType::from_repr(i).unwrap();
1242 1970 : SMGR_QUERY_TIME_GLOBAL
1243 1970 : .get_metric_with_label_values(&[op.into()])
1244 1970 : .unwrap()
1245 1970 : });
1246 394 :
1247 394 : let per_timeline_getpage = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1248 394 : .get_metric_with_label_values(&[
1249 394 : SmgrQueryType::GetPageAtLsn.into(),
1250 394 : &tenant_id,
1251 394 : &shard_slug,
1252 394 : &timeline_id,
1253 394 : ])
1254 394 : .unwrap();
1255 394 : Self {
1256 394 : global_metrics,
1257 394 : per_timeline_getpage,
1258 394 : }
1259 394 : }
1260 10 : pub(crate) fn start_timer<'c: 'a, 'a>(
1261 10 : &'a self,
1262 10 : op: SmgrQueryType,
1263 10 : ctx: &'c RequestContext,
1264 10 : ) -> Option<impl Drop + '_> {
1265 10 : let global_metric = &self.global_metrics[op as usize];
1266 10 : let start = Instant::now();
1267 10 : match ctx.micros_spent_throttled.open() {
1268 10 : Ok(()) => (),
1269 0 : Err(error) => {
1270 0 : use utils::rate_limit::RateLimit;
1271 0 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1272 0 : Lazy::new(|| {
1273 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1274 0 : RateLimit::new(Duration::from_secs(10))
1275 0 : })))
1276 0 : });
1277 0 : let mut guard = LOGGED.lock().unwrap();
1278 0 : let rate_limit = &mut guard[op];
1279 0 : rate_limit.call(|| {
1280 0 : warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1281 0 : });
1282 0 : }
1283 : }
1284 :
1285 10 : let timeline_metric = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1286 2 : Some(&self.per_timeline_getpage)
1287 : } else {
1288 8 : None
1289 : };
1290 :
1291 10 : Some(GlobalAndPerTimelineHistogramTimer {
1292 10 : global_metric,
1293 10 : timeline_metric,
1294 10 : ctx,
1295 10 : start,
1296 10 : op,
1297 10 : })
1298 10 : }
1299 : }
1300 :
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    use crate::{
        context::{DownloadBehavior, RequestContext},
        task_mgr::TaskKind,
    };

    // Regression test, we used hard-coded string constants before using an enum.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let cases: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        cases.into_iter().for_each(|(op, expect)| {
            let actual: &'static str = op.into();
            assert_eq!(actual, expect);
        });
    }

    // Every query type bumps the global histograms; only getpage bumps the
    // per-timeline one.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for &op in &ops {
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // (total samples over all global histograms, per-timeline samples)
            let get_counts = || {
                let global: u64 = ops
                    .iter()
                    .map(|&each| metrics.global_metrics[each as usize].get_sample_count())
                    .sum();
                let per_timeline = metrics.per_timeline_getpage.get_sample_count();
                (global, per_timeline)
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
            drop(metrics.start_timer(op, &ctx));

            let (post_global, post_per_tenant_timeline) = get_counts();
            match op {
                // getpage ops are tracked per-timeline, others aren't
                super::SmgrQueryType::GetPageAtLsn => assert_eq!(post_per_tenant_timeline, 1),
                _ => assert_eq!(post_per_tenant_timeline, 0),
            }
            assert!(post_global > pre_global);
        }
    }
}
1367 :
1368 : // keep in sync with control plane Go code so that we can validate
1369 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1370 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1371 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1372 0 : [
1373 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1374 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1375 0 : ]
1376 0 : .map(|ms| (ms as f64) / 1000.0)
1377 0 : });
1378 :
1379 : pub(crate) struct BasebackupQueryTime {
1380 : ok: Histogram,
1381 : error: Histogram,
1382 : }
1383 :
1384 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1385 0 : let vec = register_histogram_vec!(
1386 : "pageserver_basebackup_query_seconds",
1387 : "Histogram of basebackup queries durations, by result type",
1388 : &["result"],
1389 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1390 : )
1391 0 : .expect("failed to define a metric");
1392 0 : BasebackupQueryTime {
1393 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1394 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1395 0 : }
1396 0 : });
1397 :
1398 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a, 'c> {
1399 : parent: &'a BasebackupQueryTime,
1400 : ctx: &'c RequestContext,
1401 : start: std::time::Instant,
1402 : }
1403 :
1404 : impl BasebackupQueryTime {
1405 0 : pub(crate) fn start_recording<'c: 'a, 'a>(
1406 0 : &'a self,
1407 0 : ctx: &'c RequestContext,
1408 0 : ) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
1409 0 : let start = Instant::now();
1410 0 : match ctx.micros_spent_throttled.open() {
1411 0 : Ok(()) => (),
1412 0 : Err(error) => {
1413 0 : use utils::rate_limit::RateLimit;
1414 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
1415 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1416 0 : let mut rate_limit = LOGGED.lock().unwrap();
1417 0 : rate_limit.call(|| {
1418 0 : warn!(error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1419 0 : });
1420 0 : }
1421 : }
1422 0 : BasebackupQueryTimeOngoingRecording {
1423 0 : parent: self,
1424 0 : ctx,
1425 0 : start,
1426 0 : }
1427 0 : }
1428 : }
1429 :
1430 : impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
1431 0 : pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
1432 0 : let elapsed = self.start.elapsed();
1433 0 : let ex_throttled = self
1434 0 : .ctx
1435 0 : .micros_spent_throttled
1436 0 : .close_and_checked_sub_from(elapsed);
1437 0 : let ex_throttled = match ex_throttled {
1438 0 : Ok(ex_throttled) => ex_throttled,
1439 0 : Err(error) => {
1440 0 : use utils::rate_limit::RateLimit;
1441 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
1442 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1443 0 : let mut rate_limit = LOGGED.lock().unwrap();
1444 0 : rate_limit.call(|| {
1445 0 : warn!(error, "error deducting time spent throttled; this message is logged at a global rate limit");
1446 0 : });
1447 0 : elapsed
1448 : }
1449 : };
1450 0 : let metric = if res.is_ok() {
1451 0 : &self.parent.ok
1452 : } else {
1453 0 : &self.parent.error
1454 : };
1455 0 : metric.observe(ex_throttled.as_secs_f64());
1456 0 : }
1457 : }
1458 :
1459 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1460 : register_int_counter_pair_vec!(
1461 : "pageserver_live_connections_started",
1462 : "Number of network connections that we started handling",
1463 : "pageserver_live_connections_finished",
1464 : "Number of network connections that we finished handling",
1465 : &["pageserver_connection_kind"]
1466 : )
1467 0 : .expect("failed to define a metric")
1468 0 : });
1469 :
1470 0 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
1471 : pub(crate) enum ComputeCommandKind {
1472 : PageStreamV2,
1473 : PageStream,
1474 : Basebackup,
1475 : Fullbackup,
1476 : ImportBasebackup,
1477 : ImportWal,
1478 : LeaseLsn,
1479 : Show,
1480 : }
1481 :
1482 : pub(crate) struct ComputeCommandCounters {
1483 : map: EnumMap<ComputeCommandKind, IntCounter>,
1484 : }
1485 :
1486 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
1487 0 : let inner = register_int_counter_vec!(
1488 : "pageserver_compute_commands",
1489 : "Number of compute -> pageserver commands processed",
1490 : &["command"]
1491 : )
1492 0 : .expect("failed to define a metric");
1493 0 :
1494 0 : ComputeCommandCounters {
1495 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
1496 0 : let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
1497 0 : let command_str: &'static str = command.into();
1498 0 : inner.with_label_values(&[command_str])
1499 0 : })),
1500 0 : }
1501 0 : });
1502 :
1503 : impl ComputeCommandCounters {
1504 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
1505 0 : &self.map[command]
1506 0 : }
1507 : }
1508 :
1509 : // remote storage metrics
1510 :
1511 140 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1512 : register_int_counter_pair_vec!(
1513 : "pageserver_remote_timeline_client_calls_started",
1514 : "Number of started calls to remote timeline client.",
1515 : "pageserver_remote_timeline_client_calls_finished",
1516 : "Number of finshed calls to remote timeline client.",
1517 : &[
1518 : "tenant_id",
1519 : "shard_id",
1520 : "timeline_id",
1521 : "file_kind",
1522 : "op_kind"
1523 : ],
1524 : )
1525 140 : .unwrap()
1526 140 : });
1527 :
1528 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1529 136 : Lazy::new(|| {
1530 : register_int_counter_vec!(
1531 : "pageserver_remote_timeline_client_bytes_started",
1532 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1533 : The increment happens when the operation is scheduled.",
1534 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1535 : )
1536 136 : .expect("failed to define a metric")
1537 136 : });
1538 :
1539 136 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1540 : register_int_counter_vec!(
1541 : "pageserver_remote_timeline_client_bytes_finished",
1542 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1543 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1544 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1545 : )
1546 136 : .expect("failed to define a metric")
1547 136 : });
1548 :
1549 : pub(crate) struct TenantManagerMetrics {
1550 : tenant_slots_attached: UIntGauge,
1551 : tenant_slots_secondary: UIntGauge,
1552 : tenant_slots_inprogress: UIntGauge,
1553 : pub(crate) tenant_slot_writes: IntCounter,
1554 : pub(crate) unexpected_errors: IntCounter,
1555 : }
1556 :
1557 : impl TenantManagerMetrics {
1558 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
1559 : /// exactly: they track the lifetime of the slots _in the tenant map_.
1560 2 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
1561 2 : match slot {
1562 0 : TenantSlot::Attached(_) => {
1563 0 : self.tenant_slots_attached.inc();
1564 0 : }
1565 0 : TenantSlot::Secondary(_) => {
1566 0 : self.tenant_slots_secondary.inc();
1567 0 : }
1568 2 : TenantSlot::InProgress(_) => {
1569 2 : self.tenant_slots_inprogress.inc();
1570 2 : }
1571 : }
1572 2 : }
1573 :
1574 2 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
1575 2 : match slot {
1576 2 : TenantSlot::Attached(_) => {
1577 2 : self.tenant_slots_attached.dec();
1578 2 : }
1579 0 : TenantSlot::Secondary(_) => {
1580 0 : self.tenant_slots_secondary.dec();
1581 0 : }
1582 0 : TenantSlot::InProgress(_) => {
1583 0 : self.tenant_slots_inprogress.dec();
1584 0 : }
1585 : }
1586 2 : }
1587 :
1588 : #[cfg(all(debug_assertions, not(test)))]
1589 0 : pub(crate) fn slots_total(&self) -> u64 {
1590 0 : self.tenant_slots_attached.get()
1591 0 : + self.tenant_slots_secondary.get()
1592 0 : + self.tenant_slots_inprogress.get()
1593 0 : }
1594 : }
1595 :
1596 2 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1597 2 : let tenant_slots = register_uint_gauge_vec!(
1598 : "pageserver_tenant_manager_slots",
1599 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1600 : &["mode"]
1601 : )
1602 2 : .expect("failed to define a metric");
1603 2 : TenantManagerMetrics {
1604 2 : tenant_slots_attached: tenant_slots
1605 2 : .get_metric_with_label_values(&["attached"])
1606 2 : .unwrap(),
1607 2 : tenant_slots_secondary: tenant_slots
1608 2 : .get_metric_with_label_values(&["secondary"])
1609 2 : .unwrap(),
1610 2 : tenant_slots_inprogress: tenant_slots
1611 2 : .get_metric_with_label_values(&["inprogress"])
1612 2 : .unwrap(),
1613 2 : tenant_slot_writes: register_int_counter!(
1614 : "pageserver_tenant_manager_slot_writes",
1615 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1616 2 : )
1617 2 : .expect("failed to define a metric"),
1618 2 : unexpected_errors: register_int_counter!(
1619 : "pageserver_tenant_manager_unexpected_errors_total",
1620 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1621 2 : )
1622 2 : .expect("failed to define a metric"),
1623 2 : }
1624 2 : });
1625 :
1626 : pub(crate) struct DeletionQueueMetrics {
1627 : pub(crate) keys_submitted: IntCounter,
1628 : pub(crate) keys_dropped: IntCounter,
1629 : pub(crate) keys_executed: IntCounter,
1630 : pub(crate) keys_validated: IntCounter,
1631 : pub(crate) dropped_lsn_updates: IntCounter,
1632 : pub(crate) unexpected_errors: IntCounter,
1633 : pub(crate) remote_errors: IntCounterVec,
1634 : }
1635 25 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1636 25 : DeletionQueueMetrics{
1637 25 :
1638 25 : keys_submitted: register_int_counter!(
1639 : "pageserver_deletion_queue_submitted_total",
1640 : "Number of objects submitted for deletion"
1641 25 : )
1642 25 : .expect("failed to define a metric"),
1643 25 :
1644 25 : keys_dropped: register_int_counter!(
1645 : "pageserver_deletion_queue_dropped_total",
1646 : "Number of object deletions dropped due to stale generation."
1647 25 : )
1648 25 : .expect("failed to define a metric"),
1649 25 :
1650 25 : keys_executed: register_int_counter!(
1651 : "pageserver_deletion_queue_executed_total",
1652 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1653 25 : )
1654 25 : .expect("failed to define a metric"),
1655 25 :
1656 25 : keys_validated: register_int_counter!(
1657 : "pageserver_deletion_queue_validated_total",
1658 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1659 25 : )
1660 25 : .expect("failed to define a metric"),
1661 25 :
1662 25 : dropped_lsn_updates: register_int_counter!(
1663 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1664 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1665 25 : )
1666 25 : .expect("failed to define a metric"),
1667 25 : unexpected_errors: register_int_counter!(
1668 : "pageserver_deletion_queue_unexpected_errors_total",
1669 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1670 25 : )
1671 25 : .expect("failed to define a metric"),
1672 25 : remote_errors: register_int_counter_vec!(
1673 : "pageserver_deletion_queue_remote_errors_total",
1674 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1675 : &["op_kind"],
1676 25 : )
1677 25 : .expect("failed to define a metric")
1678 25 : }
1679 25 : });
1680 :
1681 : pub(crate) struct SecondaryModeMetrics {
1682 : pub(crate) upload_heatmap: IntCounter,
1683 : pub(crate) upload_heatmap_errors: IntCounter,
1684 : pub(crate) upload_heatmap_duration: Histogram,
1685 : pub(crate) download_heatmap: IntCounter,
1686 : pub(crate) download_layer: IntCounter,
1687 : }
1688 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
1689 0 : SecondaryModeMetrics {
1690 0 : upload_heatmap: register_int_counter!(
1691 : "pageserver_secondary_upload_heatmap",
1692 : "Number of heatmaps written to remote storage by attached tenants"
1693 0 : )
1694 0 : .expect("failed to define a metric"),
1695 0 : upload_heatmap_errors: register_int_counter!(
1696 : "pageserver_secondary_upload_heatmap_errors",
1697 : "Failures writing heatmap to remote storage"
1698 0 : )
1699 0 : .expect("failed to define a metric"),
1700 0 : upload_heatmap_duration: register_histogram!(
1701 : "pageserver_secondary_upload_heatmap_duration",
1702 : "Time to build and upload a heatmap, including any waiting inside the S3 client"
1703 0 : )
1704 0 : .expect("failed to define a metric"),
1705 0 : download_heatmap: register_int_counter!(
1706 : "pageserver_secondary_download_heatmap",
1707 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
1708 0 : )
1709 0 : .expect("failed to define a metric"),
1710 0 : download_layer: register_int_counter!(
1711 : "pageserver_secondary_download_layer",
1712 : "Number of downloads of layers by secondary mode locations"
1713 0 : )
1714 0 : .expect("failed to define a metric"),
1715 0 : }
1716 0 : });
1717 :
1718 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1719 : register_uint_gauge_vec!(
1720 : "pageserver_secondary_resident_physical_size",
1721 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
1722 : &["tenant_id", "shard_id"]
1723 : )
1724 0 : .expect("failed to define a metric")
1725 0 : });
1726 :
/// Kind of a remote storage operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase name of the operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1742 :
/// Kind of file a remote storage operation acts on.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase name of the file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1756 :
1757 134 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1758 : register_histogram_vec!(
1759 : "pageserver_remote_operation_seconds",
1760 : "Time spent on remote storage operations. \
1761 : Grouped by tenant, timeline, operation_kind and status. \
1762 : Does not account for time spent waiting in remote timeline client's queues.",
1763 : &["file_kind", "op_kind", "status"]
1764 : )
1765 134 : .expect("failed to define a metric")
1766 134 : });
1767 :
1768 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1769 : register_int_counter_vec!(
1770 : "pageserver_tenant_task_events",
1771 : "Number of task start/stop/fail events.",
1772 : &["event"],
1773 : )
1774 0 : .expect("Failed to register tenant_task_events metric")
1775 0 : });
1776 :
1777 20 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
1778 : register_int_counter_pair_vec!(
1779 : "pageserver_background_loop_semaphore_wait_start_count",
1780 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
1781 : "pageserver_background_loop_semaphore_wait_finish_count",
1782 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
1783 : &["task"],
1784 : )
1785 20 : .unwrap()
1786 20 : });
1787 :
1788 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
1789 : register_int_counter_vec!(
1790 : "pageserver_background_loop_period_overrun_count",
1791 : "Incremented whenever warn_when_period_overrun() logs a warning.",
1792 : &["task", "period"],
1793 : )
1794 0 : .expect("failed to define a metric")
1795 0 : });
1796 :
1797 : // walreceiver metrics
1798 :
1799 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
1800 : register_int_counter!(
1801 : "pageserver_walreceiver_started_connections_total",
1802 : "Number of started walreceiver connections"
1803 : )
1804 0 : .expect("failed to define a metric")
1805 0 : });
1806 :
1807 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
1808 : register_int_gauge!(
1809 : "pageserver_walreceiver_active_managers",
1810 : "Number of active walreceiver managers"
1811 : )
1812 0 : .expect("failed to define a metric")
1813 0 : });
1814 :
1815 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
1816 : register_int_counter_vec!(
1817 : "pageserver_walreceiver_switches_total",
1818 : "Number of walreceiver manager change_connection calls",
1819 : &["reason"]
1820 : )
1821 0 : .expect("failed to define a metric")
1822 0 : });
1823 :
1824 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
1825 : register_int_counter!(
1826 : "pageserver_walreceiver_broker_updates_total",
1827 : "Number of received broker updates in walreceiver"
1828 : )
1829 0 : .expect("failed to define a metric")
1830 0 : });
1831 :
1832 2 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1833 : register_int_counter_vec!(
1834 : "pageserver_walreceiver_candidates_events_total",
1835 : "Number of walreceiver candidate events",
1836 : &["event"]
1837 : )
1838 2 : .expect("failed to define a metric")
1839 2 : });
1840 :
1841 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
1842 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
1843 :
1844 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
1845 2 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
1846 :
1847 : // Metrics collected on WAL redo operations
1848 : //
1849 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
1850 : // for access to the postgres process ('wait') since there is only one for
1851 : // each tenant.
1852 :
/// Histogram buckets (in seconds) for WAL redo timings.
///
/// The smallest bucket is 5us so that very fast redo operations remain
/// measurable: 5us equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// The largest bucket is 1s because metrics show that we have redo durations
/// and lock times larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            // microseconds: 5, 10, 25, 50, 100, 250, 500
            0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500,
            // milliseconds: 1, 2.5, 5, 10, 25, 50, 100, 250, 500
            0.001_000, 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000,
            0.250_000, 0.500_000,
            // one second
            1.000_000,
        ]
    };
}
1869 :
/// Histogram buckets for the number of records replayed in a single redo operation.
///
/// A global 'total replayed' counter exists as well, but it cannot show the
/// skew in how many records are replayed per operation; a histogram can.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![
            0.0, 1.0, 2.0, 5.0, 10.0, // single digits up to ten
            25.0, 50.0, 100.0, 250.0, 500.0, // tens and hundreds
        ]
    };
}
1879 :
/// Histogram buckets for byte counts in WAL redo.
///
/// The values are powers of (2^.5), from 2^4.5 to 2^15 (22 buckets), each
/// rounded up to the next multiple of 8 to capture any MAXALIGNed record of
/// that size, too.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, // 2^4.5 ..= 2^8
            368.0, 512.0, 728.0, 1024.0, 1456.0, 2048.0, 2904.0, 4096.0, // 2^8.5 ..= 2^12
            5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0, // 2^12.5 ..= 2^15
        ]
    };
}
1890 :
1891 : pub(crate) struct WalIngestMetrics {
1892 : pub(crate) bytes_received: IntCounter,
1893 : pub(crate) records_received: IntCounter,
1894 : pub(crate) records_committed: IntCounter,
1895 : pub(crate) records_filtered: IntCounter,
1896 : }
1897 :
1898 2 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
1899 : bytes_received: register_int_counter!(
1900 : "pageserver_wal_ingest_bytes_received",
1901 : "Bytes of WAL ingested from safekeepers",
1902 : )
1903 2 : .unwrap(),
1904 : records_received: register_int_counter!(
1905 : "pageserver_wal_ingest_records_received",
1906 : "Number of WAL records received from safekeepers"
1907 : )
1908 2 : .expect("failed to define a metric"),
1909 : records_committed: register_int_counter!(
1910 : "pageserver_wal_ingest_records_committed",
1911 : "Number of WAL records which resulted in writes to pageserver storage"
1912 : )
1913 2 : .expect("failed to define a metric"),
1914 : records_filtered: register_int_counter!(
1915 : "pageserver_wal_ingest_records_filtered",
1916 : "Number of WAL records filtered out due to sharding"
1917 : )
1918 2 : .expect("failed to define a metric"),
1919 2 : });
1920 :
1921 6 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
1922 : register_histogram!(
1923 : "pageserver_wal_redo_seconds",
1924 : "Time spent on WAL redo",
1925 : redo_histogram_time_buckets!()
1926 : )
1927 6 : .expect("failed to define a metric")
1928 6 : });
1929 :
1930 6 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1931 : register_histogram!(
1932 : "pageserver_wal_redo_records_histogram",
1933 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
1934 : redo_histogram_count_buckets!(),
1935 : )
1936 6 : .expect("failed to define a metric")
1937 6 : });
1938 :
1939 6 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1940 : register_histogram!(
1941 : "pageserver_wal_redo_bytes_histogram",
1942 : "Histogram of number of records replayed per redo sent to Postgres",
1943 : redo_bytes_histogram_count_buckets!(),
1944 : )
1945 6 : .expect("failed to define a metric")
1946 6 : });
1947 :
1948 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
1949 6 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
1950 : register_int_counter!(
1951 : "pageserver_replayed_wal_records_total",
1952 : "Number of WAL records replayed in WAL redo process"
1953 : )
1954 6 : .unwrap()
1955 6 : });
1956 :
1957 : #[rustfmt::skip]
1958 6 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
1959 : register_histogram!(
1960 : "pageserver_wal_redo_process_launch_duration",
1961 : "Histogram of the duration of successful WalRedoProcess::launch calls",
1962 : vec![
1963 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
1964 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
1965 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
1966 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
1967 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
1968 : ],
1969 : )
1970 6 : .expect("failed to define a metric")
1971 6 : });
1972 :
1973 : pub(crate) struct WalRedoProcessCounters {
1974 : pub(crate) started: IntCounter,
1975 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
1976 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
1977 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
1978 : }
1979 :
1980 18 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
1981 : pub(crate) enum WalRedoKillCause {
1982 : WalRedoProcessDrop,
1983 : NoLeakChildDrop,
1984 : Startup,
1985 : }
1986 :
1987 : impl Default for WalRedoProcessCounters {
1988 6 : fn default() -> Self {
1989 6 : let started = register_int_counter!(
1990 : "pageserver_wal_redo_process_started_total",
1991 : "Number of WAL redo processes started",
1992 : )
1993 6 : .unwrap();
1994 6 :
1995 6 : let killed = register_int_counter_vec!(
1996 : "pageserver_wal_redo_process_stopped_total",
1997 : "Number of WAL redo processes stopped",
1998 : &["cause"],
1999 : )
2000 6 : .unwrap();
2001 6 :
2002 6 : let active_stderr_logger_tasks_started = register_int_counter!(
2003 : "pageserver_walredo_stderr_logger_tasks_started_total",
2004 : "Number of active walredo stderr logger tasks that have started",
2005 : )
2006 6 : .unwrap();
2007 6 :
2008 6 : let active_stderr_logger_tasks_finished = register_int_counter!(
2009 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2010 : "Number of active walredo stderr logger tasks that have finished",
2011 : )
2012 6 : .unwrap();
2013 6 :
2014 6 : Self {
2015 6 : started,
2016 18 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2017 18 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
2018 18 : let cause_str: &'static str = cause.into();
2019 18 : killed.with_label_values(&[cause_str])
2020 18 : })),
2021 6 : active_stderr_logger_tasks_started,
2022 6 : active_stderr_logger_tasks_finished,
2023 6 : }
2024 6 : }
2025 : }
2026 :
2027 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2028 : Lazy::new(WalRedoProcessCounters::default);
2029 :
2030 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2031 : pub(crate) struct StorageTimeMetricsTimer {
2032 : metrics: StorageTimeMetrics,
2033 : start: Instant,
2034 : }
2035 :
2036 : impl StorageTimeMetricsTimer {
2037 4611 : fn new(metrics: StorageTimeMetrics) -> Self {
2038 4611 : Self {
2039 4611 : metrics,
2040 4611 : start: Instant::now(),
2041 4611 : }
2042 4611 : }
2043 :
2044 : /// Record the time from creation to now.
2045 3502 : pub fn stop_and_record(self) {
2046 3502 : let duration = self.start.elapsed().as_secs_f64();
2047 3502 : self.metrics.timeline_sum.inc_by(duration);
2048 3502 : self.metrics.timeline_count.inc();
2049 3502 : self.metrics.global_histogram.observe(duration);
2050 3502 : }
2051 :
2052 : /// Turns this timer into a timer, which will always record -- usually this means recording
2053 : /// regardless an early `?` path was taken in a function.
2054 754 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2055 754 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2056 754 : }
2057 : }
2058 :
2059 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2060 :
2061 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2062 754 : fn drop(&mut self) {
2063 754 : if let Some(inner) = self.0.take() {
2064 754 : inner.stop_and_record();
2065 754 : }
2066 754 : }
2067 : }
2068 :
2069 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
2070 : /// timeline total sum and count.
2071 : #[derive(Clone, Debug)]
2072 : pub(crate) struct StorageTimeMetrics {
2073 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
2074 : timeline_sum: Counter,
2075 : /// Number of oeprations, per operation, tenant_id and timeline_id
2076 : timeline_count: IntCounter,
2077 : /// Global histogram having only the "operation" label.
2078 : global_histogram: Histogram,
2079 : }
2080 :
2081 : impl StorageTimeMetrics {
2082 3072 : pub fn new(
2083 3072 : operation: StorageTimeOperation,
2084 3072 : tenant_id: &str,
2085 3072 : shard_id: &str,
2086 3072 : timeline_id: &str,
2087 3072 : ) -> Self {
2088 3072 : let operation: &'static str = operation.into();
2089 3072 :
2090 3072 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2091 3072 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2092 3072 : .unwrap();
2093 3072 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2094 3072 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2095 3072 : .unwrap();
2096 3072 : let global_histogram = STORAGE_TIME_GLOBAL
2097 3072 : .get_metric_with_label_values(&[operation])
2098 3072 : .unwrap();
2099 3072 :
2100 3072 : StorageTimeMetrics {
2101 3072 : timeline_sum,
2102 3072 : timeline_count,
2103 3072 : global_histogram,
2104 3072 : }
2105 3072 : }
2106 :
2107 : /// Starts timing a new operation.
2108 : ///
2109 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2110 4611 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2111 4611 : StorageTimeMetricsTimer::new(self.clone())
2112 4611 : }
2113 : }
2114 :
2115 : #[derive(Debug)]
2116 : pub(crate) struct TimelineMetrics {
2117 : tenant_id: String,
2118 : shard_id: String,
2119 : timeline_id: String,
2120 : pub flush_time_histo: StorageTimeMetrics,
2121 : pub compact_time_histo: StorageTimeMetrics,
2122 : pub create_images_time_histo: StorageTimeMetrics,
2123 : pub logical_size_histo: StorageTimeMetrics,
2124 : pub imitate_logical_size_histo: StorageTimeMetrics,
2125 : pub load_layer_map_histo: StorageTimeMetrics,
2126 : pub garbage_collect_histo: StorageTimeMetrics,
2127 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
2128 : pub last_record_gauge: IntGauge,
2129 : pub pitr_history_size: UIntGauge,
2130 : pub archival_size: UIntGauge,
2131 : pub standby_horizon_gauge: IntGauge,
2132 : pub resident_physical_size_gauge: UIntGauge,
2133 : /// copy of LayeredTimeline.current_logical_size
2134 : pub current_logical_size_gauge: UIntGauge,
2135 : pub aux_file_size_gauge: IntGauge,
2136 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
2137 : pub evictions: IntCounter,
2138 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
2139 : /// Number of valid LSN leases.
2140 : pub valid_lsn_lease_count_gauge: UIntGauge,
2141 : shutdown: std::sync::atomic::AtomicBool,
2142 : }
2143 :
2144 : impl TimelineMetrics {
2145 384 : pub fn new(
2146 384 : tenant_shard_id: &TenantShardId,
2147 384 : timeline_id_raw: &TimelineId,
2148 384 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2149 384 : ) -> Self {
2150 384 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2151 384 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2152 384 : let timeline_id = timeline_id_raw.to_string();
2153 384 : let flush_time_histo = StorageTimeMetrics::new(
2154 384 : StorageTimeOperation::LayerFlush,
2155 384 : &tenant_id,
2156 384 : &shard_id,
2157 384 : &timeline_id,
2158 384 : );
2159 384 : let compact_time_histo = StorageTimeMetrics::new(
2160 384 : StorageTimeOperation::Compact,
2161 384 : &tenant_id,
2162 384 : &shard_id,
2163 384 : &timeline_id,
2164 384 : );
2165 384 : let create_images_time_histo = StorageTimeMetrics::new(
2166 384 : StorageTimeOperation::CreateImages,
2167 384 : &tenant_id,
2168 384 : &shard_id,
2169 384 : &timeline_id,
2170 384 : );
2171 384 : let logical_size_histo = StorageTimeMetrics::new(
2172 384 : StorageTimeOperation::LogicalSize,
2173 384 : &tenant_id,
2174 384 : &shard_id,
2175 384 : &timeline_id,
2176 384 : );
2177 384 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2178 384 : StorageTimeOperation::ImitateLogicalSize,
2179 384 : &tenant_id,
2180 384 : &shard_id,
2181 384 : &timeline_id,
2182 384 : );
2183 384 : let load_layer_map_histo = StorageTimeMetrics::new(
2184 384 : StorageTimeOperation::LoadLayerMap,
2185 384 : &tenant_id,
2186 384 : &shard_id,
2187 384 : &timeline_id,
2188 384 : );
2189 384 : let garbage_collect_histo = StorageTimeMetrics::new(
2190 384 : StorageTimeOperation::Gc,
2191 384 : &tenant_id,
2192 384 : &shard_id,
2193 384 : &timeline_id,
2194 384 : );
2195 384 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2196 384 : StorageTimeOperation::FindGcCutoffs,
2197 384 : &tenant_id,
2198 384 : &shard_id,
2199 384 : &timeline_id,
2200 384 : );
2201 384 : let last_record_gauge = LAST_RECORD_LSN
2202 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2203 384 : .unwrap();
2204 384 :
2205 384 : let pitr_history_size = PITR_HISTORY_SIZE
2206 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2207 384 : .unwrap();
2208 384 :
2209 384 : let archival_size = TIMELINE_ARCHIVE_SIZE
2210 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2211 384 : .unwrap();
2212 384 :
2213 384 : let standby_horizon_gauge = STANDBY_HORIZON
2214 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2215 384 : .unwrap();
2216 384 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2217 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2218 384 : .unwrap();
2219 384 : // TODO: we shouldn't expose this metric
2220 384 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2221 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2222 384 : .unwrap();
2223 384 : let aux_file_size_gauge = AUX_FILE_SIZE
2224 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2225 384 : .unwrap();
2226 384 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
2227 384 : let directory_entries_count_gauge_closure = {
2228 384 : let tenant_shard_id = *tenant_shard_id;
2229 384 : let timeline_id_raw = *timeline_id_raw;
2230 0 : move || {
2231 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2232 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2233 0 : let timeline_id = timeline_id_raw.to_string();
2234 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2235 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2236 0 : .unwrap();
2237 0 : gauge
2238 0 : }
2239 : };
2240 384 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2241 384 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2242 384 : let evictions = EVICTIONS
2243 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2244 384 : .unwrap();
2245 384 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2246 384 : .build(&tenant_id, &shard_id, &timeline_id);
2247 384 :
2248 384 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2249 384 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2250 384 : .unwrap();
2251 384 :
2252 384 : TimelineMetrics {
2253 384 : tenant_id,
2254 384 : shard_id,
2255 384 : timeline_id,
2256 384 : flush_time_histo,
2257 384 : compact_time_histo,
2258 384 : create_images_time_histo,
2259 384 : logical_size_histo,
2260 384 : imitate_logical_size_histo,
2261 384 : garbage_collect_histo,
2262 384 : find_gc_cutoffs_histo,
2263 384 : load_layer_map_histo,
2264 384 : last_record_gauge,
2265 384 : pitr_history_size,
2266 384 : archival_size,
2267 384 : standby_horizon_gauge,
2268 384 : resident_physical_size_gauge,
2269 384 : current_logical_size_gauge,
2270 384 : aux_file_size_gauge,
2271 384 : directory_entries_count_gauge,
2272 384 : evictions,
2273 384 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2274 384 : evictions_with_low_residence_duration,
2275 384 : ),
2276 384 : valid_lsn_lease_count_gauge,
2277 384 : shutdown: std::sync::atomic::AtomicBool::default(),
2278 384 : }
2279 384 : }
2280 :
2281 1470 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2282 1470 : self.resident_physical_size_add(sz);
2283 1470 : }
2284 :
2285 449 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2286 449 : self.resident_physical_size_gauge.sub(sz);
2287 449 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2288 449 : }
2289 :
2290 1500 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2291 1500 : self.resident_physical_size_gauge.add(sz);
2292 1500 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2293 1500 : }
2294 :
2295 8 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2296 8 : self.resident_physical_size_gauge.get()
2297 8 : }
2298 :
2299 8 : pub(crate) fn shutdown(&self) {
2300 8 : let was_shutdown = self
2301 8 : .shutdown
2302 8 : .swap(true, std::sync::atomic::Ordering::Relaxed);
2303 8 :
2304 8 : if was_shutdown {
2305 : // this happens on tenant deletion because tenant first shuts down timelines, then
2306 : // invokes timeline deletion which first shuts down the timeline again.
2307 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
2308 0 : return;
2309 8 : }
2310 8 :
2311 8 : let tenant_id = &self.tenant_id;
2312 8 : let timeline_id = &self.timeline_id;
2313 8 : let shard_id = &self.shard_id;
2314 8 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2315 8 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2316 8 : {
2317 8 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2318 8 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2319 8 : }
2320 8 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2321 8 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2322 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2323 8 : }
2324 :
2325 8 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2326 8 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2327 8 :
2328 8 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2329 8 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2330 8 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2331 8 :
2332 8 : self.evictions_with_low_residence_duration
2333 8 : .write()
2334 8 : .unwrap()
2335 8 : .remove(tenant_id, shard_id, timeline_id);
2336 :
2337 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
2338 : // removed at the end of it. The idea is to have the metrics outlive the
2339 : // entity during which they're observed, e.g., the smgr metrics shall
2340 : // outlive an individual smgr connection, but not the timeline.
2341 :
2342 72 : for op in StorageTimeOperation::VARIANTS {
2343 64 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2344 64 : op,
2345 64 : tenant_id,
2346 64 : shard_id,
2347 64 : timeline_id,
2348 64 : ]);
2349 64 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2350 64 : op,
2351 64 : tenant_id,
2352 64 : shard_id,
2353 64 : timeline_id,
2354 64 : ]);
2355 64 : }
2356 :
2357 24 : for op in STORAGE_IO_SIZE_OPERATIONS {
2358 16 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2359 16 : }
2360 :
2361 8 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2362 8 : SmgrQueryType::GetPageAtLsn.into(),
2363 8 : tenant_id,
2364 8 : shard_id,
2365 8 : timeline_id,
2366 8 : ]);
2367 8 : }
2368 : }
2369 :
2370 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2371 6 : // Only shard zero deals in synthetic sizes
2372 6 : if tenant_shard_id.is_shard_zero() {
2373 6 : let tid = tenant_shard_id.tenant_id.to_string();
2374 6 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2375 6 : }
2376 :
2377 : // we leave the BROKEN_TENANTS_SET entry if any
2378 6 : }
2379 :
2380 : use futures::Future;
2381 : use pin_project_lite::pin_project;
2382 : use std::collections::HashMap;
2383 : use std::num::NonZeroUsize;
2384 : use std::pin::Pin;
2385 : use std::sync::atomic::AtomicU64;
2386 : use std::sync::{Arc, Mutex};
2387 : use std::task::{Context, Poll};
2388 : use std::time::{Duration, Instant};
2389 :
2390 : use crate::context::{PageContentKind, RequestContext};
2391 : use crate::task_mgr::TaskKind;
2392 : use crate::tenant::mgr::TenantSlot;
2393 :
2394 : /// Maintain a per timeline gauge in addition to the global gauge.
2395 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
2396 : last_set: AtomicU64,
2397 : gauge: UIntGauge,
2398 : }
2399 :
2400 : impl PerTimelineRemotePhysicalSizeGauge {
2401 394 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2402 394 : Self {
2403 394 : last_set: AtomicU64::new(0),
2404 394 : gauge: per_timeline_gauge,
2405 394 : }
2406 394 : }
2407 1742 : pub(crate) fn set(&self, sz: u64) {
2408 1742 : self.gauge.set(sz);
2409 1742 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
2410 1742 : if sz < prev {
2411 26 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
2412 1716 : } else {
2413 1716 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
2414 1716 : };
2415 1742 : }
2416 2 : pub(crate) fn get(&self) -> u64 {
2417 2 : self.gauge.get()
2418 2 : }
2419 : }
2420 :
2421 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2422 18 : fn drop(&mut self) {
2423 18 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
2424 18 : }
2425 : }
2426 :
2427 : pub(crate) struct RemoteTimelineClientMetrics {
2428 : tenant_id: String,
2429 : shard_id: String,
2430 : timeline_id: String,
2431 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
2432 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
2433 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2434 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2435 : }
2436 :
2437 : impl RemoteTimelineClientMetrics {
2438 394 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2439 394 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
2440 394 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
2441 394 : let timeline_id_str = timeline_id.to_string();
2442 394 :
2443 394 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
2444 394 : REMOTE_PHYSICAL_SIZE
2445 394 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
2446 394 : .unwrap(),
2447 394 : );
2448 394 :
2449 394 : RemoteTimelineClientMetrics {
2450 394 : tenant_id: tenant_id_str,
2451 394 : shard_id: shard_id_str,
2452 394 : timeline_id: timeline_id_str,
2453 394 : calls: Mutex::new(HashMap::default()),
2454 394 : bytes_started_counter: Mutex::new(HashMap::default()),
2455 394 : bytes_finished_counter: Mutex::new(HashMap::default()),
2456 394 : remote_physical_size_gauge,
2457 394 : }
2458 394 : }
2459 :
2460 2688 : pub fn remote_operation_time(
2461 2688 : &self,
2462 2688 : file_kind: &RemoteOpFileKind,
2463 2688 : op_kind: &RemoteOpKind,
2464 2688 : status: &'static str,
2465 2688 : ) -> Histogram {
2466 2688 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2467 2688 : REMOTE_OPERATION_TIME
2468 2688 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2469 2688 : .unwrap()
2470 2688 : }
2471 :
2472 6302 : fn calls_counter_pair(
2473 6302 : &self,
2474 6302 : file_kind: &RemoteOpFileKind,
2475 6302 : op_kind: &RemoteOpKind,
2476 6302 : ) -> IntCounterPair {
2477 6302 : let mut guard = self.calls.lock().unwrap();
2478 6302 : let key = (file_kind.as_str(), op_kind.as_str());
2479 6302 : let metric = guard.entry(key).or_insert_with(move || {
2480 682 : REMOTE_TIMELINE_CLIENT_CALLS
2481 682 : .get_metric_with_label_values(&[
2482 682 : &self.tenant_id,
2483 682 : &self.shard_id,
2484 682 : &self.timeline_id,
2485 682 : key.0,
2486 682 : key.1,
2487 682 : ])
2488 682 : .unwrap()
2489 6302 : });
2490 6302 : metric.clone()
2491 6302 : }
2492 :
2493 1478 : fn bytes_started_counter(
2494 1478 : &self,
2495 1478 : file_kind: &RemoteOpFileKind,
2496 1478 : op_kind: &RemoteOpKind,
2497 1478 : ) -> IntCounter {
2498 1478 : let mut guard = self.bytes_started_counter.lock().unwrap();
2499 1478 : let key = (file_kind.as_str(), op_kind.as_str());
2500 1478 : let metric = guard.entry(key).or_insert_with(move || {
2501 258 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2502 258 : .get_metric_with_label_values(&[
2503 258 : &self.tenant_id,
2504 258 : &self.shard_id,
2505 258 : &self.timeline_id,
2506 258 : key.0,
2507 258 : key.1,
2508 258 : ])
2509 258 : .unwrap()
2510 1478 : });
2511 1478 : metric.clone()
2512 1478 : }
2513 :
2514 2782 : fn bytes_finished_counter(
2515 2782 : &self,
2516 2782 : file_kind: &RemoteOpFileKind,
2517 2782 : op_kind: &RemoteOpKind,
2518 2782 : ) -> IntCounter {
2519 2782 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2520 2782 : let key = (file_kind.as_str(), op_kind.as_str());
2521 2782 : let metric = guard.entry(key).or_insert_with(move || {
2522 258 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2523 258 : .get_metric_with_label_values(&[
2524 258 : &self.tenant_id,
2525 258 : &self.shard_id,
2526 258 : &self.timeline_id,
2527 258 : key.0,
2528 258 : key.1,
2529 258 : ])
2530 258 : .unwrap()
2531 2782 : });
2532 2782 : metric.clone()
2533 2782 : }
2534 : }
2535 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Test helper: value of the cached "bytes started" counter for
    /// `(file_kind, op_kind)`, or `None` if no such counter has been cached yet.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_started_counter.lock().unwrap();
        let cached = cache.get(&(file_kind.as_str(), op_kind.as_str()));
        cached.map(|counter| counter.get())
    }

    /// Test helper: value of the cached "bytes finished" counter for
    /// `(file_kind, op_kind)`, or `None` if no such counter has been cached yet.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_finished_counter.lock().unwrap();
        let cached = cache.get(&(file_kind.as_str(), op_kind.as_str()));
        cached.map(|counter| counter.get())
    }
}
2558 :
2559 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2560 : #[must_use]
2561 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2562 : /// Decremented on drop.
2563 : calls_counter_pair: Option<IntCounterPair>,
2564 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2565 : bytes_finished: Option<(IntCounter, u64)>,
2566 : }
2567 :
2568 : impl RemoteTimelineClientCallMetricGuard {
2569 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2570 : /// The caller vouches to do the metric updates manually.
2571 3329 : pub fn will_decrement_manually(mut self) {
2572 3329 : let RemoteTimelineClientCallMetricGuard {
2573 3329 : calls_counter_pair,
2574 3329 : bytes_finished,
2575 3329 : } = &mut self;
2576 3329 : calls_counter_pair.take();
2577 3329 : bytes_finished.take();
2578 3329 : }
2579 : }
2580 :
2581 : impl Drop for RemoteTimelineClientCallMetricGuard {
2582 3355 : fn drop(&mut self) {
2583 3355 : let RemoteTimelineClientCallMetricGuard {
2584 3355 : calls_counter_pair,
2585 3355 : bytes_finished,
2586 3355 : } = self;
2587 3355 : if let Some(guard) = calls_counter_pair.take() {
2588 26 : guard.dec();
2589 3329 : }
2590 3355 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2591 0 : bytes_finished_metric.inc_by(*value);
2592 3355 : }
2593 3355 : }
2594 : }
2595 :
/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
/// track the byte size of this call in applicable metric(s).
///
/// Passed to both `call_begin` and `call_end` of [`RemoteTimelineClientMetrics`].
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// Do not account for this call's byte size in any metrics.
    /// The `reason` field is there to make the call sites self-documenting
    /// about why they don't need the metric.
    DontTrackSize { reason: &'static str },
    /// Track the byte size of the call in applicable metric(s).
    /// The payload is the size in bytes.
    Bytes(u64),
}
2606 :
2607 : impl RemoteTimelineClientMetrics {
2608 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2609 : ///
2610 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2611 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2612 : /// is more suitable.
2613 : /// Never do both.
2614 3355 : pub(crate) fn call_begin(
2615 3355 : &self,
2616 3355 : file_kind: &RemoteOpFileKind,
2617 3355 : op_kind: &RemoteOpKind,
2618 3355 : size: RemoteTimelineClientMetricsCallTrackSize,
2619 3355 : ) -> RemoteTimelineClientCallMetricGuard {
2620 3355 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2621 3355 : calls_counter_pair.inc();
2622 :
2623 3355 : let bytes_finished = match size {
2624 1877 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2625 1877 : // nothing to do
2626 1877 : None
2627 : }
2628 1478 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2629 1478 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2630 1478 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2631 1478 : Some((finished_counter, size))
2632 : }
2633 : };
2634 3355 : RemoteTimelineClientCallMetricGuard {
2635 3355 : calls_counter_pair: Some(calls_counter_pair),
2636 3355 : bytes_finished,
2637 3355 : }
2638 3355 : }
2639 :
2640 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2641 : /// Using the guard object is generally preferable.
2642 : /// See [`call_begin`](Self::call_begin) for more context.
2643 2947 : pub(crate) fn call_end(
2644 2947 : &self,
2645 2947 : file_kind: &RemoteOpFileKind,
2646 2947 : op_kind: &RemoteOpKind,
2647 2947 : size: RemoteTimelineClientMetricsCallTrackSize,
2648 2947 : ) {
2649 2947 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2650 2947 : calls_counter_pair.dec();
2651 2947 : match size {
2652 1643 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2653 1304 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2654 1304 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2655 1304 : }
2656 : }
2657 2947 : }
2658 : }
2659 :
2660 : impl Drop for RemoteTimelineClientMetrics {
2661 18 : fn drop(&mut self) {
2662 18 : let RemoteTimelineClientMetrics {
2663 18 : tenant_id,
2664 18 : shard_id,
2665 18 : timeline_id,
2666 18 : remote_physical_size_gauge,
2667 18 : calls,
2668 18 : bytes_started_counter,
2669 18 : bytes_finished_counter,
2670 18 : } = self;
2671 24 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
2672 24 : let mut res = [Ok(()), Ok(())];
2673 24 : REMOTE_TIMELINE_CLIENT_CALLS
2674 24 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
2675 24 : // don't care about results
2676 24 : }
2677 18 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2678 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2679 6 : tenant_id,
2680 6 : shard_id,
2681 6 : timeline_id,
2682 6 : a,
2683 6 : b,
2684 6 : ]);
2685 6 : }
2686 18 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2687 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2688 6 : tenant_id,
2689 6 : shard_id,
2690 6 : timeline_id,
2691 6 : a,
2692 6 : b,
2693 6 : ]);
2694 6 : }
2695 18 : {
2696 18 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2697 18 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2698 18 : }
2699 18 : }
2700 : }
2701 :
2702 : /// Wrapper future that measures the time spent by a remote storage operation,
2703 : /// and records the time and success/failure as a prometheus metric.
2704 : pub(crate) trait MeasureRemoteOp: Sized {
2705 2722 : fn measure_remote_op(
2706 2722 : self,
2707 2722 : file_kind: RemoteOpFileKind,
2708 2722 : op: RemoteOpKind,
2709 2722 : metrics: Arc<RemoteTimelineClientMetrics>,
2710 2722 : ) -> MeasuredRemoteOp<Self> {
2711 2722 : let start = Instant::now();
2712 2722 : MeasuredRemoteOp {
2713 2722 : inner: self,
2714 2722 : file_kind,
2715 2722 : op,
2716 2722 : start,
2717 2722 : metrics,
2718 2722 : }
2719 2722 : }
2720 : }
2721 :
// Blanket impl: every sized type gets `measure_remote_op`. The resulting
// `MeasuredRemoteOp` only implements `Future` when the wrapped value is a
// future whose output is a `Result`.
impl<T: Sized> MeasureRemoteOp for T {}
2723 :
// Future wrapper created by `MeasureRemoteOp::measure_remote_op`.
// When the wrapped future completes, the elapsed time since `start` is
// observed on the histogram returned by `metrics.remote_operation_time`.
pin_project! {
    pub(crate) struct MeasuredRemoteOp<F>
    {
        // The wrapped value; pinned so it can be polled through the projection.
        #[pin]
        inner: F,
        // Passed to `remote_operation_time` together with `op` and the outcome.
        file_kind: RemoteOpFileKind,
        op: RemoteOpKind,
        // Taken when the wrapper is constructed.
        start: Instant,
        metrics: Arc<RemoteTimelineClientMetrics>,
    }
}
2735 :
2736 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
2737 : type Output = Result<O, E>;
2738 :
2739 38140 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
2740 38140 : let this = self.project();
2741 38140 : let poll_result = this.inner.poll(cx);
2742 38140 : if let Poll::Ready(ref res) = poll_result {
2743 2688 : let duration = this.start.elapsed();
2744 2688 : let status = if res.is_ok() { &"success" } else { &"failure" };
2745 2688 : this.metrics
2746 2688 : .remote_operation_time(this.file_kind, this.op, status)
2747 2688 : .observe(duration.as_secs_f64());
2748 35452 : }
2749 38140 : poll_result
2750 38140 : }
2751 : }
2752 :
2753 : pub mod tokio_epoll_uring {
2754 : use metrics::{register_int_counter, UIntGauge};
2755 : use once_cell::sync::Lazy;
2756 :
2757 : pub struct Collector {
2758 : descs: Vec<metrics::core::Desc>,
2759 : systems_created: UIntGauge,
2760 : systems_destroyed: UIntGauge,
2761 : }
2762 :
2763 : impl metrics::core::Collector for Collector {
2764 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
2765 0 : self.descs.iter().collect()
2766 0 : }
2767 :
2768 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
2769 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
2770 0 : let tokio_epoll_uring::metrics::Metrics {
2771 0 : systems_created,
2772 0 : systems_destroyed,
2773 0 : } = tokio_epoll_uring::metrics::global();
2774 0 : self.systems_created.set(systems_created);
2775 0 : mfs.extend(self.systems_created.collect());
2776 0 : self.systems_destroyed.set(systems_destroyed);
2777 0 : mfs.extend(self.systems_destroyed.collect());
2778 0 : mfs
2779 0 : }
2780 : }
2781 :
2782 : impl Collector {
2783 : const NMETRICS: usize = 2;
2784 :
2785 : #[allow(clippy::new_without_default)]
2786 0 : pub fn new() -> Self {
2787 0 : let mut descs = Vec::new();
2788 0 :
2789 0 : let systems_created = UIntGauge::new(
2790 0 : "pageserver_tokio_epoll_uring_systems_created",
2791 0 : "counter of tokio-epoll-uring systems that were created",
2792 0 : )
2793 0 : .unwrap();
2794 0 : descs.extend(
2795 0 : metrics::core::Collector::desc(&systems_created)
2796 0 : .into_iter()
2797 0 : .cloned(),
2798 0 : );
2799 0 :
2800 0 : let systems_destroyed = UIntGauge::new(
2801 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
2802 0 : "counter of tokio-epoll-uring systems that were destroyed",
2803 0 : )
2804 0 : .unwrap();
2805 0 : descs.extend(
2806 0 : metrics::core::Collector::desc(&systems_destroyed)
2807 0 : .into_iter()
2808 0 : .cloned(),
2809 0 : );
2810 0 :
2811 0 : Self {
2812 0 : descs,
2813 0 : systems_created,
2814 0 : systems_destroyed,
2815 0 : }
2816 0 : }
2817 : }
2818 :
2819 82 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
2820 : register_int_counter!(
2821 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
2822 : "Number of times where thread_local_system creation spanned multiple executor threads",
2823 : )
2824 82 : .unwrap()
2825 82 : });
2826 :
2827 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
2828 : register_int_counter!(
2829 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
2830 : "Number of times thread_local_system creation failed and was retried after back-off.",
2831 : )
2832 0 : .unwrap()
2833 0 : });
2834 : }
2835 :
2836 : pub(crate) mod tenant_throttling {
2837 : use metrics::{register_int_counter_vec, IntCounter};
2838 : use once_cell::sync::Lazy;
2839 :
2840 : use crate::tenant::{self, throttle::Metric};
2841 :
2842 : pub(crate) struct TimelineGet {
2843 : wait_time: IntCounter,
2844 : count: IntCounter,
2845 : }
2846 :
2847 144 : pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
2848 144 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2849 144 : register_int_counter_vec!(
2850 144 : "pageserver_tenant_throttling_wait_usecs_sum_global",
2851 144 : "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
2852 144 : &["kind"]
2853 144 : )
2854 144 : .unwrap()
2855 144 : });
2856 144 :
2857 144 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
2858 144 : register_int_counter_vec!(
2859 144 : "pageserver_tenant_throttling_count_global",
2860 144 : "Count of tenant throttlings, by kind of throttle.",
2861 144 : &["kind"]
2862 144 : )
2863 144 : .unwrap()
2864 144 : });
2865 144 :
2866 144 : let kind = "timeline_get";
2867 144 : TimelineGet {
2868 144 : wait_time: WAIT_USECS.with_label_values(&[kind]),
2869 144 : count: WAIT_COUNT.with_label_values(&[kind]),
2870 144 : }
2871 144 : });
2872 :
2873 : impl Metric for &'static TimelineGet {
2874 : #[inline(always)]
2875 0 : fn observe_throttling(
2876 0 : &self,
2877 0 : tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
2878 0 : ) {
2879 0 : let val = u64::try_from(wait_time.as_micros()).unwrap();
2880 0 : self.wait_time.inc_by(val);
2881 0 : self.count.inc();
2882 0 : }
2883 : }
2884 : }
2885 :
2886 : pub(crate) mod disk_usage_based_eviction {
2887 : use super::*;
2888 :
2889 : pub(crate) struct Metrics {
2890 : pub(crate) tenant_collection_time: Histogram,
2891 : pub(crate) tenant_layer_count: Histogram,
2892 : pub(crate) layers_collected: IntCounter,
2893 : pub(crate) layers_selected: IntCounter,
2894 : pub(crate) layers_evicted: IntCounter,
2895 : }
2896 :
2897 : impl Default for Metrics {
2898 0 : fn default() -> Self {
2899 0 : let tenant_collection_time = register_histogram!(
2900 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
2901 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
2902 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
2903 : )
2904 0 : .unwrap();
2905 0 :
2906 0 : let tenant_layer_count = register_histogram!(
2907 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
2908 : "Amount of layers gathered from a tenant",
2909 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
2910 : )
2911 0 : .unwrap();
2912 0 :
2913 0 : let layers_collected = register_int_counter!(
2914 : "pageserver_disk_usage_based_eviction_collected_layers_total",
2915 : "Amount of layers collected"
2916 : )
2917 0 : .unwrap();
2918 0 :
2919 0 : let layers_selected = register_int_counter!(
2920 : "pageserver_disk_usage_based_eviction_select_layers_total",
2921 : "Amount of layers selected"
2922 : )
2923 0 : .unwrap();
2924 0 :
2925 0 : let layers_evicted = register_int_counter!(
2926 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
2927 : "Amount of layers successfully evicted"
2928 : )
2929 0 : .unwrap();
2930 0 :
2931 0 : Self {
2932 0 : tenant_collection_time,
2933 0 : tenant_layer_count,
2934 0 : layers_collected,
2935 0 : layers_selected,
2936 0 : layers_evicted,
2937 0 : }
2938 0 : }
2939 : }
2940 :
2941 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
2942 : }
2943 :
2944 138 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
2945 : register_uint_gauge_vec!(
2946 : "pageserver_tokio_executor_thread_configured_count",
2947 : "Total number of configued tokio executor threads in the process.
2948 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
2949 : &["setup"],
2950 : )
2951 138 : .unwrap()
2952 138 : });
2953 :
2954 138 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
2955 138 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
2956 138 : let _guard = SERIALIZE.lock().unwrap();
2957 138 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
2958 138 : TOKIO_EXECUTOR_THREAD_COUNT
2959 138 : .get_metric_with_label_values(&[setup])
2960 138 : .unwrap()
2961 138 : .set(u64::try_from(num_threads.get()).unwrap());
2962 138 : }
2963 :
2964 0 : pub fn preinitialize_metrics() {
2965 0 : // Python tests need these and on some we do alerting.
2966 0 : //
2967 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
2968 0 : // order:
2969 0 : // - global metrics reside in a Lazy<PageserverMetrics>
2970 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
2971 0 : // - could move the statics into TimelineMetrics::new()?
2972 0 :
2973 0 : // counters
2974 0 : [
2975 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
2976 0 : &WALRECEIVER_STARTED_CONNECTIONS,
2977 0 : &WALRECEIVER_BROKER_UPDATES,
2978 0 : &WALRECEIVER_CANDIDATES_ADDED,
2979 0 : &WALRECEIVER_CANDIDATES_REMOVED,
2980 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
2981 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
2982 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
2983 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
2984 0 : ]
2985 0 : .into_iter()
2986 0 : .for_each(|c| {
2987 0 : Lazy::force(c);
2988 0 : });
2989 0 :
2990 0 : // Deletion queue stats
2991 0 : Lazy::force(&DELETION_QUEUE);
2992 0 :
2993 0 : // Tenant stats
2994 0 : Lazy::force(&TENANT);
2995 0 :
2996 0 : // Tenant manager stats
2997 0 : Lazy::force(&TENANT_MANAGER);
2998 0 :
2999 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
3000 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
3001 :
3002 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
3003 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
3004 0 : // values from last restart.
3005 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
3006 0 : }
3007 :
3008 : // countervecs
3009 0 : [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
3010 0 : .into_iter()
3011 0 : .for_each(|c| {
3012 0 : Lazy::force(c);
3013 0 : });
3014 0 :
3015 0 : // gauges
3016 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
3017 0 :
3018 0 : // histograms
3019 0 : [
3020 0 : &READ_NUM_LAYERS_VISITED,
3021 0 : &VEC_READ_NUM_LAYERS_VISITED,
3022 0 : &WAIT_LSN_TIME,
3023 0 : &WAL_REDO_TIME,
3024 0 : &WAL_REDO_RECORDS_HISTOGRAM,
3025 0 : &WAL_REDO_BYTES_HISTOGRAM,
3026 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
3027 0 : ]
3028 0 : .into_iter()
3029 0 : .for_each(|h| {
3030 0 : Lazy::force(h);
3031 0 : });
3032 0 :
3033 0 : // Custom
3034 0 : Lazy::force(&RECONSTRUCT_TIME);
3035 0 : Lazy::force(&tenant_throttling::TIMELINE_GET);
3036 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
3037 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
3038 0 : }
|