Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::shard::TenantShardId;
11 : use postgres_backend::{is_expected_io_error, QueryError};
12 : use pq_proto::framed::ConnectionError;
13 : use strum::{EnumCount, VariantNames};
14 : use strum_macros::{IntoStaticStr, VariantNames};
15 : use tracing::warn;
16 : use utils::id::TimelineId;
17 :
/// Prometheus histogram bucket boundaries, in seconds, for operations on the
/// critical path, i.e. operations that directly affect the latency of user
/// queries.
///
/// Most buckets sit in the microsecond and millisecond range, where the bulk
/// of latencies is expected, but the upper end reaches far enough to tell
/// "bad" apart from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    1e-6, 1e-5, 1e-4, // microseconds: 1, 10, 100
    1e-3, 1e-2, 1e-1, // milliseconds: 1, 10, 100
    1e0, 1e1, 1e2, // seconds: 1, 10, 100
];
30 :
31 : // Metrics collected on operations on the storage repository.
32 3344 : #[derive(Debug, VariantNames, IntoStaticStr)]
33 : #[strum(serialize_all = "kebab_case")]
34 : pub(crate) enum StorageTimeOperation {
35 : #[strum(serialize = "layer flush")]
36 : LayerFlush,
37 :
38 : #[strum(serialize = "compact")]
39 : Compact,
40 :
41 : #[strum(serialize = "create images")]
42 : CreateImages,
43 :
44 : #[strum(serialize = "logical size")]
45 : LogicalSize,
46 :
47 : #[strum(serialize = "imitate logical size")]
48 : ImitateLogicalSize,
49 :
50 : #[strum(serialize = "load layer map")]
51 : LoadLayerMap,
52 :
53 : #[strum(serialize = "gc")]
54 : Gc,
55 :
56 : #[strum(serialize = "find gc cutoffs")]
57 : FindGcCutoffs,
58 : }
59 :
60 172 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
61 172 : register_counter_vec!(
62 172 : "pageserver_storage_operations_seconds_sum",
63 172 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
64 172 : &["operation", "tenant_id", "shard_id", "timeline_id"],
65 172 : )
66 172 : .expect("failed to define a metric")
67 172 : });
68 :
69 172 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
70 172 : register_int_counter_vec!(
71 172 : "pageserver_storage_operations_seconds_count",
72 172 : "Count of storage operations with operation, tenant and timeline dimensions",
73 172 : &["operation", "tenant_id", "shard_id", "timeline_id"],
74 172 : )
75 172 : .expect("failed to define a metric")
76 172 : });
77 :
/// Histogram bucket boundaries, in seconds, for background operations such as
/// compaction, GC and size calculation: 10 ms up to 1000 s in decades.
const STORAGE_OP_BUCKETS: &[f64] = &[
    1e-2, 1e-1, // 10 ms, 100 ms
    1e0, 1e1, 1e2, 1e3, // 1 s, 10 s, 100 s, 1000 s
];
80 :
81 172 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
82 172 : register_histogram_vec!(
83 172 : "pageserver_storage_operations_seconds_global",
84 172 : "Time spent on storage operations",
85 172 : &["operation"],
86 172 : STORAGE_OP_BUCKETS.into(),
87 172 : )
88 172 : .expect("failed to define a metric")
89 172 : });
90 :
91 0 : pub(crate) static READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
92 0 : register_histogram!(
93 0 : "pageserver_layers_visited_per_read_global",
94 0 : "Number of layers visited to reconstruct one key",
95 0 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
96 0 : )
97 0 : .expect("failed to define a metric")
98 0 : });
99 :
100 168 : pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
101 168 : register_histogram!(
102 168 : "pageserver_layers_visited_per_vectored_read_global",
103 168 : "Average number of layers visited to reconstruct one key",
104 168 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
105 168 : )
106 168 : .expect("failed to define a metric")
107 168 : });
108 :
109 : // Metrics collected on operations on the storage repository.
110 : #[derive(
111 680 : Clone, Copy, enum_map::Enum, strum_macros::EnumString, strum_macros::Display, IntoStaticStr,
112 : )]
113 : pub(crate) enum GetKind {
114 : Singular,
115 : Vectored,
116 : }
117 :
118 : pub(crate) struct ReconstructTimeMetrics {
119 : singular: Histogram,
120 : vectored: Histogram,
121 : }
122 :
123 170 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
124 170 : let inner = register_histogram_vec!(
125 170 : "pageserver_getpage_reconstruct_seconds",
126 170 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
127 170 : &["get_kind"],
128 170 : CRITICAL_OP_BUCKETS.into(),
129 170 : )
130 170 : .expect("failed to define a metric");
131 170 :
132 170 : ReconstructTimeMetrics {
133 170 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
134 170 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
135 170 : }
136 170 : });
137 :
138 : impl ReconstructTimeMetrics {
139 626592 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
140 626592 : match get_kind {
141 626156 : GetKind::Singular => &self.singular,
142 436 : GetKind::Vectored => &self.vectored,
143 : }
144 626592 : }
145 : }
146 :
147 : pub(crate) struct ReconstructDataTimeMetrics {
148 : singular: Histogram,
149 : vectored: Histogram,
150 : }
151 :
152 : impl ReconstructDataTimeMetrics {
153 626608 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
154 626608 : match get_kind {
155 626172 : GetKind::Singular => &self.singular,
156 436 : GetKind::Vectored => &self.vectored,
157 : }
158 626608 : }
159 : }
160 :
161 170 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<ReconstructDataTimeMetrics> = Lazy::new(|| {
162 170 : let inner = register_histogram_vec!(
163 170 : "pageserver_getpage_get_reconstruct_data_seconds",
164 170 : "Time spent in get_reconstruct_value_data",
165 170 : &["get_kind"],
166 170 : CRITICAL_OP_BUCKETS.into(),
167 170 : )
168 170 : .expect("failed to define a metric");
169 170 :
170 170 : ReconstructDataTimeMetrics {
171 170 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
172 170 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
173 170 : }
174 170 : });
175 :
176 : pub(crate) struct GetVectoredLatency {
177 : map: EnumMap<TaskKind, Option<Histogram>>,
178 : }
179 :
180 : #[allow(dead_code)]
181 : pub(crate) struct ScanLatency {
182 : map: EnumMap<TaskKind, Option<Histogram>>,
183 : }
184 :
185 : impl GetVectoredLatency {
186 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
187 : // cardinality of the metric.
188 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
189 :
190 19528 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
191 19528 : self.map[task_kind].as_ref()
192 19528 : }
193 : }
194 :
195 : impl ScanLatency {
196 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
197 : // cardinality of the metric.
198 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
199 :
200 12 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
201 12 : self.map[task_kind].as_ref()
202 12 : }
203 : }
204 :
205 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
206 : parent: &'a Histogram,
207 : start: std::time::Instant,
208 : }
209 :
210 : impl<'a> ScanLatencyOngoingRecording<'a> {
211 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
212 0 : let start = Instant::now();
213 0 : ScanLatencyOngoingRecording { parent, start }
214 0 : }
215 :
216 0 : pub(crate) fn observe(self, throttled: Option<Duration>) {
217 0 : let elapsed = self.start.elapsed();
218 0 : let ex_throttled = if let Some(throttled) = throttled {
219 0 : elapsed.checked_sub(throttled)
220 : } else {
221 0 : Some(elapsed)
222 : };
223 0 : if let Some(ex_throttled) = ex_throttled {
224 0 : self.parent.observe(ex_throttled.as_secs_f64());
225 0 : } else {
226 0 : use utils::rate_limit::RateLimit;
227 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
228 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
229 0 : let mut rate_limit = LOGGED.lock().unwrap();
230 0 : rate_limit.call(|| {
231 0 : warn!("error deducting time spent throttled; this message is logged at a global rate limit");
232 0 : });
233 0 : }
234 0 : }
235 : }
236 :
237 164 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
238 164 : let inner = register_histogram_vec!(
239 164 : "pageserver_get_vectored_seconds",
240 164 : "Time spent in get_vectored, excluding time spent in timeline_get_throttle.",
241 164 : &["task_kind"],
242 164 : CRITICAL_OP_BUCKETS.into(),
243 164 : )
244 164 : .expect("failed to define a metric");
245 164 :
246 164 : GetVectoredLatency {
247 4920 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
248 4920 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
249 4920 :
250 4920 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
251 328 : let task_kind = task_kind.into();
252 328 : Some(inner.with_label_values(&[task_kind]))
253 : } else {
254 4592 : None
255 : }
256 4920 : })),
257 164 : }
258 164 : });
259 :
260 4 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
261 4 : let inner = register_histogram_vec!(
262 4 : "pageserver_scan_seconds",
263 4 : "Time spent in scan, excluding time spent in timeline_get_throttle.",
264 4 : &["task_kind"],
265 4 : CRITICAL_OP_BUCKETS.into(),
266 4 : )
267 4 : .expect("failed to define a metric");
268 4 :
269 4 : ScanLatency {
270 120 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
271 120 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
272 120 :
273 120 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
274 4 : let task_kind = task_kind.into();
275 4 : Some(inner.with_label_values(&[task_kind]))
276 : } else {
277 116 : None
278 : }
279 120 : })),
280 4 : }
281 4 : });
282 :
283 : pub(crate) struct PageCacheMetricsForTaskKind {
284 : pub read_accesses_immutable: IntCounter,
285 : pub read_hits_immutable: IntCounter,
286 : }
287 :
288 : pub(crate) struct PageCacheMetrics {
289 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
290 : }
291 :
292 88 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
293 88 : register_int_counter_vec!(
294 88 : "pageserver_page_cache_read_hits_total",
295 88 : "Number of read accesses to the page cache that hit",
296 88 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
297 88 : )
298 88 : .expect("failed to define a metric")
299 88 : });
300 :
301 88 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
302 88 : register_int_counter_vec!(
303 88 : "pageserver_page_cache_read_accesses_total",
304 88 : "Number of read accesses to the page cache",
305 88 : &["task_kind", "key_kind", "content_kind"]
306 88 : )
307 88 : .expect("failed to define a metric")
308 88 : });
309 :
310 88 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
311 2640 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
312 2640 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
313 2640 : let task_kind: &'static str = task_kind.into();
314 21120 : EnumMap::from_array(std::array::from_fn(|content_kind| {
315 21120 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
316 21120 : let content_kind: &'static str = content_kind.into();
317 21120 : PageCacheMetricsForTaskKind {
318 21120 : read_accesses_immutable: {
319 21120 : PAGE_CACHE_READ_ACCESSES
320 21120 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
321 21120 : .unwrap()
322 21120 : },
323 21120 :
324 21120 : read_hits_immutable: {
325 21120 : PAGE_CACHE_READ_HITS
326 21120 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
327 21120 : .unwrap()
328 21120 : },
329 21120 : }
330 21120 : }))
331 2640 : })),
332 88 : });
333 :
334 : impl PageCacheMetrics {
335 970514 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
336 970514 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
337 970514 : }
338 : }
339 :
340 : pub(crate) struct PageCacheSizeMetrics {
341 : pub max_bytes: UIntGauge,
342 :
343 : pub current_bytes_immutable: UIntGauge,
344 : }
345 :
346 88 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
347 88 : register_uint_gauge_vec!(
348 88 : "pageserver_page_cache_size_current_bytes",
349 88 : "Current size of the page cache in bytes, by key kind",
350 88 : &["key_kind"]
351 88 : )
352 88 : .expect("failed to define a metric")
353 88 : });
354 :
355 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
356 88 : Lazy::new(|| PageCacheSizeMetrics {
357 88 : max_bytes: {
358 88 : register_uint_gauge!(
359 88 : "pageserver_page_cache_size_max_bytes",
360 88 : "Maximum size of the page cache in bytes"
361 88 : )
362 88 : .expect("failed to define a metric")
363 88 : },
364 88 : current_bytes_immutable: {
365 88 : PAGE_CACHE_SIZE_CURRENT_BYTES
366 88 : .get_metric_with_label_values(&["immutable"])
367 88 : .unwrap()
368 88 : },
369 88 : });
370 :
371 : pub(crate) mod page_cache_eviction_metrics {
372 : use std::num::NonZeroUsize;
373 :
374 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
375 : use once_cell::sync::Lazy;
376 :
377 : #[derive(Clone, Copy)]
378 : pub(crate) enum Outcome {
379 : FoundSlotUnused { iters: NonZeroUsize },
380 : FoundSlotEvicted { iters: NonZeroUsize },
381 : ItersExceeded { iters: NonZeroUsize },
382 : }
383 :
384 88 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
385 88 : register_int_counter_vec!(
386 88 : "pageserver_page_cache_find_victim_iters_total",
387 88 : "Counter for the number of iterations in the find_victim loop",
388 88 : &["outcome"],
389 88 : )
390 88 : .expect("failed to define a metric")
391 88 : });
392 :
393 88 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
394 88 : register_int_counter_vec!(
395 88 : "pageserver_page_cache_find_victim_calls",
396 88 : "Incremented at the end of each find_victim() call.\
397 88 : Filter by outcome to get e.g., eviction rate.",
398 88 : &["outcome"]
399 88 : )
400 88 : .unwrap()
401 88 : });
402 :
403 32167 : pub(crate) fn observe(outcome: Outcome) {
404 : macro_rules! dry {
405 : ($label:literal, $iters:expr) => {{
406 : static LABEL: &'static str = $label;
407 : static ITERS_TOTAL: Lazy<IntCounter> =
408 108 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
409 : static CALLS: Lazy<IntCounter> =
410 108 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
411 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
412 : CALLS.inc();
413 : }};
414 : }
415 32167 : match outcome {
416 1560 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
417 30607 : Outcome::FoundSlotEvicted { iters } => {
418 30607 : dry!("found_evicted", iters)
419 : }
420 0 : Outcome::ItersExceeded { iters } => {
421 0 : dry!("err_iters_exceeded", iters);
422 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
423 0 : }
424 : }
425 32167 : }
426 : }
427 :
428 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
429 0 : register_int_counter_vec!(
430 0 : "page_cache_errors_total",
431 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
432 0 : &["error_kind"]
433 0 : )
434 0 : .expect("failed to define a metric")
435 0 : });
436 :
437 0 : #[derive(IntoStaticStr)]
438 : #[strum(serialize_all = "kebab_case")]
439 : pub(crate) enum PageCacheErrorKind {
440 : AcquirePinnedSlotTimeout,
441 : EvictIterLimit,
442 : }
443 :
444 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
445 0 : PAGE_CACHE_ERRORS
446 0 : .get_metric_with_label_values(&[error_kind.into()])
447 0 : .unwrap()
448 0 : .inc();
449 0 : }
450 :
451 18 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
452 18 : register_histogram!(
453 18 : "pageserver_wait_lsn_seconds",
454 18 : "Time spent waiting for WAL to arrive",
455 18 : CRITICAL_OP_BUCKETS.into(),
456 18 : )
457 18 : .expect("failed to define a metric")
458 18 : });
459 :
460 172 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
461 172 : register_int_gauge_vec!(
462 172 : "pageserver_last_record_lsn",
463 172 : "Last record LSN grouped by timeline",
464 172 : &["tenant_id", "shard_id", "timeline_id"]
465 172 : )
466 172 : .expect("failed to define a metric")
467 172 : });
468 :
469 172 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
470 172 : register_uint_gauge_vec!(
471 172 : "pageserver_pitr_history_size",
472 172 : "Data written since PITR cutoff on this timeline",
473 172 : &["tenant_id", "shard_id", "timeline_id"]
474 172 : )
475 172 : .expect("failed to define a metric")
476 172 : });
477 :
478 1712 : #[derive(strum_macros::EnumString, strum_macros::Display, strum_macros::IntoStaticStr)]
479 : #[strum(serialize_all = "kebab_case")]
480 : pub(crate) enum MetricLayerKind {
481 : Delta,
482 : Image,
483 : }
484 :
485 172 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
486 172 : register_uint_gauge_vec!(
487 172 : "pageserver_layer_bytes",
488 172 : "Sum of layer physical sizes in bytes",
489 172 : &["tenant_id", "shard_id", "timeline_id", "kind"]
490 172 : )
491 172 : .expect("failed to define a metric")
492 172 : });
493 :
494 172 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
495 172 : register_uint_gauge_vec!(
496 172 : "pageserver_layer_count",
497 172 : "Number of layers that exist",
498 172 : &["tenant_id", "shard_id", "timeline_id", "kind"]
499 172 : )
500 172 : .expect("failed to define a metric")
501 172 : });
502 :
503 172 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
504 172 : register_uint_gauge_vec!(
505 172 : "pageserver_archive_size",
506 172 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
507 172 : &["tenant_id", "shard_id", "timeline_id"]
508 172 : )
509 172 : .expect("failed to define a metric")
510 172 : });
511 :
512 172 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
513 172 : register_int_gauge_vec!(
514 172 : "pageserver_standby_horizon",
515 172 : "Standby apply LSN for which GC is hold off, by timeline.",
516 172 : &["tenant_id", "shard_id", "timeline_id"]
517 172 : )
518 172 : .expect("failed to define a metric")
519 172 : });
520 :
521 172 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
522 172 : register_uint_gauge_vec!(
523 172 : "pageserver_resident_physical_size",
524 172 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
525 172 : &["tenant_id", "shard_id", "timeline_id"]
526 172 : )
527 172 : .expect("failed to define a metric")
528 172 : });
529 :
530 172 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
531 172 : register_uint_gauge_vec!(
532 172 : "pageserver_visible_physical_size",
533 172 : "The size of the layer files present in the pageserver's filesystem.",
534 172 : &["tenant_id", "shard_id", "timeline_id"]
535 172 : )
536 172 : .expect("failed to define a metric")
537 172 : });
538 :
539 168 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
540 168 : register_uint_gauge!(
541 168 : "pageserver_resident_physical_size_global",
542 168 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
543 168 : )
544 168 : .expect("failed to define a metric")
545 168 : });
546 :
547 172 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
548 172 : register_uint_gauge_vec!(
549 172 : "pageserver_remote_physical_size",
550 172 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
551 172 : // Corollary: If any files are missing from the index part, they won't be included here.
552 172 : &["tenant_id", "shard_id", "timeline_id"]
553 172 : )
554 172 : .expect("failed to define a metric")
555 172 : });
556 :
557 172 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
558 172 : register_uint_gauge!(
559 172 : "pageserver_remote_physical_size_global",
560 172 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
561 172 : )
562 172 : .expect("failed to define a metric")
563 172 : });
564 :
565 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
566 4 : register_int_counter!(
567 4 : "pageserver_remote_ondemand_downloaded_layers_total",
568 4 : "Total on-demand downloaded layers"
569 4 : )
570 4 : .unwrap()
571 4 : });
572 :
573 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
574 4 : register_int_counter!(
575 4 : "pageserver_remote_ondemand_downloaded_bytes_total",
576 4 : "Total bytes of layers on-demand downloaded",
577 4 : )
578 4 : .unwrap()
579 4 : });
580 :
581 172 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
582 172 : register_uint_gauge_vec!(
583 172 : "pageserver_current_logical_size",
584 172 : "Current logical size grouped by timeline",
585 172 : &["tenant_id", "shard_id", "timeline_id"]
586 172 : )
587 172 : .expect("failed to define current logical size metric")
588 172 : });
589 :
590 172 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
591 172 : register_int_gauge_vec!(
592 172 : "pageserver_aux_file_estimated_size",
593 172 : "The size of all aux files for a timeline in aux file v2 store.",
594 172 : &["tenant_id", "shard_id", "timeline_id"]
595 172 : )
596 172 : .expect("failed to define a metric")
597 172 : });
598 :
599 172 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
600 172 : register_uint_gauge_vec!(
601 172 : "pageserver_valid_lsn_lease_count",
602 172 : "The number of valid leases after refreshing gc info.",
603 172 : &["tenant_id", "shard_id", "timeline_id"],
604 172 : )
605 172 : .expect("failed to define a metric")
606 172 : });
607 :
608 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
609 0 : register_int_counter!(
610 0 : "pageserver_circuit_breaker_broken",
611 0 : "How many times a circuit breaker has broken"
612 0 : )
613 0 : .expect("failed to define a metric")
614 0 : });
615 :
616 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
617 0 : register_int_counter!(
618 0 : "pageserver_circuit_breaker_unbroken",
619 0 : "How many times a circuit breaker has been un-broken (recovered)"
620 0 : )
621 0 : .expect("failed to define a metric")
622 0 : });
623 :
624 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
625 164 : register_int_counter!(
626 164 : "pageserver_compression_image_in_bytes_total",
627 164 : "Size of data written into image layers before compression"
628 164 : )
629 164 : .expect("failed to define a metric")
630 164 : });
631 :
632 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
633 164 : register_int_counter!(
634 164 : "pageserver_compression_image_in_bytes_considered",
635 164 : "Size of potentially compressible data written into image layers before compression"
636 164 : )
637 164 : .expect("failed to define a metric")
638 164 : });
639 :
640 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
641 164 : register_int_counter!(
642 164 : "pageserver_compression_image_in_bytes_chosen",
643 164 : "Size of data whose compressed form was written into image layers"
644 164 : )
645 164 : .expect("failed to define a metric")
646 164 : });
647 :
648 164 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
649 164 : register_int_counter!(
650 164 : "pageserver_compression_image_out_bytes_total",
651 164 : "Size of compressed image layer written"
652 164 : )
653 164 : .expect("failed to define a metric")
654 164 : });
655 :
656 : pub(crate) mod initial_logical_size {
657 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
658 : use once_cell::sync::Lazy;
659 :
660 : pub(crate) struct StartCalculation(IntCounterVec);
661 172 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
662 172 : StartCalculation(
663 172 : register_int_counter_vec!(
664 172 : "pageserver_initial_logical_size_start_calculation",
665 172 : "Incremented each time we start an initial logical size calculation attempt. \
666 172 : The `circumstances` label provides some additional details.",
667 172 : &["attempt", "circumstances"]
668 172 : )
669 172 : .unwrap(),
670 172 : )
671 172 : });
672 :
673 : struct DropCalculation {
674 : first: IntCounter,
675 : retry: IntCounter,
676 : }
677 :
678 172 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
679 172 : let vec = register_int_counter_vec!(
680 172 : "pageserver_initial_logical_size_drop_calculation",
681 172 : "Incremented each time we abort a started size calculation attmpt.",
682 172 : &["attempt"]
683 172 : )
684 172 : .unwrap();
685 172 : DropCalculation {
686 172 : first: vec.with_label_values(&["first"]),
687 172 : retry: vec.with_label_values(&["retry"]),
688 172 : }
689 172 : });
690 :
691 : pub(crate) struct Calculated {
692 : pub(crate) births: IntCounter,
693 : pub(crate) deaths: IntCounter,
694 : }
695 :
696 172 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
697 172 : births: register_int_counter!(
698 172 : "pageserver_initial_logical_size_finish_calculation",
699 172 : "Incremented every time we finish calculation of initial logical size.\
700 172 : If everything is working well, this should happen at most once per Timeline object."
701 172 : )
702 172 : .unwrap(),
703 172 : deaths: register_int_counter!(
704 172 : "pageserver_initial_logical_size_drop_finished_calculation",
705 172 : "Incremented when we drop a finished initial logical size calculation result.\
706 172 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
707 172 : )
708 172 : .unwrap(),
709 172 : });
710 :
711 : pub(crate) struct OngoingCalculationGuard {
712 : inc_drop_calculation: Option<IntCounter>,
713 : }
714 :
715 184 : #[derive(strum_macros::IntoStaticStr)]
716 : pub(crate) enum StartCircumstances {
717 : EmptyInitial,
718 : SkippedConcurrencyLimiter,
719 : AfterBackgroundTasksRateLimit,
720 : }
721 :
722 : impl StartCalculation {
723 184 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
724 184 : let circumstances_label: &'static str = circumstances.into();
725 184 : self.0
726 184 : .with_label_values(&["first", circumstances_label])
727 184 : .inc();
728 184 : OngoingCalculationGuard {
729 184 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
730 184 : }
731 184 : }
732 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
733 0 : let circumstances_label: &'static str = circumstances.into();
734 0 : self.0
735 0 : .with_label_values(&["retry", circumstances_label])
736 0 : .inc();
737 0 : OngoingCalculationGuard {
738 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
739 0 : }
740 0 : }
741 : }
742 :
743 : impl Drop for OngoingCalculationGuard {
744 184 : fn drop(&mut self) {
745 184 : if let Some(counter) = self.inc_drop_calculation.take() {
746 0 : counter.inc();
747 184 : }
748 184 : }
749 : }
750 :
751 : impl OngoingCalculationGuard {
752 184 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
753 184 : drop(self.inc_drop_calculation.take());
754 184 : CALCULATED.births.inc();
755 184 : FinishedCalculationGuard {
756 184 : inc_on_drop: CALCULATED.deaths.clone(),
757 184 : }
758 184 : }
759 : }
760 :
761 : pub(crate) struct FinishedCalculationGuard {
762 : inc_on_drop: IntCounter,
763 : }
764 :
765 : impl Drop for FinishedCalculationGuard {
766 6 : fn drop(&mut self) {
767 6 : self.inc_on_drop.inc();
768 6 : }
769 : }
770 :
771 : // context: https://github.com/neondatabase/neon/issues/5963
772 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
773 0 : Lazy::new(|| {
774 0 : register_int_counter!(
775 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
776 0 : "Counter for the following event: walreceiver calls\
777 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
778 0 : )
779 0 : .unwrap()
780 0 : });
781 : }
782 :
783 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
784 0 : register_uint_gauge_vec!(
785 0 : "pageserver_directory_entries_count",
786 0 : "Sum of the entries in pageserver-stored directory listings",
787 0 : &["tenant_id", "shard_id", "timeline_id"]
788 0 : )
789 0 : .expect("failed to define a metric")
790 0 : });
791 :
792 174 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
793 174 : register_uint_gauge_vec!(
794 174 : "pageserver_tenant_states_count",
795 174 : "Count of tenants per state",
796 174 : &["state"]
797 174 : )
798 174 : .expect("Failed to register pageserver_tenant_states_count metric")
799 174 : });
800 :
801 : /// A set of broken tenants.
802 : ///
803 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
804 : /// tenant.
805 12 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
806 12 : register_uint_gauge_vec!(
807 12 : "pageserver_broken_tenants_count",
808 12 : "Set of broken tenants",
809 12 : &["tenant_id", "shard_id"]
810 12 : )
811 12 : .expect("Failed to register pageserver_tenant_states_count metric")
812 12 : });
813 :
814 6 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
815 6 : register_uint_gauge_vec!(
816 6 : "pageserver_tenant_synthetic_cached_size_bytes",
817 6 : "Synthetic size of each tenant in bytes",
818 6 : &["tenant_id"]
819 6 : )
820 6 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
821 6 : });
822 :
823 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
824 0 : register_histogram_vec!(
825 0 : "pageserver_eviction_iteration_duration_seconds_global",
826 0 : "Time spent on a single eviction iteration",
827 0 : &["period_secs", "threshold_secs"],
828 0 : STORAGE_OP_BUCKETS.into(),
829 0 : )
830 0 : .expect("failed to define a metric")
831 0 : });
832 :
833 172 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
834 172 : register_int_counter_vec!(
835 172 : "pageserver_evictions",
836 172 : "Number of layers evicted from the pageserver",
837 172 : &["tenant_id", "shard_id", "timeline_id"]
838 172 : )
839 172 : .expect("failed to define a metric")
840 172 : });
841 :
842 172 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
843 172 : register_int_counter_vec!(
844 172 : "pageserver_evictions_with_low_residence_duration",
845 172 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
846 172 : Residence duration is determined using the `residence_duration_data_source`.",
847 172 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
848 172 : )
849 172 : .expect("failed to define a metric")
850 172 : });
851 :
852 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
853 0 : register_int_counter!(
854 0 : "pageserver_unexpected_ondemand_downloads_count",
855 0 : "Number of unexpected on-demand downloads. \
856 0 : We log more context for each increment, so, forgo any labels in this metric.",
857 0 : )
858 0 : .expect("failed to define a metric")
859 0 : });
860 :
861 : /// How long did we take to start up? Broken down by labels to describe
862 : /// different phases of startup.
863 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
864 0 : register_gauge_vec!(
865 0 : "pageserver_startup_duration_seconds",
866 0 : "Time taken by phases of pageserver startup, in seconds",
867 0 : &["phase"]
868 0 : )
869 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
870 0 : });
871 :
872 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
873 0 : register_uint_gauge!(
874 0 : "pageserver_startup_is_loading",
875 0 : "1 while in initial startup load of tenants, 0 at other times"
876 0 : )
877 0 : .expect("Failed to register pageserver_startup_is_loading")
878 0 : });
879 :
880 168 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
881 168 : register_uint_gauge!(
882 168 : "pageserver_timeline_ephemeral_bytes",
883 168 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
884 168 : )
885 168 : .expect("Failed to register metric")
886 168 : });
887 :
888 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
889 : /// like how long it took to load.
890 : ///
891 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
892 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
893 : /// at a timeline level than tenant level.
894 : pub(crate) struct TenantMetrics {
895 : /// How long did tenants take to go from construction to active state?
896 : pub(crate) activation: Histogram,
897 : pub(crate) preload: Histogram,
898 : pub(crate) attach: Histogram,
899 :
900 : /// How many tenants are included in the initial startup of the pagesrever?
901 : pub(crate) startup_scheduled: IntCounter,
902 : pub(crate) startup_complete: IntCounter,
903 : }
904 :
905 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
906 0 : TenantMetrics {
907 0 : activation: register_histogram!(
908 0 : "pageserver_tenant_activation_seconds",
909 0 : "Time taken by tenants to activate, in seconds",
910 0 : CRITICAL_OP_BUCKETS.into()
911 0 : )
912 0 : .expect("Failed to register metric"),
913 0 : preload: register_histogram!(
914 0 : "pageserver_tenant_preload_seconds",
915 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
916 0 : CRITICAL_OP_BUCKETS.into()
917 0 : )
918 0 : .expect("Failed to register metric"),
919 0 : attach: register_histogram!(
920 0 : "pageserver_tenant_attach_seconds",
921 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
922 0 : CRITICAL_OP_BUCKETS.into()
923 0 : )
924 0 : .expect("Failed to register metric"),
925 0 : startup_scheduled: register_int_counter!(
926 0 : "pageserver_tenant_startup_scheduled",
927 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
928 0 : ).expect("Failed to register metric"),
929 0 : startup_complete: register_int_counter!(
930 0 : "pageserver_tenant_startup_complete",
931 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
932 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
933 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
934 0 : ).expect("Failed to register metric"),
935 0 : }
936 0 : });
937 :
938 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
939 : #[derive(Debug)]
940 : pub(crate) struct EvictionsWithLowResidenceDuration {
941 : data_source: &'static str,
942 : threshold: Duration,
943 : counter: Option<IntCounter>,
944 : }
945 :
946 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
947 : data_source: &'static str,
948 : threshold: Duration,
949 : }
950 :
951 : impl EvictionsWithLowResidenceDurationBuilder {
952 418 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
953 418 : Self {
954 418 : data_source,
955 418 : threshold,
956 418 : }
957 418 : }
958 :
959 418 : fn build(
960 418 : &self,
961 418 : tenant_id: &str,
962 418 : shard_id: &str,
963 418 : timeline_id: &str,
964 418 : ) -> EvictionsWithLowResidenceDuration {
965 418 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
966 418 : .get_metric_with_label_values(&[
967 418 : tenant_id,
968 418 : shard_id,
969 418 : timeline_id,
970 418 : self.data_source,
971 418 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
972 418 : ])
973 418 : .unwrap();
974 418 : EvictionsWithLowResidenceDuration {
975 418 : data_source: self.data_source,
976 418 : threshold: self.threshold,
977 418 : counter: Some(counter),
978 418 : }
979 418 : }
980 : }
981 :
982 : impl EvictionsWithLowResidenceDuration {
983 428 : fn threshold_label_value(threshold: Duration) -> String {
984 428 : format!("{}", threshold.as_secs())
985 428 : }
986 :
987 4 : pub fn observe(&self, observed_value: Duration) {
988 4 : if observed_value < self.threshold {
989 4 : self.counter
990 4 : .as_ref()
991 4 : .expect("nobody calls this function after `remove_from_vec`")
992 4 : .inc();
993 4 : }
994 4 : }
995 :
996 0 : pub fn change_threshold(
997 0 : &mut self,
998 0 : tenant_id: &str,
999 0 : shard_id: &str,
1000 0 : timeline_id: &str,
1001 0 : new_threshold: Duration,
1002 0 : ) {
1003 0 : if new_threshold == self.threshold {
1004 0 : return;
1005 0 : }
1006 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1007 0 : self.data_source,
1008 0 : new_threshold,
1009 0 : )
1010 0 : .build(tenant_id, shard_id, timeline_id);
1011 0 : std::mem::swap(self, &mut with_new);
1012 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1013 0 : }
1014 :
1015 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
1016 10 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1017 10 : let Some(_counter) = self.counter.take() else {
1018 0 : return;
1019 : };
1020 :
1021 10 : let threshold = Self::threshold_label_value(self.threshold);
1022 10 :
1023 10 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1024 10 : tenant_id,
1025 10 : shard_id,
1026 10 : timeline_id,
1027 10 : self.data_source,
1028 10 : &threshold,
1029 10 : ]);
1030 10 :
1031 10 : match removed {
1032 0 : Err(e) => {
1033 0 : // this has been hit in staging as
1034 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1035 0 : // because we can be in the drop path already, don't risk:
1036 0 : // - "double-panic => illegal instruction" or
1037 0 : // - future "drop panick => abort"
1038 0 : //
1039 0 : // so just nag: (the error has the labels)
1040 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
1041 : }
1042 : Ok(()) => {
1043 : // to help identify cases where we double-remove the same values, let's log all
1044 : // deletions?
1045 10 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
1046 : }
1047 : }
1048 10 : }
1049 : }
1050 :
// Metrics collected on disk IO operations
//
// Histogram bucket upper bounds in seconds, on a roughly logarithmic scale.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    30e-6, // 30 usec
    1e-3,  // 1000 usec
    30e-3, // 30 ms
    1.0,   // 1000 ms
    30.0,  // 30000 ms
];
1061 :
1062 : /// VirtualFile fs operation variants.
1063 : ///
1064 : /// Operations:
1065 : /// - open ([`std::fs::OpenOptions::open`])
1066 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1067 : /// - close-by-replace (close by replacement algorithm)
1068 : /// - read (`read_at`)
1069 : /// - write (`write_at`)
1070 : /// - seek (modify internal position or file length query)
1071 : /// - fsync ([`std::fs::File::sync_all`])
1072 : /// - metadata ([`std::fs::File::metadata`])
1073 : #[derive(
1074 1818 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1075 : )]
1076 : pub(crate) enum StorageIoOperation {
1077 : Open,
1078 : OpenAfterReplace,
1079 : Close,
1080 : CloseByReplace,
1081 : Read,
1082 : Write,
1083 : Seek,
1084 : Fsync,
1085 : Metadata,
1086 : }
1087 :
1088 : impl StorageIoOperation {
1089 1818 : pub fn as_str(&self) -> &'static str {
1090 1818 : match self {
1091 202 : StorageIoOperation::Open => "open",
1092 202 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1093 202 : StorageIoOperation::Close => "close",
1094 202 : StorageIoOperation::CloseByReplace => "close-by-replace",
1095 202 : StorageIoOperation::Read => "read",
1096 202 : StorageIoOperation::Write => "write",
1097 202 : StorageIoOperation::Seek => "seek",
1098 202 : StorageIoOperation::Fsync => "fsync",
1099 202 : StorageIoOperation::Metadata => "metadata",
1100 : }
1101 1818 : }
1102 : }
1103 :
1104 : /// Tracks time taken by fs operations near VirtualFile.
1105 : #[derive(Debug)]
1106 : pub(crate) struct StorageIoTime {
1107 : metrics: [Histogram; StorageIoOperation::COUNT],
1108 : }
1109 :
1110 : impl StorageIoTime {
1111 202 : fn new() -> Self {
1112 202 : let storage_io_histogram_vec = register_histogram_vec!(
1113 202 : "pageserver_io_operations_seconds",
1114 202 : "Time spent in IO operations",
1115 202 : &["operation"],
1116 202 : STORAGE_IO_TIME_BUCKETS.into()
1117 202 : )
1118 202 : .expect("failed to define a metric");
1119 1818 : let metrics = std::array::from_fn(|i| {
1120 1818 : let op = StorageIoOperation::from_repr(i).unwrap();
1121 1818 : storage_io_histogram_vec
1122 1818 : .get_metric_with_label_values(&[op.as_str()])
1123 1818 : .unwrap()
1124 1818 : });
1125 202 : Self { metrics }
1126 202 : }
1127 :
1128 2292199 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1129 2292199 : &self.metrics[op as usize]
1130 2292199 : }
1131 : }
1132 :
1133 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1134 :
1135 : const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
1136 :
1137 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1138 198 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1139 198 : register_int_gauge_vec!(
1140 198 : "pageserver_io_operations_bytes_total",
1141 198 : "Total amount of bytes read/written in IO operations",
1142 198 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1143 198 : )
1144 198 : .expect("failed to define a metric")
1145 198 : });
1146 :
1147 : #[cfg(not(test))]
1148 : pub(crate) mod virtual_file_descriptor_cache {
1149 : use super::*;
1150 :
1151 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1152 0 : register_uint_gauge!(
1153 0 : "pageserver_virtual_file_descriptor_cache_size_max",
1154 0 : "Maximum number of open file descriptors in the cache."
1155 0 : )
1156 0 : .unwrap()
1157 0 : });
1158 :
1159 : // SIZE_CURRENT: derive it like so:
1160 : // ```
1161 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$")
1162 : // -ignoring(operation)
1163 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"}
1164 : // ```
1165 : }
1166 :
1167 : #[cfg(not(test))]
1168 : pub(crate) mod virtual_file_io_engine {
1169 : use super::*;
1170 :
1171 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1172 0 : register_uint_gauge_vec!(
1173 0 : "pageserver_virtual_file_io_engine_kind",
1174 0 : "The configured io engine for VirtualFile",
1175 0 : &["kind"],
1176 0 : )
1177 0 : .unwrap()
1178 0 : });
1179 : }
1180 :
1181 : struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
1182 : global_latency_histo: &'a Histogram,
1183 :
1184 : // Optional because not all op types are tracked per-timeline
1185 : per_timeline_latency_histo: Option<&'a Histogram>,
1186 :
1187 : ctx: &'c RequestContext,
1188 : start: std::time::Instant,
1189 : op: SmgrQueryType,
1190 : count: usize,
1191 : }
1192 :
1193 : impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
1194 10 : fn drop(&mut self) {
1195 10 : let elapsed = self.start.elapsed();
1196 10 : let ex_throttled = self
1197 10 : .ctx
1198 10 : .micros_spent_throttled
1199 10 : .close_and_checked_sub_from(elapsed);
1200 10 : let ex_throttled = match ex_throttled {
1201 10 : Ok(res) => res,
1202 0 : Err(error) => {
1203 : use utils::rate_limit::RateLimit;
1204 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1205 0 : Lazy::new(|| {
1206 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1207 0 : RateLimit::new(Duration::from_secs(10))
1208 0 : })))
1209 0 : });
1210 0 : let mut guard = LOGGED.lock().unwrap();
1211 0 : let rate_limit = &mut guard[self.op];
1212 0 : rate_limit.call(|| {
1213 0 : warn!(op=?self.op, error, "error deducting time spent throttled; this message is logged at a global rate limit");
1214 0 : });
1215 0 : elapsed
1216 : }
1217 : };
1218 :
1219 10 : for _ in 0..self.count {
1220 10 : self.global_latency_histo
1221 10 : .observe(ex_throttled.as_secs_f64());
1222 10 : if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
1223 2 : per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
1224 8 : }
1225 : }
1226 10 : }
1227 : }
1228 :
1229 : #[derive(
1230 : Debug,
1231 : Clone,
1232 : Copy,
1233 5166 : IntoStaticStr,
1234 : strum_macros::EnumCount,
1235 24 : strum_macros::EnumIter,
1236 4280 : strum_macros::FromRepr,
1237 : enum_map::Enum,
1238 : )]
1239 : #[strum(serialize_all = "snake_case")]
1240 : pub enum SmgrQueryType {
1241 : GetRelExists,
1242 : GetRelSize,
1243 : GetPageAtLsn,
1244 : GetDbSize,
1245 : GetSlruSegment,
1246 : }
1247 :
1248 : #[derive(Debug)]
1249 : pub(crate) struct SmgrQueryTimePerTimeline {
1250 : global_started: [IntCounter; SmgrQueryType::COUNT],
1251 : global_latency: [Histogram; SmgrQueryType::COUNT],
1252 : per_timeline_getpage_started: IntCounter,
1253 : per_timeline_getpage_latency: Histogram,
1254 : }
1255 :
1256 174 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1257 174 : register_int_counter_vec!(
1258 174 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1259 174 : "pageserver_smgr_query_started_global_count",
1260 174 : "Number of smgr queries started, aggregated by query type.",
1261 174 : &["smgr_query_type"],
1262 174 : )
1263 174 : .expect("failed to define a metric")
1264 174 : });
1265 :
1266 174 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1267 174 : register_int_counter_vec!(
1268 174 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1269 174 : "pageserver_smgr_query_started_count",
1270 174 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1271 174 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1272 174 : )
1273 174 : .expect("failed to define a metric")
1274 174 : });
1275 :
1276 174 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1277 174 : register_histogram_vec!(
1278 174 : "pageserver_smgr_query_seconds",
1279 174 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1280 174 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1281 174 : CRITICAL_OP_BUCKETS.into(),
1282 174 : )
1283 174 : .expect("failed to define a metric")
1284 174 : });
1285 :
1286 174 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1287 174 : [
1288 174 : 1,
1289 174 : 10,
1290 174 : 20,
1291 174 : 40,
1292 174 : 60,
1293 174 : 80,
1294 174 : 100,
1295 174 : 200,
1296 174 : 300,
1297 174 : 400,
1298 174 : 500,
1299 174 : 600,
1300 174 : 700,
1301 174 : 800,
1302 174 : 900,
1303 174 : 1_000, // 1ms
1304 174 : 2_000,
1305 174 : 4_000,
1306 174 : 6_000,
1307 174 : 8_000,
1308 174 : 10_000, // 10ms
1309 174 : 20_000,
1310 174 : 40_000,
1311 174 : 60_000,
1312 174 : 80_000,
1313 174 : 100_000,
1314 174 : 200_000,
1315 174 : 400_000,
1316 174 : 600_000,
1317 174 : 800_000,
1318 174 : 1_000_000, // 1s
1319 174 : 2_000_000,
1320 174 : 4_000_000,
1321 174 : 6_000_000,
1322 174 : 8_000_000,
1323 174 : 10_000_000, // 10s
1324 174 : 20_000_000,
1325 174 : 50_000_000,
1326 174 : 100_000_000,
1327 174 : 200_000_000,
1328 174 : 1_000_000_000, // 1000s
1329 174 : ]
1330 174 : .into_iter()
1331 174 : .map(Duration::from_micros)
1332 7134 : .map(|d| d.as_secs_f64())
1333 174 : .collect()
1334 174 : });
1335 :
1336 174 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1337 174 : register_histogram_vec!(
1338 174 : "pageserver_smgr_query_seconds_global",
1339 174 : "Time spent on smgr query handling, aggregated by query type.",
1340 174 : &["smgr_query_type"],
1341 174 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1342 174 : )
1343 174 : .expect("failed to define a metric")
1344 174 : });
1345 :
1346 : impl SmgrQueryTimePerTimeline {
1347 428 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1348 428 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1349 428 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1350 428 : let timeline_id = timeline_id.to_string();
1351 2140 : let global_started = std::array::from_fn(|i| {
1352 2140 : let op = SmgrQueryType::from_repr(i).unwrap();
1353 2140 : SMGR_QUERY_STARTED_GLOBAL
1354 2140 : .get_metric_with_label_values(&[op.into()])
1355 2140 : .unwrap()
1356 2140 : });
1357 2140 : let global_latency = std::array::from_fn(|i| {
1358 2140 : let op = SmgrQueryType::from_repr(i).unwrap();
1359 2140 : SMGR_QUERY_TIME_GLOBAL
1360 2140 : .get_metric_with_label_values(&[op.into()])
1361 2140 : .unwrap()
1362 2140 : });
1363 428 :
1364 428 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
1365 428 : .get_metric_with_label_values(&[
1366 428 : SmgrQueryType::GetPageAtLsn.into(),
1367 428 : &tenant_id,
1368 428 : &shard_slug,
1369 428 : &timeline_id,
1370 428 : ])
1371 428 : .unwrap();
1372 428 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1373 428 : .get_metric_with_label_values(&[
1374 428 : SmgrQueryType::GetPageAtLsn.into(),
1375 428 : &tenant_id,
1376 428 : &shard_slug,
1377 428 : &timeline_id,
1378 428 : ])
1379 428 : .unwrap();
1380 428 :
1381 428 : Self {
1382 428 : global_started,
1383 428 : global_latency,
1384 428 : per_timeline_getpage_latency,
1385 428 : per_timeline_getpage_started,
1386 428 : }
1387 428 : }
1388 10 : pub(crate) fn start_timer<'c: 'a, 'a>(
1389 10 : &'a self,
1390 10 : op: SmgrQueryType,
1391 10 : ctx: &'c RequestContext,
1392 10 : ) -> Option<impl Drop + 'a> {
1393 10 : self.start_timer_many(op, 1, ctx)
1394 10 : }
1395 10 : pub(crate) fn start_timer_many<'c: 'a, 'a>(
1396 10 : &'a self,
1397 10 : op: SmgrQueryType,
1398 10 : count: usize,
1399 10 : ctx: &'c RequestContext,
1400 10 : ) -> Option<impl Drop + 'a> {
1401 10 : let start = Instant::now();
1402 10 :
1403 10 : self.global_started[op as usize].inc();
1404 10 :
1405 10 : // We subtract time spent throttled from the observed latency.
1406 10 : match ctx.micros_spent_throttled.open() {
1407 10 : Ok(()) => (),
1408 0 : Err(error) => {
1409 0 : use utils::rate_limit::RateLimit;
1410 0 : static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
1411 0 : Lazy::new(|| {
1412 0 : Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
1413 0 : RateLimit::new(Duration::from_secs(10))
1414 0 : })))
1415 0 : });
1416 0 : let mut guard = LOGGED.lock().unwrap();
1417 0 : let rate_limit = &mut guard[op];
1418 0 : rate_limit.call(|| {
1419 0 : warn!(?op, error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1420 0 : });
1421 0 : }
1422 : }
1423 :
1424 10 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1425 2 : self.per_timeline_getpage_started.inc();
1426 2 : Some(&self.per_timeline_getpage_latency)
1427 : } else {
1428 8 : None
1429 : };
1430 :
1431 10 : Some(GlobalAndPerTimelineHistogramTimer {
1432 10 : global_latency_histo: &self.global_latency[op as usize],
1433 10 : per_timeline_latency_histo,
1434 10 : ctx,
1435 10 : start,
1436 10 : op,
1437 10 : count,
1438 10 : })
1439 10 : }
1440 : }
1441 :
#[cfg(test)]
mod smgr_query_time_tests {
    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    use crate::{
        context::{DownloadBehavior, RequestContext},
        task_mgr::TaskKind,
    };

    // Regression test, we used hard-coded string constants before using an enum.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let cases: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        for (op, expected_label) in cases {
            let label: &'static str = op.into();
            assert_eq!(label, expected_label);
        }
    }

    // Starting and dropping one timer must add a global sample for every op
    // type, and a per-timeline sample for getpage only.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in ops.iter().copied() {
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // (sum of global sample counts over all ops, per-timeline getpage sample count)
            let get_counts = || {
                let global: u64 = ops
                    .iter()
                    .map(|op| metrics.global_latency[*op as usize].get_sample_count())
                    .sum();
                let per_timeline = metrics.per_timeline_getpage_latency.get_sample_count();
                (global, per_timeline)
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download);
            let timer = metrics.start_timer(op, &ctx);
            drop(timer);

            let (post_global, post_per_tenant_timeline) = get_counts();
            // getpage ops are tracked per-timeline, others aren't
            let expected_per_timeline = match op {
                super::SmgrQueryType::GetPageAtLsn => 1,
                _ => 0,
            };
            assert_eq!(post_per_tenant_timeline, expected_per_timeline);
            assert!(post_global > pre_global);
        }
    }
}
1511 :
1512 : // keep in sync with control plane Go code so that we can validate
1513 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1514 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1515 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1516 0 : [
1517 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1518 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1519 0 : ]
1520 0 : .map(|ms| (ms as f64) / 1000.0)
1521 0 : });
1522 :
1523 : pub(crate) struct BasebackupQueryTime {
1524 : ok: Histogram,
1525 : error: Histogram,
1526 : client_error: Histogram,
1527 : }
1528 :
1529 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1530 0 : let vec = register_histogram_vec!(
1531 0 : "pageserver_basebackup_query_seconds",
1532 0 : "Histogram of basebackup queries durations, by result type",
1533 0 : &["result"],
1534 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1535 0 : )
1536 0 : .expect("failed to define a metric");
1537 0 : BasebackupQueryTime {
1538 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1539 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1540 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
1541 0 : }
1542 0 : });
1543 :
1544 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a, 'c> {
1545 : parent: &'a BasebackupQueryTime,
1546 : ctx: &'c RequestContext,
1547 : start: std::time::Instant,
1548 : }
1549 :
1550 : impl BasebackupQueryTime {
1551 0 : pub(crate) fn start_recording<'c: 'a, 'a>(
1552 0 : &'a self,
1553 0 : ctx: &'c RequestContext,
1554 0 : ) -> BasebackupQueryTimeOngoingRecording<'a, 'a> {
1555 0 : let start = Instant::now();
1556 0 : match ctx.micros_spent_throttled.open() {
1557 0 : Ok(()) => (),
1558 0 : Err(error) => {
1559 0 : use utils::rate_limit::RateLimit;
1560 0 : static LOGGED: Lazy<Mutex<RateLimit>> =
1561 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1562 0 : let mut rate_limit = LOGGED.lock().unwrap();
1563 0 : rate_limit.call(|| {
1564 0 : warn!(error, "error opening micros_spent_throttled; this message is logged at a global rate limit");
1565 0 : });
1566 0 : }
1567 : }
1568 0 : BasebackupQueryTimeOngoingRecording {
1569 0 : parent: self,
1570 0 : ctx,
1571 0 : start,
1572 0 : }
1573 0 : }
1574 : }
1575 :
1576 : impl BasebackupQueryTimeOngoingRecording<'_, '_> {
1577 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
1578 0 : let elapsed = self.start.elapsed();
1579 0 : let ex_throttled = self
1580 0 : .ctx
1581 0 : .micros_spent_throttled
1582 0 : .close_and_checked_sub_from(elapsed);
1583 0 : let ex_throttled = match ex_throttled {
1584 0 : Ok(ex_throttled) => ex_throttled,
1585 0 : Err(error) => {
1586 : use utils::rate_limit::RateLimit;
1587 : static LOGGED: Lazy<Mutex<RateLimit>> =
1588 0 : Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
1589 0 : let mut rate_limit = LOGGED.lock().unwrap();
1590 0 : rate_limit.call(|| {
1591 0 : warn!(error, "error deducting time spent throttled; this message is logged at a global rate limit");
1592 0 : });
1593 0 : elapsed
1594 : }
1595 : };
1596 : // If you want to change categorize of a specific error, also change it in `log_query_error`.
1597 0 : let metric = match res {
1598 0 : Ok(_) => &self.parent.ok,
1599 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
1600 0 : if is_expected_io_error(io_error) =>
1601 0 : {
1602 0 : &self.parent.client_error
1603 : }
1604 0 : Err(_) => &self.parent.error,
1605 : };
1606 0 : metric.observe(ex_throttled.as_secs_f64());
1607 0 : }
1608 : }
1609 :
1610 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1611 0 : register_int_counter_pair_vec!(
1612 0 : "pageserver_live_connections_started",
1613 0 : "Number of network connections that we started handling",
1614 0 : "pageserver_live_connections_finished",
1615 0 : "Number of network connections that we finished handling",
1616 0 : &["pageserver_connection_kind"]
1617 0 : )
1618 0 : .expect("failed to define a metric")
1619 0 : });
1620 :
1621 0 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
1622 : pub(crate) enum ComputeCommandKind {
1623 : PageStreamV2,
1624 : Basebackup,
1625 : Fullbackup,
1626 : LeaseLsn,
1627 : }
1628 :
1629 : pub(crate) struct ComputeCommandCounters {
1630 : map: EnumMap<ComputeCommandKind, IntCounter>,
1631 : }
1632 :
1633 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
1634 0 : let inner = register_int_counter_vec!(
1635 0 : "pageserver_compute_commands",
1636 0 : "Number of compute -> pageserver commands processed",
1637 0 : &["command"]
1638 0 : )
1639 0 : .expect("failed to define a metric");
1640 0 :
1641 0 : ComputeCommandCounters {
1642 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
1643 0 : let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
1644 0 : let command_str: &'static str = command.into();
1645 0 : inner.with_label_values(&[command_str])
1646 0 : })),
1647 0 : }
1648 0 : });
1649 :
1650 : impl ComputeCommandCounters {
1651 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
1652 0 : &self.map[command]
1653 0 : }
1654 : }
1655 :
1656 : // remote storage metrics
1657 :
1658 168 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1659 168 : register_int_counter_pair_vec!(
1660 168 : "pageserver_remote_timeline_client_calls_started",
1661 168 : "Number of started calls to remote timeline client.",
1662 168 : "pageserver_remote_timeline_client_calls_finished",
1663 168 : "Number of finshed calls to remote timeline client.",
1664 168 : &[
1665 168 : "tenant_id",
1666 168 : "shard_id",
1667 168 : "timeline_id",
1668 168 : "file_kind",
1669 168 : "op_kind"
1670 168 : ],
1671 168 : )
1672 168 : .unwrap()
1673 168 : });
1674 :
1675 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1676 166 : Lazy::new(|| {
1677 166 : register_int_counter_vec!(
1678 166 : "pageserver_remote_timeline_client_bytes_started",
1679 166 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1680 166 : The increment happens when the operation is scheduled.",
1681 166 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1682 166 : )
1683 166 : .expect("failed to define a metric")
1684 166 : });
1685 :
1686 166 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1687 166 : register_int_counter_vec!(
1688 166 : "pageserver_remote_timeline_client_bytes_finished",
1689 166 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1690 166 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1691 166 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1692 166 : )
1693 166 : .expect("failed to define a metric")
1694 166 : });
1695 :
1696 : pub(crate) struct TenantManagerMetrics {
1697 : tenant_slots_attached: UIntGauge,
1698 : tenant_slots_secondary: UIntGauge,
1699 : tenant_slots_inprogress: UIntGauge,
1700 : pub(crate) tenant_slot_writes: IntCounter,
1701 : pub(crate) unexpected_errors: IntCounter,
1702 : }
1703 :
1704 : impl TenantManagerMetrics {
1705 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
1706 : /// exactly: they track the lifetime of the slots _in the tenant map_.
1707 2 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
1708 2 : match slot {
1709 0 : TenantSlot::Attached(_) => {
1710 0 : self.tenant_slots_attached.inc();
1711 0 : }
1712 0 : TenantSlot::Secondary(_) => {
1713 0 : self.tenant_slots_secondary.inc();
1714 0 : }
1715 2 : TenantSlot::InProgress(_) => {
1716 2 : self.tenant_slots_inprogress.inc();
1717 2 : }
1718 : }
1719 2 : }
1720 :
1721 2 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
1722 2 : match slot {
1723 2 : TenantSlot::Attached(_) => {
1724 2 : self.tenant_slots_attached.dec();
1725 2 : }
1726 0 : TenantSlot::Secondary(_) => {
1727 0 : self.tenant_slots_secondary.dec();
1728 0 : }
1729 0 : TenantSlot::InProgress(_) => {
1730 0 : self.tenant_slots_inprogress.dec();
1731 0 : }
1732 : }
1733 2 : }
1734 :
1735 : #[cfg(all(debug_assertions, not(test)))]
1736 0 : pub(crate) fn slots_total(&self) -> u64 {
1737 0 : self.tenant_slots_attached.get()
1738 0 : + self.tenant_slots_secondary.get()
1739 0 : + self.tenant_slots_inprogress.get()
1740 0 : }
1741 : }
1742 :
1743 2 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1744 2 : let tenant_slots = register_uint_gauge_vec!(
1745 2 : "pageserver_tenant_manager_slots",
1746 2 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1747 2 : &["mode"]
1748 2 : )
1749 2 : .expect("failed to define a metric");
1750 2 : TenantManagerMetrics {
1751 2 : tenant_slots_attached: tenant_slots
1752 2 : .get_metric_with_label_values(&["attached"])
1753 2 : .unwrap(),
1754 2 : tenant_slots_secondary: tenant_slots
1755 2 : .get_metric_with_label_values(&["secondary"])
1756 2 : .unwrap(),
1757 2 : tenant_slots_inprogress: tenant_slots
1758 2 : .get_metric_with_label_values(&["inprogress"])
1759 2 : .unwrap(),
1760 2 : tenant_slot_writes: register_int_counter!(
1761 2 : "pageserver_tenant_manager_slot_writes",
1762 2 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1763 2 : )
1764 2 : .expect("failed to define a metric"),
1765 2 : unexpected_errors: register_int_counter!(
1766 2 : "pageserver_tenant_manager_unexpected_errors_total",
1767 2 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1768 2 : )
1769 2 : .expect("failed to define a metric"),
1770 2 : }
1771 2 : });
1772 :
1773 : pub(crate) struct DeletionQueueMetrics {
1774 : pub(crate) keys_submitted: IntCounter,
1775 : pub(crate) keys_dropped: IntCounter,
1776 : pub(crate) keys_executed: IntCounter,
1777 : pub(crate) keys_validated: IntCounter,
1778 : pub(crate) dropped_lsn_updates: IntCounter,
1779 : pub(crate) unexpected_errors: IntCounter,
1780 : pub(crate) remote_errors: IntCounterVec,
1781 : }
1782 31 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1783 31 : DeletionQueueMetrics{
1784 31 :
1785 31 : keys_submitted: register_int_counter!(
1786 31 : "pageserver_deletion_queue_submitted_total",
1787 31 : "Number of objects submitted for deletion"
1788 31 : )
1789 31 : .expect("failed to define a metric"),
1790 31 :
1791 31 : keys_dropped: register_int_counter!(
1792 31 : "pageserver_deletion_queue_dropped_total",
1793 31 : "Number of object deletions dropped due to stale generation."
1794 31 : )
1795 31 : .expect("failed to define a metric"),
1796 31 :
1797 31 : keys_executed: register_int_counter!(
1798 31 : "pageserver_deletion_queue_executed_total",
1799 31 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1800 31 : )
1801 31 : .expect("failed to define a metric"),
1802 31 :
1803 31 : keys_validated: register_int_counter!(
1804 31 : "pageserver_deletion_queue_validated_total",
1805 31 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1806 31 : )
1807 31 : .expect("failed to define a metric"),
1808 31 :
1809 31 : dropped_lsn_updates: register_int_counter!(
1810 31 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1811 31 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1812 31 : )
1813 31 : .expect("failed to define a metric"),
1814 31 : unexpected_errors: register_int_counter!(
1815 31 : "pageserver_deletion_queue_unexpected_errors_total",
1816 31 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1817 31 : )
1818 31 : .expect("failed to define a metric"),
1819 31 : remote_errors: register_int_counter_vec!(
1820 31 : "pageserver_deletion_queue_remote_errors_total",
1821 31 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1822 31 : &["op_kind"],
1823 31 : )
1824 31 : .expect("failed to define a metric")
1825 31 : }
1826 31 : });
1827 :
1828 : pub(crate) struct SecondaryModeMetrics {
1829 : pub(crate) upload_heatmap: IntCounter,
1830 : pub(crate) upload_heatmap_errors: IntCounter,
1831 : pub(crate) upload_heatmap_duration: Histogram,
1832 : pub(crate) download_heatmap: IntCounter,
1833 : pub(crate) download_layer: IntCounter,
1834 : }
1835 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
1836 0 : SecondaryModeMetrics {
1837 0 : upload_heatmap: register_int_counter!(
1838 0 : "pageserver_secondary_upload_heatmap",
1839 0 : "Number of heatmaps written to remote storage by attached tenants"
1840 0 : )
1841 0 : .expect("failed to define a metric"),
1842 0 : upload_heatmap_errors: register_int_counter!(
1843 0 : "pageserver_secondary_upload_heatmap_errors",
1844 0 : "Failures writing heatmap to remote storage"
1845 0 : )
1846 0 : .expect("failed to define a metric"),
1847 0 : upload_heatmap_duration: register_histogram!(
1848 0 : "pageserver_secondary_upload_heatmap_duration",
1849 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
1850 0 : )
1851 0 : .expect("failed to define a metric"),
1852 0 : download_heatmap: register_int_counter!(
1853 0 : "pageserver_secondary_download_heatmap",
1854 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
1855 0 : )
1856 0 : .expect("failed to define a metric"),
1857 0 : download_layer: register_int_counter!(
1858 0 : "pageserver_secondary_download_layer",
1859 0 : "Number of downloads of layers by secondary mode locations"
1860 0 : )
1861 0 : .expect("failed to define a metric"),
1862 0 : }
1863 0 : });
1864 :
1865 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1866 0 : register_uint_gauge_vec!(
1867 0 : "pageserver_secondary_resident_physical_size",
1868 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
1869 0 : &["tenant_id", "shard_id"]
1870 0 : )
1871 0 : .expect("failed to define a metric")
1872 0 : });
1873 :
1874 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
1875 0 : register_uint_gauge!(
1876 0 : "pageserver_utilization_score",
1877 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
1878 0 : )
1879 0 : .expect("failed to define a metric")
1880 0 : });
1881 :
1882 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1883 0 : register_uint_gauge_vec!(
1884 0 : "pageserver_secondary_heatmap_total_size",
1885 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
1886 0 : &["tenant_id", "shard_id"]
1887 0 : )
1888 0 : .expect("failed to define a metric")
1889 0 : });
1890 :
/// Kind of remote storage operation; `as_str` gives the lowercase label value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the static lowercase name of this operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
1906 :
/// Kind of file a remote storage operation acts on; `as_str` gives the lowercase label value.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the static lowercase name of this file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
1920 :
1921 166 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1922 166 : register_histogram_vec!(
1923 166 : "pageserver_remote_operation_seconds",
1924 166 : "Time spent on remote storage operations. \
1925 166 : Grouped by tenant, timeline, operation_kind and status. \
1926 166 : Does not account for time spent waiting in remote timeline client's queues.",
1927 166 : &["file_kind", "op_kind", "status"]
1928 166 : )
1929 166 : .expect("failed to define a metric")
1930 166 : });
1931 :
1932 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
1933 0 : register_int_counter_vec!(
1934 0 : "pageserver_tenant_task_events",
1935 0 : "Number of task start/stop/fail events.",
1936 0 : &["event"],
1937 0 : )
1938 0 : .expect("Failed to register tenant_task_events metric")
1939 0 : });
1940 :
1941 : pub struct BackgroundLoopSemaphoreMetrics {
1942 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
1943 : durations: EnumMap<BackgroundLoopKind, Counter>,
1944 : }
1945 :
1946 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> = Lazy::new(
1947 20 : || {
1948 20 : let counters = register_int_counter_pair_vec!(
1949 20 : "pageserver_background_loop_semaphore_wait_start_count",
1950 20 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
1951 20 : "pageserver_background_loop_semaphore_wait_finish_count",
1952 20 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
1953 20 : &["task"],
1954 20 : )
1955 20 : .unwrap();
1956 20 :
1957 20 : let durations = register_counter_vec!(
1958 20 : "pageserver_background_loop_semaphore_wait_duration_seconds",
1959 20 : "Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls",
1960 20 : &["task"],
1961 20 : )
1962 20 : .unwrap();
1963 20 :
1964 20 : BackgroundLoopSemaphoreMetrics {
1965 180 : counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1966 180 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
1967 180 : counters.with_label_values(&[kind.into()])
1968 180 : })),
1969 180 : durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
1970 180 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
1971 180 : durations.with_label_values(&[kind.into()])
1972 180 : })),
1973 20 : }
1974 20 : },
1975 : );
1976 :
1977 : impl BackgroundLoopSemaphoreMetrics {
1978 364 : pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ {
1979 : struct Record<'a> {
1980 : metrics: &'a BackgroundLoopSemaphoreMetrics,
1981 : task: BackgroundLoopKind,
1982 : _counter_guard: metrics::IntCounterPairGuard,
1983 : start: Instant,
1984 : }
1985 : impl Drop for Record<'_> {
1986 364 : fn drop(&mut self) {
1987 364 : let elapsed = self.start.elapsed().as_secs_f64();
1988 364 : self.metrics.durations[self.task].inc_by(elapsed);
1989 364 : }
1990 : }
1991 364 : Record {
1992 364 : metrics: self,
1993 364 : task,
1994 364 : _counter_guard: self.counters[task].guard(),
1995 364 : start: Instant::now(),
1996 364 : }
1997 364 : }
1998 : }
1999 :
2000 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2001 0 : register_int_counter_vec!(
2002 0 : "pageserver_background_loop_period_overrun_count",
2003 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2004 0 : &["task", "period"],
2005 0 : )
2006 0 : .expect("failed to define a metric")
2007 0 : });
2008 :
2009 : // walreceiver metrics
2010 :
2011 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2012 0 : register_int_counter!(
2013 0 : "pageserver_walreceiver_started_connections_total",
2014 0 : "Number of started walreceiver connections"
2015 0 : )
2016 0 : .expect("failed to define a metric")
2017 0 : });
2018 :
2019 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2020 0 : register_int_gauge!(
2021 0 : "pageserver_walreceiver_active_managers",
2022 0 : "Number of active walreceiver managers"
2023 0 : )
2024 0 : .expect("failed to define a metric")
2025 0 : });
2026 :
2027 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2028 0 : register_int_counter_vec!(
2029 0 : "pageserver_walreceiver_switches_total",
2030 0 : "Number of walreceiver manager change_connection calls",
2031 0 : &["reason"]
2032 0 : )
2033 0 : .expect("failed to define a metric")
2034 0 : });
2035 :
2036 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2037 0 : register_int_counter!(
2038 0 : "pageserver_walreceiver_broker_updates_total",
2039 0 : "Number of received broker updates in walreceiver"
2040 0 : )
2041 0 : .expect("failed to define a metric")
2042 0 : });
2043 :
2044 2 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2045 2 : register_int_counter_vec!(
2046 2 : "pageserver_walreceiver_candidates_events_total",
2047 2 : "Number of walreceiver candidate events",
2048 2 : &["event"]
2049 2 : )
2050 2 : .expect("failed to define a metric")
2051 2 : });
2052 :
2053 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2054 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2055 :
2056 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2057 2 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2058 :
2059 : // Metrics collected on WAL redo operations
2060 : //
2061 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2062 : // for access to the postgres process ('wait') since there is only one for
2063 : // each tenant.
2064 :
/// Histogram buckets, in seconds, for WAL redo timings.
///
/// The buckets are deliberately fine-grained at the low end so that the smallest redo
/// processing times stay measurable: the lowest bound is 5us, which equates to
/// 200'000 pages/sec, which equates to 1.6GB/sec. This is much better than the
/// previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// The upper end extends to 1s because metrics show redo durations and lock times
/// larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            0.000_005, // 5 us
            0.000_010, 0.000_025, 0.000_050, // 10 us .. 50 us
            0.000_100, 0.000_250, 0.000_500, // 100 us .. 500 us
            0.001_000, 0.002_500, 0.005_000, // 1 ms .. 5 ms
            0.010_000, 0.025_000, 0.050_000, // 10 ms .. 50 ms
            0.100_000, 0.250_000, 0.500_000, // 100 ms .. 500 ms
            1.000_000, // 1 s
        ]
    };
}
2081 :
/// Histogram buckets for the number of records replayed in a single redo operation.
///
/// A global 'total replayed' counter exists as well, but it cannot show the skew in how
/// many records are replayed per operation; a histogram over these buckets can.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![
            0.0, 1.0, 2.0, 5.0, // single digits
            10.0, 25.0, 50.0, // tens
            100.0, 250.0, 500.0, // hundreds
        ]
    };
}
2091 :
/// Histogram buckets for byte sizes: powers of (2^.5), from 2^4.5 to 2^15 (22 buckets),
/// each rounded up to the next multiple of 8 to capture any MAXALIGNed record of that
/// size, too.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, // 2^4.5 .. 2^7
            184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, // 2^7.5 .. 2^10
            1456.0, 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, // 2^10.5 .. 2^13
            11592.0, 16384.0, 23176.0, 32768.0, // 2^13.5 .. 2^15
        ]
    };
}
2102 :
2103 : pub(crate) struct WalIngestMetrics {
2104 : pub(crate) bytes_received: IntCounter,
2105 : pub(crate) records_received: IntCounter,
2106 : pub(crate) records_committed: IntCounter,
2107 : pub(crate) records_filtered: IntCounter,
2108 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2109 : }
2110 :
2111 10 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
2112 10 : bytes_received: register_int_counter!(
2113 10 : "pageserver_wal_ingest_bytes_received",
2114 10 : "Bytes of WAL ingested from safekeepers",
2115 10 : )
2116 10 : .unwrap(),
2117 10 : records_received: register_int_counter!(
2118 10 : "pageserver_wal_ingest_records_received",
2119 10 : "Number of WAL records received from safekeepers"
2120 10 : )
2121 10 : .expect("failed to define a metric"),
2122 10 : records_committed: register_int_counter!(
2123 10 : "pageserver_wal_ingest_records_committed",
2124 10 : "Number of WAL records which resulted in writes to pageserver storage"
2125 10 : )
2126 10 : .expect("failed to define a metric"),
2127 10 : records_filtered: register_int_counter!(
2128 10 : "pageserver_wal_ingest_records_filtered",
2129 10 : "Number of WAL records filtered out due to sharding"
2130 10 : )
2131 10 : .expect("failed to define a metric"),
2132 10 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2133 10 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2134 10 : "Total number of zero gap blocks written on relation extends"
2135 10 : )
2136 10 : .expect("failed to define a metric"),
2137 10 : });
2138 :
2139 6 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2140 6 : register_histogram!(
2141 6 : "pageserver_wal_redo_seconds",
2142 6 : "Time spent on WAL redo",
2143 6 : redo_histogram_time_buckets!()
2144 6 : )
2145 6 : .expect("failed to define a metric")
2146 6 : });
2147 :
2148 6 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2149 6 : register_histogram!(
2150 6 : "pageserver_wal_redo_records_histogram",
2151 6 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2152 6 : redo_histogram_count_buckets!(),
2153 6 : )
2154 6 : .expect("failed to define a metric")
2155 6 : });
2156 :
2157 6 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2158 6 : register_histogram!(
2159 6 : "pageserver_wal_redo_bytes_histogram",
2160 6 : "Histogram of number of records replayed per redo sent to Postgres",
2161 6 : redo_bytes_histogram_count_buckets!(),
2162 6 : )
2163 6 : .expect("failed to define a metric")
2164 6 : });
2165 :
2166 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2167 6 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2168 6 : register_int_counter!(
2169 6 : "pageserver_replayed_wal_records_total",
2170 6 : "Number of WAL records replayed in WAL redo process"
2171 6 : )
2172 6 : .unwrap()
2173 6 : });
2174 :
2175 : #[rustfmt::skip]
2176 8 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2177 8 : register_histogram!(
2178 8 : "pageserver_wal_redo_process_launch_duration",
2179 8 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2180 8 : vec![
2181 8 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2182 8 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2183 8 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2184 8 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2185 8 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2186 8 : ],
2187 8 : )
2188 8 : .expect("failed to define a metric")
2189 8 : });
2190 :
2191 : pub(crate) struct WalRedoProcessCounters {
2192 : pub(crate) started: IntCounter,
2193 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
2194 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2195 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2196 : }
2197 :
2198 24 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2199 : pub(crate) enum WalRedoKillCause {
2200 : WalRedoProcessDrop,
2201 : NoLeakChildDrop,
2202 : Startup,
2203 : }
2204 :
2205 : impl Default for WalRedoProcessCounters {
2206 8 : fn default() -> Self {
2207 8 : let started = register_int_counter!(
2208 8 : "pageserver_wal_redo_process_started_total",
2209 8 : "Number of WAL redo processes started",
2210 8 : )
2211 8 : .unwrap();
2212 8 :
2213 8 : let killed = register_int_counter_vec!(
2214 8 : "pageserver_wal_redo_process_stopped_total",
2215 8 : "Number of WAL redo processes stopped",
2216 8 : &["cause"],
2217 8 : )
2218 8 : .unwrap();
2219 8 :
2220 8 : let active_stderr_logger_tasks_started = register_int_counter!(
2221 8 : "pageserver_walredo_stderr_logger_tasks_started_total",
2222 8 : "Number of active walredo stderr logger tasks that have started",
2223 8 : )
2224 8 : .unwrap();
2225 8 :
2226 8 : let active_stderr_logger_tasks_finished = register_int_counter!(
2227 8 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2228 8 : "Number of active walredo stderr logger tasks that have finished",
2229 8 : )
2230 8 : .unwrap();
2231 8 :
2232 8 : Self {
2233 8 : started,
2234 24 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2235 24 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
2236 24 : let cause_str: &'static str = cause.into();
2237 24 : killed.with_label_values(&[cause_str])
2238 24 : })),
2239 8 : active_stderr_logger_tasks_started,
2240 8 : active_stderr_logger_tasks_finished,
2241 8 : }
2242 8 : }
2243 : }
2244 :
2245 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2246 : Lazy::new(WalRedoProcessCounters::default);
2247 :
2248 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2249 : pub(crate) struct StorageTimeMetricsTimer {
2250 : metrics: StorageTimeMetrics,
2251 : start: Instant,
2252 : }
2253 :
2254 : impl StorageTimeMetricsTimer {
2255 3373 : fn new(metrics: StorageTimeMetrics) -> Self {
2256 3373 : Self {
2257 3373 : metrics,
2258 3373 : start: Instant::now(),
2259 3373 : }
2260 3373 : }
2261 :
2262 : /// Record the time from creation to now.
2263 2238 : pub fn stop_and_record(self) {
2264 2238 : let duration = self.start.elapsed().as_secs_f64();
2265 2238 : self.metrics.timeline_sum.inc_by(duration);
2266 2238 : self.metrics.timeline_count.inc();
2267 2238 : self.metrics.global_histogram.observe(duration);
2268 2238 : }
2269 :
2270 : /// Turns this timer into a timer, which will always record -- usually this means recording
2271 : /// regardless an early `?` path was taken in a function.
2272 4 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2273 4 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2274 4 : }
2275 : }
2276 :
2277 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2278 :
2279 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2280 4 : fn drop(&mut self) {
2281 4 : if let Some(inner) = self.0.take() {
2282 4 : inner.stop_and_record();
2283 4 : }
2284 4 : }
2285 : }
2286 :
2287 : /// Timing facilities for an globally histogrammed metric, which is supported by per tenant and
2288 : /// timeline total sum and count.
2289 : #[derive(Clone, Debug)]
2290 : pub(crate) struct StorageTimeMetrics {
2291 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
2292 : timeline_sum: Counter,
2293 : /// Number of oeprations, per operation, tenant_id and timeline_id
2294 : timeline_count: IntCounter,
2295 : /// Global histogram having only the "operation" label.
2296 : global_histogram: Histogram,
2297 : }
2298 :
2299 : impl StorageTimeMetrics {
2300 3344 : pub fn new(
2301 3344 : operation: StorageTimeOperation,
2302 3344 : tenant_id: &str,
2303 3344 : shard_id: &str,
2304 3344 : timeline_id: &str,
2305 3344 : ) -> Self {
2306 3344 : let operation: &'static str = operation.into();
2307 3344 :
2308 3344 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2309 3344 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2310 3344 : .unwrap();
2311 3344 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2312 3344 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2313 3344 : .unwrap();
2314 3344 : let global_histogram = STORAGE_TIME_GLOBAL
2315 3344 : .get_metric_with_label_values(&[operation])
2316 3344 : .unwrap();
2317 3344 :
2318 3344 : StorageTimeMetrics {
2319 3344 : timeline_sum,
2320 3344 : timeline_count,
2321 3344 : global_histogram,
2322 3344 : }
2323 3344 : }
2324 :
2325 : /// Starts timing a new operation.
2326 : ///
2327 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2328 3373 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2329 3373 : StorageTimeMetricsTimer::new(self.clone())
2330 3373 : }
2331 : }
2332 :
/// Handles to the per-timeline metrics of one timeline, resolved once in
/// `TimelineMetrics::new` from the tenant id, shard slug and timeline id.
#[derive(Debug)]
pub(crate) struct TimelineMetrics {
    // Label values identifying this timeline; `shutdown` uses them to remove the
    // per-timeline series again.
    tenant_id: String,
    shard_id: String,
    timeline_id: String,
    // Timing metrics, one per `StorageTimeOperation` variant used in `new`.
    pub flush_time_histo: StorageTimeMetrics,
    pub compact_time_histo: StorageTimeMetrics,
    pub create_images_time_histo: StorageTimeMetrics,
    pub logical_size_histo: StorageTimeMetrics,
    pub imitate_logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
    pub find_gc_cutoffs_histo: StorageTimeMetrics,
    /// Child of `LAST_RECORD_LSN`.
    pub last_record_gauge: IntGauge,
    /// Child of `PITR_HISTORY_SIZE`.
    pub pitr_history_size: UIntGauge,
    /// Child of `TIMELINE_ARCHIVE_SIZE`.
    pub archival_size: UIntGauge,
    // Children of `TIMELINE_LAYER_SIZE` / `TIMELINE_LAYER_COUNT`, split by
    // `MetricLayerKind::Image` and `MetricLayerKind::Delta`.
    pub(crate) layer_size_image: UIntGauge,
    pub(crate) layer_count_image: UIntGauge,
    pub(crate) layer_size_delta: UIntGauge,
    pub(crate) layer_count_delta: UIntGauge,
    /// Child of `STANDBY_HORIZON`.
    pub standby_horizon_gauge: IntGauge,
    /// Child of `RESIDENT_PHYSICAL_SIZE`; the `resident_physical_size_*` methods keep it
    /// in step with `RESIDENT_PHYSICAL_SIZE_GLOBAL`.
    pub resident_physical_size_gauge: UIntGauge,
    /// Child of `VISIBLE_PHYSICAL_SIZE`.
    pub visible_physical_size_gauge: UIntGauge,
    /// copy of LayeredTimeline.current_logical_size
    pub current_logical_size_gauge: UIntGauge,
    /// Child of `AUX_FILE_SIZE`.
    pub aux_file_size_gauge: IntGauge,
    /// Child of `DIRECTORY_ENTRIES_COUNT`, resolved lazily on first access rather than
    /// in `new`.
    pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
    /// Child of `EVICTIONS`.
    pub evictions: IntCounter,
    /// Built by the `EvictionsWithLowResidenceDurationBuilder` passed to `new`.
    pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
    /// Number of valid LSN leases.
    pub valid_lsn_lease_count_gauge: UIntGauge,
    /// Set by the first call to `shutdown`; later calls see it and return early.
    shutdown: std::sync::atomic::AtomicBool,
}
2366 :
2367 : impl TimelineMetrics {
2368 418 : pub fn new(
2369 418 : tenant_shard_id: &TenantShardId,
2370 418 : timeline_id_raw: &TimelineId,
2371 418 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2372 418 : ) -> Self {
2373 418 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2374 418 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2375 418 : let timeline_id = timeline_id_raw.to_string();
2376 418 : let flush_time_histo = StorageTimeMetrics::new(
2377 418 : StorageTimeOperation::LayerFlush,
2378 418 : &tenant_id,
2379 418 : &shard_id,
2380 418 : &timeline_id,
2381 418 : );
2382 418 : let compact_time_histo = StorageTimeMetrics::new(
2383 418 : StorageTimeOperation::Compact,
2384 418 : &tenant_id,
2385 418 : &shard_id,
2386 418 : &timeline_id,
2387 418 : );
2388 418 : let create_images_time_histo = StorageTimeMetrics::new(
2389 418 : StorageTimeOperation::CreateImages,
2390 418 : &tenant_id,
2391 418 : &shard_id,
2392 418 : &timeline_id,
2393 418 : );
2394 418 : let logical_size_histo = StorageTimeMetrics::new(
2395 418 : StorageTimeOperation::LogicalSize,
2396 418 : &tenant_id,
2397 418 : &shard_id,
2398 418 : &timeline_id,
2399 418 : );
2400 418 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2401 418 : StorageTimeOperation::ImitateLogicalSize,
2402 418 : &tenant_id,
2403 418 : &shard_id,
2404 418 : &timeline_id,
2405 418 : );
2406 418 : let load_layer_map_histo = StorageTimeMetrics::new(
2407 418 : StorageTimeOperation::LoadLayerMap,
2408 418 : &tenant_id,
2409 418 : &shard_id,
2410 418 : &timeline_id,
2411 418 : );
2412 418 : let garbage_collect_histo = StorageTimeMetrics::new(
2413 418 : StorageTimeOperation::Gc,
2414 418 : &tenant_id,
2415 418 : &shard_id,
2416 418 : &timeline_id,
2417 418 : );
2418 418 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2419 418 : StorageTimeOperation::FindGcCutoffs,
2420 418 : &tenant_id,
2421 418 : &shard_id,
2422 418 : &timeline_id,
2423 418 : );
2424 418 : let last_record_gauge = LAST_RECORD_LSN
2425 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2426 418 : .unwrap();
2427 418 :
2428 418 : let pitr_history_size = PITR_HISTORY_SIZE
2429 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2430 418 : .unwrap();
2431 418 :
2432 418 : let archival_size = TIMELINE_ARCHIVE_SIZE
2433 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2434 418 : .unwrap();
2435 418 :
2436 418 : let layer_size_image = TIMELINE_LAYER_SIZE
2437 418 : .get_metric_with_label_values(&[
2438 418 : &tenant_id,
2439 418 : &shard_id,
2440 418 : &timeline_id,
2441 418 : MetricLayerKind::Image.into(),
2442 418 : ])
2443 418 : .unwrap();
2444 418 :
2445 418 : let layer_count_image = TIMELINE_LAYER_COUNT
2446 418 : .get_metric_with_label_values(&[
2447 418 : &tenant_id,
2448 418 : &shard_id,
2449 418 : &timeline_id,
2450 418 : MetricLayerKind::Image.into(),
2451 418 : ])
2452 418 : .unwrap();
2453 418 :
2454 418 : let layer_size_delta = TIMELINE_LAYER_SIZE
2455 418 : .get_metric_with_label_values(&[
2456 418 : &tenant_id,
2457 418 : &shard_id,
2458 418 : &timeline_id,
2459 418 : MetricLayerKind::Delta.into(),
2460 418 : ])
2461 418 : .unwrap();
2462 418 :
2463 418 : let layer_count_delta = TIMELINE_LAYER_COUNT
2464 418 : .get_metric_with_label_values(&[
2465 418 : &tenant_id,
2466 418 : &shard_id,
2467 418 : &timeline_id,
2468 418 : MetricLayerKind::Delta.into(),
2469 418 : ])
2470 418 : .unwrap();
2471 418 :
2472 418 : let standby_horizon_gauge = STANDBY_HORIZON
2473 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2474 418 : .unwrap();
2475 418 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2476 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2477 418 : .unwrap();
2478 418 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
2479 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2480 418 : .unwrap();
2481 418 : // TODO: we shouldn't expose this metric
2482 418 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2483 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2484 418 : .unwrap();
2485 418 : let aux_file_size_gauge = AUX_FILE_SIZE
2486 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2487 418 : .unwrap();
2488 418 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
2489 418 : let directory_entries_count_gauge_closure = {
2490 418 : let tenant_shard_id = *tenant_shard_id;
2491 418 : let timeline_id_raw = *timeline_id_raw;
2492 0 : move || {
2493 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2494 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2495 0 : let timeline_id = timeline_id_raw.to_string();
2496 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2497 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2498 0 : .unwrap();
2499 0 : gauge
2500 0 : }
2501 : };
2502 418 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2503 418 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2504 418 : let evictions = EVICTIONS
2505 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2506 418 : .unwrap();
2507 418 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2508 418 : .build(&tenant_id, &shard_id, &timeline_id);
2509 418 :
2510 418 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2511 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2512 418 : .unwrap();
2513 418 :
2514 418 : TimelineMetrics {
2515 418 : tenant_id,
2516 418 : shard_id,
2517 418 : timeline_id,
2518 418 : flush_time_histo,
2519 418 : compact_time_histo,
2520 418 : create_images_time_histo,
2521 418 : logical_size_histo,
2522 418 : imitate_logical_size_histo,
2523 418 : garbage_collect_histo,
2524 418 : find_gc_cutoffs_histo,
2525 418 : load_layer_map_histo,
2526 418 : last_record_gauge,
2527 418 : pitr_history_size,
2528 418 : archival_size,
2529 418 : layer_size_image,
2530 418 : layer_count_image,
2531 418 : layer_size_delta,
2532 418 : layer_count_delta,
2533 418 : standby_horizon_gauge,
2534 418 : resident_physical_size_gauge,
2535 418 : visible_physical_size_gauge,
2536 418 : current_logical_size_gauge,
2537 418 : aux_file_size_gauge,
2538 418 : directory_entries_count_gauge,
2539 418 : evictions,
2540 418 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2541 418 : evictions_with_low_residence_duration,
2542 418 : ),
2543 418 : valid_lsn_lease_count_gauge,
2544 418 : shutdown: std::sync::atomic::AtomicBool::default(),
2545 418 : }
2546 418 : }
2547 :
2548 1530 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2549 1530 : self.resident_physical_size_add(sz);
2550 1530 : }
2551 :
2552 490 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2553 490 : self.resident_physical_size_gauge.sub(sz);
2554 490 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2555 490 : }
2556 :
2557 1560 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2558 1560 : self.resident_physical_size_gauge.add(sz);
2559 1560 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2560 1560 : }
2561 :
2562 10 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2563 10 : self.resident_physical_size_gauge.get()
2564 10 : }
2565 :
2566 10 : pub(crate) fn shutdown(&self) {
2567 10 : let was_shutdown = self
2568 10 : .shutdown
2569 10 : .swap(true, std::sync::atomic::Ordering::Relaxed);
2570 10 :
2571 10 : if was_shutdown {
2572 : // this happens on tenant deletion because tenant first shuts down timelines, then
2573 : // invokes timeline deletion which first shuts down the timeline again.
2574 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
2575 0 : return;
2576 10 : }
2577 10 :
2578 10 : let tenant_id = &self.tenant_id;
2579 10 : let timeline_id = &self.timeline_id;
2580 10 : let shard_id = &self.shard_id;
2581 10 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2582 10 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2583 10 : {
2584 10 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2585 10 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2586 10 : }
2587 10 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2588 10 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2589 10 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2590 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2591 10 : }
2592 :
2593 10 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2594 10 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2595 10 :
2596 10 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2597 10 : tenant_id,
2598 10 : shard_id,
2599 10 : timeline_id,
2600 10 : MetricLayerKind::Image.into(),
2601 10 : ]);
2602 10 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2603 10 : tenant_id,
2604 10 : shard_id,
2605 10 : timeline_id,
2606 10 : MetricLayerKind::Image.into(),
2607 10 : ]);
2608 10 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2609 10 : tenant_id,
2610 10 : shard_id,
2611 10 : timeline_id,
2612 10 : MetricLayerKind::Delta.into(),
2613 10 : ]);
2614 10 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2615 10 : tenant_id,
2616 10 : shard_id,
2617 10 : timeline_id,
2618 10 : MetricLayerKind::Delta.into(),
2619 10 : ]);
2620 10 :
2621 10 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2622 10 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2623 10 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2624 10 :
2625 10 : self.evictions_with_low_residence_duration
2626 10 : .write()
2627 10 : .unwrap()
2628 10 : .remove(tenant_id, shard_id, timeline_id);
2629 :
2630 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
2631 : // removed at the end of it. The idea is to have the metrics outlive the
2632 : // entity during which they're observed, e.g., the smgr metrics shall
2633 : // outlive an individual smgr connection, but not the timeline.
2634 :
2635 90 : for op in StorageTimeOperation::VARIANTS {
2636 80 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2637 80 : op,
2638 80 : tenant_id,
2639 80 : shard_id,
2640 80 : timeline_id,
2641 80 : ]);
2642 80 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2643 80 : op,
2644 80 : tenant_id,
2645 80 : shard_id,
2646 80 : timeline_id,
2647 80 : ]);
2648 80 : }
2649 :
2650 30 : for op in STORAGE_IO_SIZE_OPERATIONS {
2651 20 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2652 20 : }
2653 :
2654 10 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
2655 10 : SmgrQueryType::GetPageAtLsn.into(),
2656 10 : tenant_id,
2657 10 : shard_id,
2658 10 : timeline_id,
2659 10 : ]);
2660 10 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2661 10 : SmgrQueryType::GetPageAtLsn.into(),
2662 10 : tenant_id,
2663 10 : shard_id,
2664 10 : timeline_id,
2665 10 : ]);
2666 10 : }
2667 : }
2668 :
2669 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2670 6 : // Only shard zero deals in synthetic sizes
2671 6 : if tenant_shard_id.is_shard_zero() {
2672 6 : let tid = tenant_shard_id.tenant_id.to_string();
2673 6 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2674 6 : }
2675 :
2676 6 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
2677 6 :
2678 6 : // we leave the BROKEN_TENANTS_SET entry if any
2679 6 : }
2680 :
2681 : use futures::Future;
2682 : use pin_project_lite::pin_project;
2683 : use std::collections::HashMap;
2684 : use std::num::NonZeroUsize;
2685 : use std::pin::Pin;
2686 : use std::sync::atomic::AtomicU64;
2687 : use std::sync::{Arc, Mutex};
2688 : use std::task::{Context, Poll};
2689 : use std::time::{Duration, Instant};
2690 :
2691 : use crate::context::{PageContentKind, RequestContext};
2692 : use crate::task_mgr::TaskKind;
2693 : use crate::tenant::mgr::TenantSlot;
2694 : use crate::tenant::tasks::BackgroundLoopKind;
2695 :
2696 : /// Maintain a per timeline gauge in addition to the global gauge.
2697 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
2698 : last_set: AtomicU64,
2699 : gauge: UIntGauge,
2700 : }
2701 :
2702 : impl PerTimelineRemotePhysicalSizeGauge {
2703 428 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2704 428 : Self {
2705 428 : last_set: AtomicU64::new(0),
2706 428 : gauge: per_timeline_gauge,
2707 428 : }
2708 428 : }
2709 1827 : pub(crate) fn set(&self, sz: u64) {
2710 1827 : self.gauge.set(sz);
2711 1827 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
2712 1827 : if sz < prev {
2713 22 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
2714 1805 : } else {
2715 1805 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
2716 1805 : };
2717 1827 : }
2718 2 : pub(crate) fn get(&self) -> u64 {
2719 2 : self.gauge.get()
2720 2 : }
2721 : }
2722 :
2723 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2724 20 : fn drop(&mut self) {
2725 20 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
2726 20 : }
2727 : }
2728 :
2729 : pub(crate) struct RemoteTimelineClientMetrics {
2730 : tenant_id: String,
2731 : shard_id: String,
2732 : timeline_id: String,
2733 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
2734 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
2735 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2736 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2737 : }
2738 :
2739 : impl RemoteTimelineClientMetrics {
2740 428 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2741 428 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
2742 428 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
2743 428 : let timeline_id_str = timeline_id.to_string();
2744 428 :
2745 428 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
2746 428 : REMOTE_PHYSICAL_SIZE
2747 428 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
2748 428 : .unwrap(),
2749 428 : );
2750 428 :
2751 428 : RemoteTimelineClientMetrics {
2752 428 : tenant_id: tenant_id_str,
2753 428 : shard_id: shard_id_str,
2754 428 : timeline_id: timeline_id_str,
2755 428 : calls: Mutex::new(HashMap::default()),
2756 428 : bytes_started_counter: Mutex::new(HashMap::default()),
2757 428 : bytes_finished_counter: Mutex::new(HashMap::default()),
2758 428 : remote_physical_size_gauge,
2759 428 : }
2760 428 : }
2761 :
2762 2685 : pub fn remote_operation_time(
2763 2685 : &self,
2764 2685 : file_kind: &RemoteOpFileKind,
2765 2685 : op_kind: &RemoteOpKind,
2766 2685 : status: &'static str,
2767 2685 : ) -> Histogram {
2768 2685 : let key = (file_kind.as_str(), op_kind.as_str(), status);
2769 2685 : REMOTE_OPERATION_TIME
2770 2685 : .get_metric_with_label_values(&[key.0, key.1, key.2])
2771 2685 : .unwrap()
2772 2685 : }
2773 :
2774 6392 : fn calls_counter_pair(
2775 6392 : &self,
2776 6392 : file_kind: &RemoteOpFileKind,
2777 6392 : op_kind: &RemoteOpKind,
2778 6392 : ) -> IntCounterPair {
2779 6392 : let mut guard = self.calls.lock().unwrap();
2780 6392 : let key = (file_kind.as_str(), op_kind.as_str());
2781 6392 : let metric = guard.entry(key).or_insert_with(move || {
2782 753 : REMOTE_TIMELINE_CLIENT_CALLS
2783 753 : .get_metric_with_label_values(&[
2784 753 : &self.tenant_id,
2785 753 : &self.shard_id,
2786 753 : &self.timeline_id,
2787 753 : key.0,
2788 753 : key.1,
2789 753 : ])
2790 753 : .unwrap()
2791 6392 : });
2792 6392 : metric.clone()
2793 6392 : }
2794 :
2795 1538 : fn bytes_started_counter(
2796 1538 : &self,
2797 1538 : file_kind: &RemoteOpFileKind,
2798 1538 : op_kind: &RemoteOpKind,
2799 1538 : ) -> IntCounter {
2800 1538 : let mut guard = self.bytes_started_counter.lock().unwrap();
2801 1538 : let key = (file_kind.as_str(), op_kind.as_str());
2802 1538 : let metric = guard.entry(key).or_insert_with(move || {
2803 290 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
2804 290 : .get_metric_with_label_values(&[
2805 290 : &self.tenant_id,
2806 290 : &self.shard_id,
2807 290 : &self.timeline_id,
2808 290 : key.0,
2809 290 : key.1,
2810 290 : ])
2811 290 : .unwrap()
2812 1538 : });
2813 1538 : metric.clone()
2814 1538 : }
2815 :
2816 2788 : fn bytes_finished_counter(
2817 2788 : &self,
2818 2788 : file_kind: &RemoteOpFileKind,
2819 2788 : op_kind: &RemoteOpKind,
2820 2788 : ) -> IntCounter {
2821 2788 : let mut guard = self.bytes_finished_counter.lock().unwrap();
2822 2788 : let key = (file_kind.as_str(), op_kind.as_str());
2823 2788 : let metric = guard.entry(key).or_insert_with(move || {
2824 290 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
2825 290 : .get_metric_with_label_values(&[
2826 290 : &self.tenant_id,
2827 290 : &self.shard_id,
2828 290 : &self.timeline_id,
2829 290 : key.0,
2830 290 : key.1,
2831 290 : ])
2832 290 : .unwrap()
2833 2788 : });
2834 2788 : metric.clone()
2835 2788 : }
2836 : }
2837 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Test helper: current value of the cached bytes-started counter for
    /// `(file_kind, op_kind)`, or `None` if no such counter has been cached.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let counters = self.bytes_started_counter.lock().unwrap();
        let counter = counters.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }

    /// Test helper: current value of the cached bytes-finished counter for
    /// `(file_kind, op_kind)`, or `None` if no such counter has been cached.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let counters = self.bytes_finished_counter.lock().unwrap();
        let counter = counters.get(&(file_kind.as_str(), op_kind.as_str()))?;
        Some(counter.get())
    }
}
2860 :
2861 : /// See [`RemoteTimelineClientMetrics::call_begin`].
2862 : #[must_use]
2863 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
2864 : /// Decremented on drop.
2865 : calls_counter_pair: Option<IntCounterPair>,
2866 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
2867 : bytes_finished: Option<(IntCounter, u64)>,
2868 : }
2869 :
2870 : impl RemoteTimelineClientCallMetricGuard {
2871 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
2872 : /// The caller vouches to do the metric updates manually.
2873 3466 : pub fn will_decrement_manually(mut self) {
2874 3466 : let RemoteTimelineClientCallMetricGuard {
2875 3466 : calls_counter_pair,
2876 3466 : bytes_finished,
2877 3466 : } = &mut self;
2878 3466 : calls_counter_pair.take();
2879 3466 : bytes_finished.take();
2880 3466 : }
2881 : }
2882 :
2883 : impl Drop for RemoteTimelineClientCallMetricGuard {
2884 3492 : fn drop(&mut self) {
2885 3492 : let RemoteTimelineClientCallMetricGuard {
2886 3492 : calls_counter_pair,
2887 3492 : bytes_finished,
2888 3492 : } = self;
2889 3492 : if let Some(guard) = calls_counter_pair.take() {
2890 26 : guard.dec();
2891 3466 : }
2892 3492 : if let Some((bytes_finished_metric, value)) = bytes_finished {
2893 0 : bytes_finished_metric.inc_by(*value);
2894 3492 : }
2895 3492 : }
2896 : }
2897 :
/// Tells [`RemoteTimelineClientMetrics`] whether the byte size of a call
/// should be tracked in the applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// This call's byte size is not accounted for in any metric.
    ///
    /// `reason` exists purely so that call sites document why they do not
    /// need the metric.
    DontTrackSize { reason: &'static str },
    /// The call's byte size, to be tracked in the applicable metric(s).
    Bytes(u64),
}
2908 :
2909 : impl RemoteTimelineClientMetrics {
2910 : /// Update the metrics that change when a call to the remote timeline client instance starts.
2911 : ///
2912 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
2913 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
2914 : /// is more suitable.
2915 : /// Never do both.
2916 3492 : pub(crate) fn call_begin(
2917 3492 : &self,
2918 3492 : file_kind: &RemoteOpFileKind,
2919 3492 : op_kind: &RemoteOpKind,
2920 3492 : size: RemoteTimelineClientMetricsCallTrackSize,
2921 3492 : ) -> RemoteTimelineClientCallMetricGuard {
2922 3492 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2923 3492 : calls_counter_pair.inc();
2924 :
2925 3492 : let bytes_finished = match size {
2926 1954 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
2927 1954 : // nothing to do
2928 1954 : None
2929 : }
2930 1538 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2931 1538 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
2932 1538 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
2933 1538 : Some((finished_counter, size))
2934 : }
2935 : };
2936 3492 : RemoteTimelineClientCallMetricGuard {
2937 3492 : calls_counter_pair: Some(calls_counter_pair),
2938 3492 : bytes_finished,
2939 3492 : }
2940 3492 : }
2941 :
2942 : /// Manually udpate the metrics that track completions, instead of using the guard object.
2943 : /// Using the guard object is generally preferable.
2944 : /// See [`call_begin`](Self::call_begin) for more context.
2945 2900 : pub(crate) fn call_end(
2946 2900 : &self,
2947 2900 : file_kind: &RemoteOpFileKind,
2948 2900 : op_kind: &RemoteOpKind,
2949 2900 : size: RemoteTimelineClientMetricsCallTrackSize,
2950 2900 : ) {
2951 2900 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
2952 2900 : calls_counter_pair.dec();
2953 2900 : match size {
2954 1650 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
2955 1250 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
2956 1250 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
2957 1250 : }
2958 : }
2959 2900 : }
2960 : }
2961 :
2962 : impl Drop for RemoteTimelineClientMetrics {
2963 20 : fn drop(&mut self) {
2964 20 : let RemoteTimelineClientMetrics {
2965 20 : tenant_id,
2966 20 : shard_id,
2967 20 : timeline_id,
2968 20 : remote_physical_size_gauge,
2969 20 : calls,
2970 20 : bytes_started_counter,
2971 20 : bytes_finished_counter,
2972 20 : } = self;
2973 24 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
2974 24 : let mut res = [Ok(()), Ok(())];
2975 24 : REMOTE_TIMELINE_CLIENT_CALLS
2976 24 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
2977 24 : // don't care about results
2978 24 : }
2979 20 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
2980 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
2981 6 : tenant_id,
2982 6 : shard_id,
2983 6 : timeline_id,
2984 6 : a,
2985 6 : b,
2986 6 : ]);
2987 6 : }
2988 20 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
2989 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
2990 6 : tenant_id,
2991 6 : shard_id,
2992 6 : timeline_id,
2993 6 : a,
2994 6 : b,
2995 6 : ]);
2996 6 : }
2997 20 : {
2998 20 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
2999 20 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3000 20 : }
3001 20 : }
3002 : }
3003 :
3004 : /// Wrapper future that measures the time spent by a remote storage operation,
3005 : /// and records the time and success/failure as a prometheus metric.
3006 : pub(crate) trait MeasureRemoteOp: Sized {
3007 2876 : fn measure_remote_op(
3008 2876 : self,
3009 2876 : file_kind: RemoteOpFileKind,
3010 2876 : op: RemoteOpKind,
3011 2876 : metrics: Arc<RemoteTimelineClientMetrics>,
3012 2876 : ) -> MeasuredRemoteOp<Self> {
3013 2876 : let start = Instant::now();
3014 2876 : MeasuredRemoteOp {
3015 2876 : inner: self,
3016 2876 : file_kind,
3017 2876 : op,
3018 2876 : start,
3019 2876 : metrics,
3020 2876 : }
3021 2876 : }
3022 : }
3023 :
3024 : impl<T: Sized> MeasureRemoteOp for T {}
3025 :
3026 : pin_project! {
3027 : pub(crate) struct MeasuredRemoteOp<F>
3028 : {
3029 : #[pin]
3030 : inner: F,
3031 : file_kind: RemoteOpFileKind,
3032 : op: RemoteOpKind,
3033 : start: Instant,
3034 : metrics: Arc<RemoteTimelineClientMetrics>,
3035 : }
3036 : }
3037 :
3038 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
3039 : type Output = Result<O, E>;
3040 :
3041 46912 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
3042 46912 : let this = self.project();
3043 46912 : let poll_result = this.inner.poll(cx);
3044 46912 : if let Poll::Ready(ref res) = poll_result {
3045 2685 : let duration = this.start.elapsed();
3046 2685 : let status = if res.is_ok() { &"success" } else { &"failure" };
3047 2685 : this.metrics
3048 2685 : .remote_operation_time(this.file_kind, this.op, status)
3049 2685 : .observe(duration.as_secs_f64());
3050 44227 : }
3051 46912 : poll_result
3052 46912 : }
3053 : }
3054 :
3055 : pub mod tokio_epoll_uring {
3056 : use std::{
3057 : collections::HashMap,
3058 : sync::{Arc, Mutex},
3059 : };
3060 :
3061 : use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
3062 : use once_cell::sync::Lazy;
3063 :
3064 : /// Shared storage for tokio-epoll-uring thread local metrics.
3065 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3066 102 : Lazy::new(|| {
3067 102 : let slots_submission_queue_depth = register_histogram!(
3068 102 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3069 102 : "The slots waiters queue depth of each tokio_epoll_uring system",
3070 102 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
3071 102 : )
3072 102 : .expect("failed to define a metric");
3073 102 : ThreadLocalMetricsStorage {
3074 102 : observers: Mutex::new(HashMap::new()),
3075 102 : slots_submission_queue_depth,
3076 102 : }
3077 102 : });
3078 :
3079 : pub struct ThreadLocalMetricsStorage {
3080 : /// List of thread local metrics observers.
3081 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3082 : /// A histogram shared between all thread local systems
3083 : /// for collecting slots submission queue depth.
3084 : slots_submission_queue_depth: Histogram,
3085 : }
3086 :
3087 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3088 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3089 : ///
3090 : /// The System makes observations into [`Self`] and periodically, the collector
3091 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3092 : ///
3093 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3094 : /// But except for the periodic flush, the lock is uncontended so there's no waiting
3095 : /// for cache coherence protocol to get an exclusive cache line.
3096 : pub struct ThreadLocalMetrics {
3097 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3098 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3099 : }
3100 :
3101 : impl ThreadLocalMetricsStorage {
3102 : /// Registers a new thread local system. Returns a thread local metrics observer.
3103 391 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3104 391 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3105 391 : self.slots_submission_queue_depth.local(),
3106 391 : ));
3107 391 : let mut g = self.observers.lock().unwrap();
3108 391 : g.insert(id, Arc::clone(&per_system_metrics));
3109 391 : per_system_metrics
3110 391 : }
3111 :
3112 : /// Removes metrics observer for a thread local system.
3113 : /// This should be called before dropping a thread local system.
3114 102 : pub fn remove_system(&self, id: u64) {
3115 102 : let mut g = self.observers.lock().unwrap();
3116 102 : g.remove(&id);
3117 102 : }
3118 :
3119 : /// Flush all thread local metrics to the shared storage.
3120 0 : pub fn flush_thread_local_metrics(&self) {
3121 0 : let g = self.observers.lock().unwrap();
3122 0 : g.values().for_each(|local| {
3123 0 : local.flush();
3124 0 : });
3125 0 : }
3126 : }
3127 :
3128 : impl ThreadLocalMetrics {
3129 391 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3130 391 : ThreadLocalMetrics {
3131 391 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3132 391 : }
3133 391 : }
3134 :
3135 : /// Flushes the thread local metrics to shared aggregator.
3136 0 : pub fn flush(&self) {
3137 0 : let Self {
3138 0 : slots_submission_queue_depth,
3139 0 : } = self;
3140 0 : slots_submission_queue_depth.lock().unwrap().flush();
3141 0 : }
3142 : }
3143 :
3144 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3145 1047930 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3146 1047930 : let Self {
3147 1047930 : slots_submission_queue_depth,
3148 1047930 : } = self;
3149 1047930 : slots_submission_queue_depth
3150 1047930 : .lock()
3151 1047930 : .unwrap()
3152 1047930 : .observe(queue_depth as f64);
3153 1047930 : }
3154 : }
3155 :
3156 : pub struct Collector {
3157 : descs: Vec<metrics::core::Desc>,
3158 : systems_created: UIntGauge,
3159 : systems_destroyed: UIntGauge,
3160 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
3161 : }
3162 :
3163 : impl metrics::core::Collector for Collector {
3164 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3165 0 : self.descs.iter().collect()
3166 0 : }
3167 :
3168 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3169 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3170 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3171 0 : systems_created,
3172 0 : systems_destroyed,
3173 0 : } = tokio_epoll_uring::metrics::global();
3174 0 : self.systems_created.set(systems_created);
3175 0 : mfs.extend(self.systems_created.collect());
3176 0 : self.systems_destroyed.set(systems_destroyed);
3177 0 : mfs.extend(self.systems_destroyed.collect());
3178 0 :
3179 0 : self.thread_local_metrics_storage
3180 0 : .flush_thread_local_metrics();
3181 0 :
3182 0 : mfs.extend(
3183 0 : self.thread_local_metrics_storage
3184 0 : .slots_submission_queue_depth
3185 0 : .collect(),
3186 0 : );
3187 0 : mfs
3188 0 : }
3189 : }
3190 :
3191 : impl Collector {
3192 : const NMETRICS: usize = 3;
3193 :
3194 : #[allow(clippy::new_without_default)]
3195 0 : pub fn new() -> Self {
3196 0 : let mut descs = Vec::new();
3197 0 :
3198 0 : let systems_created = UIntGauge::new(
3199 0 : "pageserver_tokio_epoll_uring_systems_created",
3200 0 : "counter of tokio-epoll-uring systems that were created",
3201 0 : )
3202 0 : .unwrap();
3203 0 : descs.extend(
3204 0 : metrics::core::Collector::desc(&systems_created)
3205 0 : .into_iter()
3206 0 : .cloned(),
3207 0 : );
3208 0 :
3209 0 : let systems_destroyed = UIntGauge::new(
3210 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
3211 0 : "counter of tokio-epoll-uring systems that were destroyed",
3212 0 : )
3213 0 : .unwrap();
3214 0 : descs.extend(
3215 0 : metrics::core::Collector::desc(&systems_destroyed)
3216 0 : .into_iter()
3217 0 : .cloned(),
3218 0 : );
3219 0 :
3220 0 : Self {
3221 0 : descs,
3222 0 : systems_created,
3223 0 : systems_destroyed,
3224 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
3225 0 : }
3226 0 : }
3227 : }
3228 :
3229 102 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3230 102 : register_int_counter!(
3231 102 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
3232 102 : "Number of times where thread_local_system creation spanned multiple executor threads",
3233 102 : )
3234 102 : .unwrap()
3235 102 : });
3236 :
3237 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3238 0 : register_int_counter!(
3239 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
3240 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
3241 0 : )
3242 0 : .unwrap()
3243 0 : });
3244 : }
3245 :
3246 : pub(crate) mod tenant_throttling {
3247 : use metrics::{register_int_counter_vec, IntCounter};
3248 : use once_cell::sync::Lazy;
3249 : use utils::shard::TenantShardId;
3250 :
3251 : use crate::tenant::{self, throttle::Metric};
3252 :
3253 : struct GlobalAndPerTenantIntCounter {
3254 : global: IntCounter,
3255 : per_tenant: IntCounter,
3256 : }
3257 :
3258 : impl GlobalAndPerTenantIntCounter {
3259 : #[inline(always)]
3260 0 : pub(crate) fn inc(&self) {
3261 0 : self.inc_by(1)
3262 0 : }
3263 : #[inline(always)]
3264 0 : pub(crate) fn inc_by(&self, n: u64) {
3265 0 : self.global.inc_by(n);
3266 0 : self.per_tenant.inc_by(n);
3267 0 : }
3268 : }
3269 :
3270 : pub(crate) struct TimelineGet {
3271 : count_accounted_start: GlobalAndPerTenantIntCounter,
3272 : count_accounted_finish: GlobalAndPerTenantIntCounter,
3273 : wait_time: GlobalAndPerTenantIntCounter,
3274 : count_throttled: GlobalAndPerTenantIntCounter,
3275 : }
3276 :
3277 174 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3278 174 : register_int_counter_vec!(
3279 174 : "pageserver_tenant_throttling_count_accounted_start_global",
3280 174 : "Count of tenant throttling starts, by kind of throttle.",
3281 174 : &["kind"]
3282 174 : )
3283 174 : .unwrap()
3284 174 : });
3285 174 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3286 174 : register_int_counter_vec!(
3287 174 : "pageserver_tenant_throttling_count_accounted_start",
3288 174 : "Count of tenant throttling starts, by kind of throttle.",
3289 174 : &["kind", "tenant_id", "shard_id"]
3290 174 : )
3291 174 : .unwrap()
3292 174 : });
3293 174 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3294 174 : register_int_counter_vec!(
3295 174 : "pageserver_tenant_throttling_count_accounted_finish_global",
3296 174 : "Count of tenant throttling finishes, by kind of throttle.",
3297 174 : &["kind"]
3298 174 : )
3299 174 : .unwrap()
3300 174 : });
3301 174 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3302 174 : register_int_counter_vec!(
3303 174 : "pageserver_tenant_throttling_count_accounted_finish",
3304 174 : "Count of tenant throttling finishes, by kind of throttle.",
3305 174 : &["kind", "tenant_id", "shard_id"]
3306 174 : )
3307 174 : .unwrap()
3308 174 : });
3309 174 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3310 174 : register_int_counter_vec!(
3311 174 : "pageserver_tenant_throttling_wait_usecs_sum_global",
3312 174 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3313 174 : &["kind"]
3314 174 : )
3315 174 : .unwrap()
3316 174 : });
3317 174 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3318 174 : register_int_counter_vec!(
3319 174 : "pageserver_tenant_throttling_wait_usecs_sum",
3320 174 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3321 174 : &["kind", "tenant_id", "shard_id"]
3322 174 : )
3323 174 : .unwrap()
3324 174 : });
3325 :
3326 174 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3327 174 : register_int_counter_vec!(
3328 174 : "pageserver_tenant_throttling_count_global",
3329 174 : "Count of tenant throttlings, by kind of throttle.",
3330 174 : &["kind"]
3331 174 : )
3332 174 : .unwrap()
3333 174 : });
3334 174 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3335 174 : register_int_counter_vec!(
3336 174 : "pageserver_tenant_throttling_count",
3337 174 : "Count of tenant throttlings, by kind of throttle.",
3338 174 : &["kind", "tenant_id", "shard_id"]
3339 174 : )
3340 174 : .unwrap()
3341 174 : });
3342 :
3343 : const KIND: &str = "timeline_get";
3344 :
3345 : impl TimelineGet {
3346 192 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
3347 192 : let per_tenant_label_values = &[
3348 192 : KIND,
3349 192 : &tenant_shard_id.tenant_id.to_string(),
3350 192 : &tenant_shard_id.shard_slug().to_string(),
3351 192 : ];
3352 192 : TimelineGet {
3353 192 : count_accounted_start: {
3354 192 : GlobalAndPerTenantIntCounter {
3355 192 : global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
3356 192 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
3357 192 : .with_label_values(per_tenant_label_values),
3358 192 : }
3359 192 : },
3360 192 : count_accounted_finish: {
3361 192 : GlobalAndPerTenantIntCounter {
3362 192 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
3363 192 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
3364 192 : .with_label_values(per_tenant_label_values),
3365 192 : }
3366 192 : },
3367 192 : wait_time: {
3368 192 : GlobalAndPerTenantIntCounter {
3369 192 : global: WAIT_USECS.with_label_values(&[KIND]),
3370 192 : per_tenant: WAIT_USECS_PER_TENANT
3371 192 : .with_label_values(per_tenant_label_values),
3372 192 : }
3373 192 : },
3374 192 : count_throttled: {
3375 192 : GlobalAndPerTenantIntCounter {
3376 192 : global: WAIT_COUNT.with_label_values(&[KIND]),
3377 192 : per_tenant: WAIT_COUNT_PER_TENANT
3378 192 : .with_label_values(per_tenant_label_values),
3379 192 : }
3380 192 : },
3381 192 : }
3382 192 : }
3383 : }
3384 :
3385 0 : pub(crate) fn preinitialize_global_metrics() {
3386 0 : Lazy::force(&COUNT_ACCOUNTED_START);
3387 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
3388 0 : Lazy::force(&WAIT_USECS);
3389 0 : Lazy::force(&WAIT_COUNT);
3390 0 : }
3391 :
3392 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3393 24 : for m in &[
3394 6 : &COUNT_ACCOUNTED_START_PER_TENANT,
3395 6 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
3396 6 : &WAIT_USECS_PER_TENANT,
3397 6 : &WAIT_COUNT_PER_TENANT,
3398 24 : ] {
3399 24 : let _ = m.remove_label_values(&[
3400 24 : KIND,
3401 24 : &tenant_shard_id.tenant_id.to_string(),
3402 24 : &tenant_shard_id.shard_slug().to_string(),
3403 24 : ]);
3404 24 : }
3405 6 : }
3406 :
3407 : impl Metric for TimelineGet {
3408 : #[inline(always)]
3409 0 : fn accounting_start(&self) {
3410 0 : self.count_accounted_start.inc();
3411 0 : }
3412 : #[inline(always)]
3413 0 : fn accounting_finish(&self) {
3414 0 : self.count_accounted_finish.inc();
3415 0 : }
3416 : #[inline(always)]
3417 0 : fn observe_throttling(
3418 0 : &self,
3419 0 : tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
3420 0 : ) {
3421 0 : let val = u64::try_from(wait_time.as_micros()).unwrap();
3422 0 : self.wait_time.inc_by(val);
3423 0 : self.count_throttled.inc();
3424 0 : }
3425 : }
3426 : }
3427 :
3428 : pub(crate) mod disk_usage_based_eviction {
3429 : use super::*;
3430 :
3431 : pub(crate) struct Metrics {
3432 : pub(crate) tenant_collection_time: Histogram,
3433 : pub(crate) tenant_layer_count: Histogram,
3434 : pub(crate) layers_collected: IntCounter,
3435 : pub(crate) layers_selected: IntCounter,
3436 : pub(crate) layers_evicted: IntCounter,
3437 : }
3438 :
3439 : impl Default for Metrics {
3440 0 : fn default() -> Self {
3441 0 : let tenant_collection_time = register_histogram!(
3442 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
3443 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
3444 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
3445 0 : )
3446 0 : .unwrap();
3447 0 :
3448 0 : let tenant_layer_count = register_histogram!(
3449 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
3450 0 : "Amount of layers gathered from a tenant",
3451 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
3452 0 : )
3453 0 : .unwrap();
3454 0 :
3455 0 : let layers_collected = register_int_counter!(
3456 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
3457 0 : "Amount of layers collected"
3458 0 : )
3459 0 : .unwrap();
3460 0 :
3461 0 : let layers_selected = register_int_counter!(
3462 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
3463 0 : "Amount of layers selected"
3464 0 : )
3465 0 : .unwrap();
3466 0 :
3467 0 : let layers_evicted = register_int_counter!(
3468 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
3469 0 : "Amount of layers successfully evicted"
3470 0 : )
3471 0 : .unwrap();
3472 0 :
3473 0 : Self {
3474 0 : tenant_collection_time,
3475 0 : tenant_layer_count,
3476 0 : layers_collected,
3477 0 : layers_selected,
3478 0 : layers_evicted,
3479 0 : }
3480 0 : }
3481 : }
3482 :
3483 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
3484 : }
3485 :
3486 168 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
3487 168 : register_uint_gauge_vec!(
3488 168 : "pageserver_tokio_executor_thread_configured_count",
3489 168 : "Total number of configued tokio executor threads in the process.
3490 168 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
3491 168 : &["setup"],
3492 168 : )
3493 168 : .unwrap()
3494 168 : });
3495 :
3496 168 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
3497 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
3498 168 : let _guard = SERIALIZE.lock().unwrap();
3499 168 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
3500 168 : TOKIO_EXECUTOR_THREAD_COUNT
3501 168 : .get_metric_with_label_values(&[setup])
3502 168 : .unwrap()
3503 168 : .set(u64::try_from(num_threads.get()).unwrap());
3504 168 : }
3505 :
3506 0 : pub fn preinitialize_metrics() {
3507 0 : // Python tests need these and on some we do alerting.
3508 0 : //
3509 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
3510 0 : // order:
3511 0 : // - global metrics reside in a Lazy<PageserverMetrics>
3512 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
3513 0 : // - could move the statics into TimelineMetrics::new()?
3514 0 :
3515 0 : // counters
3516 0 : [
3517 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
3518 0 : &WALRECEIVER_STARTED_CONNECTIONS,
3519 0 : &WALRECEIVER_BROKER_UPDATES,
3520 0 : &WALRECEIVER_CANDIDATES_ADDED,
3521 0 : &WALRECEIVER_CANDIDATES_REMOVED,
3522 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
3523 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
3524 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
3525 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
3526 0 : &CIRCUIT_BREAKERS_BROKEN,
3527 0 : &CIRCUIT_BREAKERS_UNBROKEN,
3528 0 : ]
3529 0 : .into_iter()
3530 0 : .for_each(|c| {
3531 0 : Lazy::force(c);
3532 0 : });
3533 0 :
3534 0 : // Deletion queue stats
3535 0 : Lazy::force(&DELETION_QUEUE);
3536 0 :
3537 0 : // Tenant stats
3538 0 : Lazy::force(&TENANT);
3539 0 :
3540 0 : // Tenant manager stats
3541 0 : Lazy::force(&TENANT_MANAGER);
3542 0 :
3543 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
3544 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
3545 :
3546 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
3547 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
3548 0 : // values from last restart.
3549 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
3550 0 : }
3551 :
3552 : // countervecs
3553 0 : [
3554 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
3555 0 : &SMGR_QUERY_STARTED_GLOBAL,
3556 0 : ]
3557 0 : .into_iter()
3558 0 : .for_each(|c| {
3559 0 : Lazy::force(c);
3560 0 : });
3561 0 :
3562 0 : // gauges
3563 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
3564 0 :
3565 0 : // histograms
3566 0 : [
3567 0 : &READ_NUM_LAYERS_VISITED,
3568 0 : &VEC_READ_NUM_LAYERS_VISITED,
3569 0 : &WAIT_LSN_TIME,
3570 0 : &WAL_REDO_TIME,
3571 0 : &WAL_REDO_RECORDS_HISTOGRAM,
3572 0 : &WAL_REDO_BYTES_HISTOGRAM,
3573 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
3574 0 : ]
3575 0 : .into_iter()
3576 0 : .for_each(|h| {
3577 0 : Lazy::force(h);
3578 0 : });
3579 0 :
3580 0 : // Custom
3581 0 : Lazy::force(&RECONSTRUCT_TIME);
3582 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
3583 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
3584 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
3585 0 :
3586 0 : tenant_throttling::preinitialize_global_metrics();
3587 0 : }
|