Line data Source code
1 : use std::collections::HashMap;
2 : use std::num::NonZeroUsize;
3 : use std::os::fd::RawFd;
4 : use std::sync::atomic::AtomicU64;
5 : use std::sync::{Arc, Mutex};
6 : use std::time::{Duration, Instant};
7 :
8 : use enum_map::{Enum as _, EnumMap};
9 : use futures::Future;
10 : use metrics::{
11 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
12 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
13 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
14 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
15 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
16 : };
17 : use once_cell::sync::Lazy;
18 : use pageserver_api::config::{
19 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
20 : PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
21 : };
22 : use pageserver_api::models::InMemoryLayerInfo;
23 : use pageserver_api::shard::TenantShardId;
24 : use postgres_backend::{QueryError, is_expected_io_error};
25 : use pq_proto::framed::ConnectionError;
26 : use strum::{EnumCount, IntoEnumIterator as _, VariantNames};
27 : use strum_macros::{IntoStaticStr, VariantNames};
28 : use utils::id::TimelineId;
29 :
30 : use crate::config;
31 : use crate::config::PageServerConf;
32 : use crate::context::{PageContentKind, RequestContext};
33 : use crate::pgdatadir_mapping::DatadirModificationStats;
34 : use crate::task_mgr::TaskKind;
35 : use crate::tenant::Timeline;
36 : use crate::tenant::layer_map::LayerMap;
37 : use crate::tenant::mgr::TenantSlot;
38 : use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc};
39 : use crate::tenant::tasks::BackgroundLoopKind;
40 : use crate::tenant::throttle::ThrottleResult;
41 :
42 : /// Prometheus histogram buckets (in seconds) for operations in the critical
43 : /// path. In other words, operations that directly affect the latency of user
44 : /// queries.
45 : ///
46 : /// The buckets capture the majority of latencies in the microsecond and
47 : /// millisecond range but also extend far enough up to distinguish "bad" from
48 : /// "really bad".
49 : const CRITICAL_OP_BUCKETS: &[f64] = &[
50 : 0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
51 : 0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
52 : 1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
53 : ];
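
// Example (illustrative sketch, not taken from the call sites in this crate): a
// Prometheus histogram registered with these buckets counts an observation into
// every bucket whose upper bound is >= the observed value; the helper below finds
// the smallest such bound, e.g. a 250 us latency first lands in the 1 ms bucket.
#[allow(dead_code)]
fn critical_op_bucket_for(latency_secs: f64) -> Option<f64> {
    // `None` means the observation only falls into the implicit +Inf bucket.
    CRITICAL_OP_BUCKETS
        .iter()
        .copied()
        .find(|&upper_bound| latency_secs <= upper_bound)
}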
54 :
55 : // Metrics collected on operations on the storage repository.
56 : #[derive(Debug, VariantNames, IntoStaticStr)]
57 : #[strum(serialize_all = "kebab_case")]
58 : pub(crate) enum StorageTimeOperation {
59 : #[strum(serialize = "layer flush")]
60 : LayerFlush,
61 :
62 : #[strum(serialize = "layer flush delay")]
63 : LayerFlushDelay,
64 :
65 : #[strum(serialize = "compact")]
66 : Compact,
67 :
68 : #[strum(serialize = "create images")]
69 : CreateImages,
70 :
71 : #[strum(serialize = "logical size")]
72 : LogicalSize,
73 :
74 : #[strum(serialize = "imitate logical size")]
75 : ImitateLogicalSize,
76 :
77 : #[strum(serialize = "load layer map")]
78 : LoadLayerMap,
79 :
80 : #[strum(serialize = "gc")]
81 : Gc,
82 :
83 : #[strum(serialize = "find gc cutoffs")]
84 : FindGcCutoffs,
85 : }
86 :
87 1272 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
88 1272 : register_counter_vec!(
89 1272 : "pageserver_storage_operations_seconds_sum",
90 1272 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
91 1272 : &["operation", "tenant_id", "shard_id", "timeline_id"],
92 1272 : )
93 1272 : .expect("failed to define a metric")
94 1272 : });
95 :
96 1272 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
97 1272 : register_int_counter_vec!(
98 1272 : "pageserver_storage_operations_seconds_count",
99 1272 : "Count of storage operations with operation, tenant and timeline dimensions",
100 1272 : &["operation", "tenant_id", "shard_id", "timeline_id"],
101 1272 : )
102 1272 : .expect("failed to define a metric")
103 1272 : });
104 :
105 : // Buckets for background operation duration in seconds, like compaction, GC, size calculation.
106 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
107 :
108 1272 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
109 1272 : register_histogram_vec!(
110 1272 : "pageserver_storage_operations_seconds_global",
111 1272 : "Time spent on storage operations",
112 1272 : &["operation"],
113 1272 : STORAGE_OP_BUCKETS.into(),
114 1272 : )
115 1272 : .expect("failed to define a metric")
116 1272 : });
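
// Example (illustrative sketch; the real wrapper that feeds these vectors lives
// elsewhere in the crate): the serialized `StorageTimeOperation` name is used as
// the `operation` label value when observing into the global histogram.
#[allow(dead_code)]
fn observe_storage_time_sketch(op: StorageTimeOperation, elapsed: Duration) {
    let op_label: &'static str = op.into();
    STORAGE_TIME_GLOBAL
        .with_label_values(&[op_label])
        .observe(elapsed.as_secs_f64());
}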
117 :
118 : /// Measures layers visited per read (i.e. read amplification).
119 : ///
120 : /// NB: for a batch, we count all visited layers towards each read. While the cost of layer visits
121 : /// are amortized across the batch, and some layers may not intersect with a given key, each visited
122 : /// layer contributes directly to the observed latency for every read in the batch, which is what we
123 : /// care about.
124 1272 : pub(crate) static LAYERS_PER_READ: Lazy<HistogramVec> = Lazy::new(|| {
125 1272 : register_histogram_vec!(
126 1272 : "pageserver_layers_per_read",
127 1272 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
128 1272 : &["tenant_id", "shard_id", "timeline_id"],
129 1272 : // Low resolution to reduce cardinality.
130 1272 : vec![4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
131 1272 : )
132 1272 : .expect("failed to define a metric")
133 1272 : });
134 :
135 1248 : pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
136 1248 : register_histogram!(
137 1248 : "pageserver_layers_per_read_global",
138 1248 : "Layers visited to serve a single read (read amplification). In a batch, all visited layers count towards every read.",
139 1248 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
140 1248 : )
141 1248 : .expect("failed to define a metric")
142 1248 : });
143 :
144 1248 : pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
145 1248 : register_histogram!(
146 1248 : "pageserver_layers_per_read_batch_global",
147 1248 : "Layers visited to serve a single read batch (read amplification), regardless of number of reads.",
148 1248 : vec![
149 1248 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
150 1248 : ],
151 1248 : )
152 1248 : .expect("failed to define a metric")
153 1248 : });
154 :
155 1248 : pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
156 1248 : register_histogram!(
157 1248 : "pageserver_layers_per_read_amortized_global",
158 1248 : "Layers visited to serve a single read (read amplification). Amortized across a batch: \
159 1248 : all visited layers are divided by number of reads.",
160 1248 : vec![
161 1248 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
162 1248 : ],
163 1248 : )
164 1248 : .expect("failed to define a metric")
165 1248 : });
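
// Example (illustrative sketch; the real recording site is in the read path): for
// a batch that visited `layers_visited` layers to serve `reads_in_batch` reads,
// the three global histograms relate as follows.
#[allow(dead_code)]
fn record_read_amplification_sketch(layers_visited: u64, reads_in_batch: u64) {
    if reads_in_batch == 0 {
        return;
    }
    // Every read in the batch observes the full layer count...
    for _ in 0..reads_in_batch {
        LAYERS_PER_READ_GLOBAL.observe(layers_visited as f64);
    }
    // ...the batch histogram observes it once...
    LAYERS_PER_READ_BATCH_GLOBAL.observe(layers_visited as f64);
    // ...and the amortized histogram divides it across the reads.
    LAYERS_PER_READ_AMORTIZED_GLOBAL.observe(layers_visited as f64 / reads_in_batch as f64);
}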
166 :
167 1248 : pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
168 1248 : // We expect this to be low because of Postgres checkpoints. Let's see if that holds.
169 1248 : register_histogram!(
170 1248 : "pageserver_deltas_per_read_global",
171 1248 : "Number of delta pages applied to image page per read",
172 1248 : vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0],
173 1248 : )
174 1248 : .expect("failed to define a metric")
175 1248 : });
176 :
177 0 : pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
178 0 : register_uint_gauge!(
179 0 : "pageserver_concurrent_initdb",
180 0 : "Number of initdb processes running"
181 0 : )
182 0 : .expect("failed to define a metric")
183 0 : });
184 :
185 0 : pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
186 0 : register_histogram!(
187 0 : "pageserver_initdb_semaphore_seconds_global",
188 0 : "Time spent getting a permit from the global initdb semaphore",
189 0 : STORAGE_OP_BUCKETS.into()
190 0 : )
191 0 : .expect("failed to define metric")
192 0 : });
193 :
194 0 : pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
195 0 : register_histogram!(
196 0 : "pageserver_initdb_seconds_global",
197 0 : "Time spent performing initdb",
198 0 : STORAGE_OP_BUCKETS.into()
199 0 : )
200 0 : .expect("failed to define metric")
201 0 : });
202 :
203 : pub(crate) struct GetVectoredLatency {
204 : map: EnumMap<TaskKind, Option<Histogram>>,
205 : }
206 :
207 : #[allow(dead_code)]
208 : pub(crate) struct ScanLatency {
209 : map: EnumMap<TaskKind, Option<Histogram>>,
210 : }
211 :
212 : impl GetVectoredLatency {
213 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
214 : // cardinality of the metric.
215 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
216 :
217 130512 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
218 130512 : self.map[task_kind].as_ref()
219 130512 : }
220 : }
221 :
222 : impl ScanLatency {
223 : // Only these task types perform scans. Filter all other tasks out to reduce total
224 : // cardinality of the metric.
225 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
226 :
227 72 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
228 72 : self.map[task_kind].as_ref()
229 72 : }
230 : }
231 :
232 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
233 : parent: &'a Histogram,
234 : start: std::time::Instant,
235 : }
236 :
237 : impl<'a> ScanLatencyOngoingRecording<'a> {
238 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
239 0 : let start = Instant::now();
240 0 : ScanLatencyOngoingRecording { parent, start }
241 0 : }
242 :
243 0 : pub(crate) fn observe(self) {
244 0 : let elapsed = self.start.elapsed();
245 0 : self.parent.observe(elapsed.as_secs_f64());
246 0 : }
247 : }
248 :
249 1224 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
250 1224 : let inner = register_histogram_vec!(
251 1224 : "pageserver_get_vectored_seconds",
252 1224 : "Time spent in get_vectored.",
253 1224 : &["task_kind"],
254 1224 : CRITICAL_OP_BUCKETS.into(),
255 1224 : )
256 1224 : .expect("failed to define a metric");
257 1224 :
258 1224 : GetVectoredLatency {
259 37944 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
260 37944 : let task_kind = TaskKind::from_usize(task_kind_idx);
261 37944 :
262 37944 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
263 2448 : let task_kind = task_kind.into();
264 2448 : Some(inner.with_label_values(&[task_kind]))
265 : } else {
266 35496 : None
267 : }
268 37944 : })),
269 1224 : }
270 1224 : });
271 :
272 24 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
273 24 : let inner = register_histogram_vec!(
274 24 : "pageserver_scan_seconds",
275 24 : "Time spent in scan.",
276 24 : &["task_kind"],
277 24 : CRITICAL_OP_BUCKETS.into(),
278 24 : )
279 24 : .expect("failed to define a metric");
280 24 :
281 24 : ScanLatency {
282 744 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
283 744 : let task_kind = TaskKind::from_usize(task_kind_idx);
284 744 :
285 744 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
286 24 : let task_kind = task_kind.into();
287 24 : Some(inner.with_label_values(&[task_kind]))
288 : } else {
289 720 : None
290 : }
291 744 : })),
292 24 : }
293 24 : });
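
// Example (illustrative sketch; the actual call sites are in the read path): the
// per-task-kind lookup returns `None` for untracked task kinds, so callers simply
// skip the observation and the metric's cardinality stays bounded.
#[allow(dead_code)]
fn observe_get_vectored_sketch(task_kind: TaskKind, started_at: Instant) {
    if let Some(histogram) = GET_VECTORED_LATENCY.for_task_kind(task_kind) {
        histogram.observe(started_at.elapsed().as_secs_f64());
    }
}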
294 :
295 : pub(crate) struct PageCacheMetricsForTaskKind {
296 : pub read_accesses_immutable: IntCounter,
297 : pub read_hits_immutable: IntCounter,
298 : }
299 :
300 : pub(crate) struct PageCacheMetrics {
301 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
302 : }
303 :
304 600 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
305 600 : register_int_counter_vec!(
306 600 : "pageserver_page_cache_read_hits_total",
307 600 : "Number of read accesses to the page cache that hit",
308 600 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
309 600 : )
310 600 : .expect("failed to define a metric")
311 600 : });
312 :
313 600 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
314 600 : register_int_counter_vec!(
315 600 : "pageserver_page_cache_read_accesses_total",
316 600 : "Number of read accesses to the page cache",
317 600 : &["task_kind", "key_kind", "content_kind"]
318 600 : )
319 600 : .expect("failed to define a metric")
320 600 : });
321 :
322 600 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
323 18600 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
324 18600 : let task_kind = TaskKind::from_usize(task_kind);
325 18600 : let task_kind: &'static str = task_kind.into();
326 148800 : EnumMap::from_array(std::array::from_fn(|content_kind| {
327 148800 : let content_kind = PageContentKind::from_usize(content_kind);
328 148800 : let content_kind: &'static str = content_kind.into();
329 148800 : PageCacheMetricsForTaskKind {
330 148800 : read_accesses_immutable: {
331 148800 : PAGE_CACHE_READ_ACCESSES
332 148800 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
333 148800 : .unwrap()
334 148800 : },
335 148800 :
336 148800 : read_hits_immutable: {
337 148800 : PAGE_CACHE_READ_HITS
338 148800 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
339 148800 : .unwrap()
340 148800 : },
341 148800 : }
342 148800 : }))
343 18600 : })),
344 600 : });
345 :
346 : impl PageCacheMetrics {
347 6769912 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
348 6769912 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
349 6769912 : }
350 : }
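
// Example (illustrative sketch; the real callers are in the page cache): resolve
// the per-(task kind, content kind) counters from the request context once, then
// bump accesses on every lookup and hits only when the lookup succeeds.
#[allow(dead_code)]
fn count_page_cache_lookup_sketch(ctx: &RequestContext, hit: bool) {
    let metrics = PAGE_CACHE.for_ctx(ctx);
    metrics.read_accesses_immutable.inc();
    if hit {
        metrics.read_hits_immutable.inc();
    }
}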
351 :
352 : pub(crate) struct PageCacheSizeMetrics {
353 : pub max_bytes: UIntGauge,
354 :
355 : pub current_bytes_immutable: UIntGauge,
356 : }
357 :
358 600 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
359 600 : register_uint_gauge_vec!(
360 600 : "pageserver_page_cache_size_current_bytes",
361 600 : "Current size of the page cache in bytes, by key kind",
362 600 : &["key_kind"]
363 600 : )
364 600 : .expect("failed to define a metric")
365 600 : });
366 :
367 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
368 600 : Lazy::new(|| PageCacheSizeMetrics {
369 600 : max_bytes: {
370 600 : register_uint_gauge!(
371 600 : "pageserver_page_cache_size_max_bytes",
372 600 : "Maximum size of the page cache in bytes"
373 600 : )
374 600 : .expect("failed to define a metric")
375 600 : },
376 600 : current_bytes_immutable: {
377 600 : PAGE_CACHE_SIZE_CURRENT_BYTES
378 600 : .get_metric_with_label_values(&["immutable"])
379 600 : .unwrap()
380 600 : },
381 600 : });
382 :
383 : pub(crate) mod page_cache_eviction_metrics {
384 : use std::num::NonZeroUsize;
385 :
386 : use metrics::{IntCounter, IntCounterVec, register_int_counter_vec};
387 : use once_cell::sync::Lazy;
388 :
389 : #[derive(Clone, Copy)]
390 : pub(crate) enum Outcome {
391 : FoundSlotUnused { iters: NonZeroUsize },
392 : FoundSlotEvicted { iters: NonZeroUsize },
393 : ItersExceeded { iters: NonZeroUsize },
394 : }
395 :
396 600 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
397 600 : register_int_counter_vec!(
398 600 : "pageserver_page_cache_find_victim_iters_total",
399 600 : "Counter for the number of iterations in the find_victim loop",
400 600 : &["outcome"],
401 600 : )
402 600 : .expect("failed to define a metric")
403 600 : });
404 :
405 600 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
406 600 : register_int_counter_vec!(
407 600 : "pageserver_page_cache_find_victim_calls",
408 600 : "Incremented at the end of each find_victim() call.\
409 600 : Filter by outcome to get e.g., eviction rate.",
410 600 : &["outcome"]
411 600 : )
412 600 : .unwrap()
413 600 : });
414 :
415 185181 : pub(crate) fn observe(outcome: Outcome) {
416 : macro_rules! dry {
417 : ($label:literal, $iters:expr) => {{
418 : static LABEL: &'static str = $label;
419 : static ITERS_TOTAL: Lazy<IntCounter> =
420 708 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
421 : static CALLS: Lazy<IntCounter> =
422 708 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
423 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
424 : CALLS.inc();
425 : }};
426 : }
427 185181 : match outcome {
428 9840 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
429 175341 : Outcome::FoundSlotEvicted { iters } => {
430 175341 : dry!("found_evicted", iters)
431 : }
432 0 : Outcome::ItersExceeded { iters } => {
433 0 : dry!("err_iters_exceeded", iters);
434 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
435 0 : }
436 : }
437 185181 : }
438 : }
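
// Example (illustrative sketch; the real caller is the page cache's find_victim
// loop): map the loop's result onto an `Outcome` and record it.
#[allow(dead_code)]
fn record_find_victim_sketch(iterations: usize, evicted: bool) {
    if let Some(iters) = NonZeroUsize::new(iterations) {
        let outcome = if evicted {
            page_cache_eviction_metrics::Outcome::FoundSlotEvicted { iters }
        } else {
            page_cache_eviction_metrics::Outcome::FoundSlotUnused { iters }
        };
        page_cache_eviction_metrics::observe(outcome);
    }
}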
439 :
440 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
441 0 : register_int_counter_vec!(
442 0 : "page_cache_errors_total",
443 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
444 0 : &["error_kind"]
445 0 : )
446 0 : .expect("failed to define a metric")
447 0 : });
448 :
449 : #[derive(IntoStaticStr)]
450 : #[strum(serialize_all = "kebab_case")]
451 : pub(crate) enum PageCacheErrorKind {
452 : AcquirePinnedSlotTimeout,
453 : EvictIterLimit,
454 : }
455 :
456 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
457 0 : PAGE_CACHE_ERRORS
458 0 : .get_metric_with_label_values(&[error_kind.into()])
459 0 : .unwrap()
460 0 : .inc();
461 0 : }
462 :
463 132 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
464 132 : register_histogram!(
465 132 : "pageserver_wait_lsn_seconds",
466 132 : "Time spent waiting for WAL to arrive. Updated on completion of the wait_lsn operation.",
467 132 : CRITICAL_OP_BUCKETS.into(),
468 132 : )
469 132 : .expect("failed to define a metric")
470 132 : });
471 :
472 1272 : pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy<IntCounterPairVec> = Lazy::new(|| {
473 1272 : register_int_counter_pair_vec!(
474 1272 : "pageserver_wait_lsn_started_count",
475 1272 : "Number of wait_lsn operations started.",
476 1272 : "pageserver_wait_lsn_finished_count",
477 1272 : "Number of wait_lsn operations finished.",
478 1272 : &["tenant_id", "shard_id", "timeline_id"],
479 1272 : )
480 1272 : .expect("failed to define a metric")
481 1272 : });
482 :
483 1272 : pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
484 1272 : register_int_counter_vec!(
485 1272 : "pageserver_wait_lsn_in_progress_micros",
486 1272 : "Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.",
487 1272 : &["tenant_id", "shard_id", "timeline_id"],
488 1272 : )
489 1272 : .expect("failed to define a metric")
490 1272 : });
491 :
492 1272 : pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::new(|| {
493 1272 : register_int_counter!(
494 1272 : "pageserver_wait_lsn_in_progress_micros_global",
495 1272 : "Time spent waiting for WAL to arrive, globally. Updated periodically while waiting."
496 1272 : )
497 1272 : .expect("failed to define a metric")
498 1272 : });
499 :
500 : pub(crate) mod wait_ondemand_download_time {
501 : use super::*;
502 : const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
503 : 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, // 10 ms - 100ms
504 : 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, // 100ms to 1s
505 : 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, // 1s to 10s
506 : 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, // 10s to 1m
507 : ];
508 :
509 : /// The task kinds for which we want to track wait times for on-demand downloads.
510 : /// Wait times for other task kinds are not recorded.
511 : pub(crate) const WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS: [TaskKind; 2] = [
512 : TaskKind::PageRequestHandler,
513 : TaskKind::WalReceiverConnectionHandler,
514 : ];
515 :
516 0 : pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL: Lazy<Vec<Histogram>> = Lazy::new(|| {
517 0 : let histo = register_histogram_vec!(
518 0 : "pageserver_wait_ondemand_download_seconds_global",
519 0 : "Observations are individual tasks' wait times for on-demand downloads. \
520 0 : If N tasks coalesce on an on-demand download, and it takes 10s, than we observe N * 10s.",
521 0 : &["task_kind"],
522 0 : WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS.into(),
523 0 : )
524 0 : .expect("failed to define a metric");
525 0 : WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
526 0 : .iter()
527 0 : .map(|task_kind| histo.with_label_values(&[task_kind.into()]))
528 0 : .collect::<Vec<_>>()
529 0 : });
530 :
531 1272 : pub(crate) static WAIT_ONDEMAND_DOWNLOAD_TIME_SUM: Lazy<CounterVec> = Lazy::new(|| {
532 1272 : register_counter_vec!(
533 1272 : // use a name that _could_ be evolved into a per-timeline histogram later
534 1272 : "pageserver_wait_ondemand_download_seconds_sum",
535 1272 : "Like `pageserver_wait_ondemand_download_seconds_global` but per timeline",
536 1272 : &["tenant_id", "shard_id", "timeline_id", "task_kind"],
537 1272 : )
538 1272 : .unwrap()
539 1272 : });
540 :
541 : pub struct WaitOndemandDownloadTimeSum {
542 : counters: [Counter; WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS.len()],
543 : }
544 :
545 : impl WaitOndemandDownloadTimeSum {
546 2784 : pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
547 2784 : let counters = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
548 2784 : .iter()
549 5568 : .map(|task_kind| {
550 5568 : WAIT_ONDEMAND_DOWNLOAD_TIME_SUM
551 5568 : .get_metric_with_label_values(&[
552 5568 : tenant_id,
553 5568 : shard_id,
554 5568 : timeline_id,
555 5568 : task_kind.into(),
556 5568 : ])
557 5568 : .unwrap()
558 5568 : })
559 2784 : .collect::<Vec<_>>();
560 2784 : Self {
561 2784 : counters: counters.try_into().unwrap(),
562 2784 : }
563 2784 : }
564 144 : pub(crate) fn observe(&self, task_kind: TaskKind, duration: Duration) {
565 144 : let maybe = WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS
566 144 : .iter()
567 144 : .enumerate()
568 288 : .find(|(_, kind)| **kind == task_kind);
569 144 : let Some((idx, _)) = maybe else {
570 144 : return;
571 : };
572 0 : WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL[idx].observe(duration.as_secs_f64());
573 0 : let counter = &self.counters[idx];
574 0 : counter.inc_by(duration.as_secs_f64());
575 144 : }
576 : }
577 :
578 60 : pub(crate) fn shutdown_timeline(tenant_id: &str, shard_id: &str, timeline_id: &str) {
579 180 : for task_kind in WAIT_ONDEMAND_DOWNLOAD_METRIC_TASK_KINDS {
580 120 : let _ = WAIT_ONDEMAND_DOWNLOAD_TIME_SUM.remove_label_values(&[
581 120 : tenant_id,
582 120 : shard_id,
583 120 : timeline_id,
584 120 : task_kind.into(),
585 120 : ]);
586 120 : }
587 60 : }
588 :
589 0 : pub(crate) fn preinitialize_global_metrics() {
590 0 : Lazy::force(&WAIT_ONDEMAND_DOWNLOAD_TIME_GLOBAL);
591 0 : }
592 : }
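
// Example (illustrative sketch with placeholder IDs; the real callers are the
// layer download paths): construct the per-timeline counters once, observe waits
// as they complete, and drop the label set on timeline shutdown.
#[allow(dead_code)]
fn wait_ondemand_download_sketch(waited: Duration) {
    let sums = wait_ondemand_download_time::WaitOndemandDownloadTimeSum::new(
        "example-tenant",
        "0000",
        "example-timeline",
    );
    sums.observe(TaskKind::PageRequestHandler, waited);
    wait_ondemand_download_time::shutdown_timeline("example-tenant", "0000", "example-timeline");
}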
593 :
594 1272 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
595 1272 : register_int_gauge_vec!(
596 1272 : "pageserver_last_record_lsn",
597 1272 : "Last record LSN grouped by timeline",
598 1272 : &["tenant_id", "shard_id", "timeline_id"]
599 1272 : )
600 1272 : .expect("failed to define a metric")
601 1272 : });
602 :
603 1272 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
604 1272 : register_int_gauge_vec!(
605 1272 : "pageserver_disk_consistent_lsn",
606 1272 : "Disk consistent LSN grouped by timeline",
607 1272 : &["tenant_id", "shard_id", "timeline_id"]
608 1272 : )
609 1272 : .expect("failed to define a metric")
610 1272 : });
611 :
612 1272 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
613 1272 : register_uint_gauge_vec!(
614 1272 : "pageserver_projected_remote_consistent_lsn",
615 1272 : "Projected remote consistent LSN grouped by timeline",
616 1272 : &["tenant_id", "shard_id", "timeline_id"]
617 1272 : )
618 1272 : .expect("failed to define a metric")
619 1272 : });
620 :
621 1272 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
622 1272 : register_uint_gauge_vec!(
623 1272 : "pageserver_pitr_history_size",
624 1272 : "Data written since PITR cutoff on this timeline",
625 1272 : &["tenant_id", "shard_id", "timeline_id"]
626 1272 : )
627 1272 : .expect("failed to define a metric")
628 1272 : });
629 :
630 : #[derive(
631 720 : strum_macros::EnumIter,
632 0 : strum_macros::EnumString,
633 : strum_macros::Display,
634 : strum_macros::IntoStaticStr,
635 : )]
636 : #[strum(serialize_all = "kebab_case")]
637 : pub(crate) enum LayerKind {
638 : Delta,
639 : Image,
640 : }
641 :
642 : #[derive(
643 300 : strum_macros::EnumIter,
644 0 : strum_macros::EnumString,
645 : strum_macros::Display,
646 : strum_macros::IntoStaticStr,
647 : )]
648 : #[strum(serialize_all = "kebab_case")]
649 : pub(crate) enum LayerLevel {
650 : // We don't track the currently open ephemeral layer, since there's always exactly 1 and its
651 : // size changes. See `TIMELINE_EPHEMERAL_BYTES`.
652 : Frozen,
653 : L0,
654 : L1,
655 : }
656 :
657 1248 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
658 1248 : register_uint_gauge_vec!(
659 1248 : "pageserver_layer_bytes",
660 1248 : "Sum of frozen, L0, and L1 layer physical sizes in bytes (excluding the open ephemeral layer)",
661 1248 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
662 1248 : )
663 1248 : .expect("failed to define a metric")
664 1248 : });
665 :
666 1248 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
667 1248 : register_uint_gauge_vec!(
668 1248 : "pageserver_layer_count",
669 1248 : "Number of frozen, L0, and L1 layers (excluding the open ephemeral layer)",
670 1248 : &["tenant_id", "shard_id", "timeline_id", "level", "kind"]
671 1248 : )
672 1248 : .expect("failed to define a metric")
673 1248 : });
674 :
675 1272 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
676 1272 : register_uint_gauge_vec!(
677 1272 : "pageserver_archive_size",
678 1272 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
679 1272 : &["tenant_id", "shard_id", "timeline_id"]
680 1272 : )
681 1272 : .expect("failed to define a metric")
682 1272 : });
683 :
684 1272 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
685 1272 : register_int_gauge_vec!(
686 1272 : "pageserver_standby_horizon",
687 1272 : "Standby apply LSN for which GC is hold off, by timeline.",
688 1272 : &["tenant_id", "shard_id", "timeline_id"]
689 1272 : )
690 1272 : .expect("failed to define a metric")
691 1272 : });
692 :
693 1272 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
694 1272 : register_uint_gauge_vec!(
695 1272 : "pageserver_resident_physical_size",
696 1272 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
697 1272 : &["tenant_id", "shard_id", "timeline_id"]
698 1272 : )
699 1272 : .expect("failed to define a metric")
700 1272 : });
701 :
702 1272 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
703 1272 : register_uint_gauge_vec!(
704 1272 : "pageserver_visible_physical_size",
705 1272 : "The size of the layer files present in the pageserver's filesystem.",
706 1272 : &["tenant_id", "shard_id", "timeline_id"]
707 1272 : )
708 1272 : .expect("failed to define a metric")
709 1272 : });
710 :
711 1248 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
712 1248 : register_uint_gauge!(
713 1248 : "pageserver_resident_physical_size_global",
714 1248 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
715 1248 : )
716 1248 : .expect("failed to define a metric")
717 1248 : });
718 :
719 1272 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
720 1272 : register_uint_gauge_vec!(
721 1272 : "pageserver_remote_physical_size",
722 1272 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
723 1272 : // Corollary: If any files are missing from the index part, they won't be included here.
724 1272 : &["tenant_id", "shard_id", "timeline_id"]
725 1272 : )
726 1272 : .expect("failed to define a metric")
727 1272 : });
728 :
729 1272 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
730 1272 : register_uint_gauge!(
731 1272 : "pageserver_remote_physical_size_global",
732 1272 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
733 1272 : )
734 1272 : .expect("failed to define a metric")
735 1272 : });
736 :
737 36 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
738 36 : register_int_counter!(
739 36 : "pageserver_remote_ondemand_downloaded_layers_total",
740 36 : "Total on-demand downloaded layers"
741 36 : )
742 36 : .unwrap()
743 36 : });
744 :
745 36 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
746 36 : register_int_counter!(
747 36 : "pageserver_remote_ondemand_downloaded_bytes_total",
748 36 : "Total bytes of layers on-demand downloaded",
749 36 : )
750 36 : .unwrap()
751 36 : });
752 :
753 1272 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
754 1272 : register_uint_gauge_vec!(
755 1272 : "pageserver_current_logical_size",
756 1272 : "Current logical size grouped by timeline",
757 1272 : &["tenant_id", "shard_id", "timeline_id"]
758 1272 : )
759 1272 : .expect("failed to define current logical size metric")
760 1272 : });
761 :
762 1272 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
763 1272 : register_int_gauge_vec!(
764 1272 : "pageserver_aux_file_estimated_size",
765 1272 : "The size of all aux files for a timeline in aux file v2 store.",
766 1272 : &["tenant_id", "shard_id", "timeline_id"]
767 1272 : )
768 1272 : .expect("failed to define a metric")
769 1272 : });
770 :
771 1272 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
772 1272 : register_uint_gauge_vec!(
773 1272 : "pageserver_valid_lsn_lease_count",
774 1272 : "The number of valid leases after refreshing gc info.",
775 1272 : &["tenant_id", "shard_id", "timeline_id"],
776 1272 : )
777 1272 : .expect("failed to define a metric")
778 1272 : });
779 :
780 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
781 0 : register_int_counter!(
782 0 : "pageserver_circuit_breaker_broken",
783 0 : "How many times a circuit breaker has broken"
784 0 : )
785 0 : .expect("failed to define a metric")
786 0 : });
787 :
788 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
789 0 : register_int_counter!(
790 0 : "pageserver_circuit_breaker_unbroken",
791 0 : "How many times a circuit breaker has been un-broken (recovered)"
792 0 : )
793 0 : .expect("failed to define a metric")
794 0 : });
795 :
796 1224 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
797 1224 : register_int_counter!(
798 1224 : "pageserver_compression_image_in_bytes_total",
799 1224 : "Size of data written into image layers before compression"
800 1224 : )
801 1224 : .expect("failed to define a metric")
802 1224 : });
803 :
804 1224 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
805 1224 : register_int_counter!(
806 1224 : "pageserver_compression_image_in_bytes_considered",
807 1224 : "Size of potentially compressible data written into image layers before compression"
808 1224 : )
809 1224 : .expect("failed to define a metric")
810 1224 : });
811 :
812 1224 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
813 1224 : register_int_counter!(
814 1224 : "pageserver_compression_image_in_bytes_chosen",
815 1224 : "Size of data whose compressed form was written into image layers"
816 1224 : )
817 1224 : .expect("failed to define a metric")
818 1224 : });
819 :
820 1224 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
821 1224 : register_int_counter!(
822 1224 : "pageserver_compression_image_out_bytes_total",
823 1224 : "Size of compressed image layer written"
824 1224 : )
825 1224 : .expect("failed to define a metric")
826 1224 : });
827 :
828 60 : pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
829 60 : register_uint_gauge!(
830 60 : "pageserver_relsize_cache_entries",
831 60 : "Number of entries in the relation size cache",
832 60 : )
833 60 : .expect("failed to define a metric")
834 60 : });
835 :
836 60 : pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
837 60 : register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
838 60 : .expect("failed to define a metric")
839 60 : });
840 :
841 60 : pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
842 60 : register_int_counter!(
843 60 : "pageserver_relsize_cache_misses",
844 60 : "Relation size cache misses",
845 60 : )
846 60 : .expect("failed to define a metric")
847 60 : });
848 :
849 24 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
850 24 : register_int_counter!(
851 24 : "pageserver_relsize_cache_misses_old",
852 24 : "Relation size cache misses where the lookup LSN is older than the last relation update"
853 24 : )
854 24 : .expect("failed to define a metric")
855 24 : });
856 :
857 : pub(crate) mod initial_logical_size {
858 : use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec};
859 : use once_cell::sync::Lazy;
860 :
861 : pub(crate) struct StartCalculation(IntCounterVec);
862 1272 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
863 1272 : StartCalculation(
864 1272 : register_int_counter_vec!(
865 1272 : "pageserver_initial_logical_size_start_calculation",
866 1272 : "Incremented each time we start an initial logical size calculation attempt. \
867 1272 : The `circumstances` label provides some additional details.",
868 1272 : &["attempt", "circumstances"]
869 1272 : )
870 1272 : .unwrap(),
871 1272 : )
872 1272 : });
873 :
874 : struct DropCalculation {
875 : first: IntCounter,
876 : retry: IntCounter,
877 : }
878 :
879 1272 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
880 1272 : let vec = register_int_counter_vec!(
881 1272 : "pageserver_initial_logical_size_drop_calculation",
882 1272 : "Incremented each time we abort a started size calculation attmpt.",
883 1272 : &["attempt"]
884 1272 : )
885 1272 : .unwrap();
886 1272 : DropCalculation {
887 1272 : first: vec.with_label_values(&["first"]),
888 1272 : retry: vec.with_label_values(&["retry"]),
889 1272 : }
890 1272 : });
891 :
892 : pub(crate) struct Calculated {
893 : pub(crate) births: IntCounter,
894 : pub(crate) deaths: IntCounter,
895 : }
896 :
897 1272 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
898 1272 : births: register_int_counter!(
899 1272 : "pageserver_initial_logical_size_finish_calculation",
900 1272 : "Incremented every time we finish calculation of initial logical size.\
901 1272 : If everything is working well, this should happen at most once per Timeline object."
902 1272 : )
903 1272 : .unwrap(),
904 1272 : deaths: register_int_counter!(
905 1272 : "pageserver_initial_logical_size_drop_finished_calculation",
906 1272 : "Incremented when we drop a finished initial logical size calculation result.\
907 1272 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
908 1272 : )
909 1272 : .unwrap(),
910 1272 : });
911 :
912 : pub(crate) struct OngoingCalculationGuard {
913 : inc_drop_calculation: Option<IntCounter>,
914 : }
915 :
916 : #[derive(strum_macros::IntoStaticStr)]
917 : pub(crate) enum StartCircumstances {
918 : EmptyInitial,
919 : SkippedConcurrencyLimiter,
920 : AfterBackgroundTasksRateLimit,
921 : }
922 :
923 : impl StartCalculation {
924 1344 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
925 1344 : let circumstances_label: &'static str = circumstances.into();
926 1344 : self.0
927 1344 : .with_label_values(&["first", circumstances_label])
928 1344 : .inc();
929 1344 : OngoingCalculationGuard {
930 1344 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
931 1344 : }
932 1344 : }
933 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
934 0 : let circumstances_label: &'static str = circumstances.into();
935 0 : self.0
936 0 : .with_label_values(&["retry", circumstances_label])
937 0 : .inc();
938 0 : OngoingCalculationGuard {
939 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
940 0 : }
941 0 : }
942 : }
943 :
944 : impl Drop for OngoingCalculationGuard {
945 1344 : fn drop(&mut self) {
946 1344 : if let Some(counter) = self.inc_drop_calculation.take() {
947 0 : counter.inc();
948 1344 : }
949 1344 : }
950 : }
951 :
952 : impl OngoingCalculationGuard {
953 1344 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
954 1344 : drop(self.inc_drop_calculation.take());
955 1344 : CALCULATED.births.inc();
956 1344 : FinishedCalculationGuard {
957 1344 : inc_on_drop: CALCULATED.deaths.clone(),
958 1344 : }
959 1344 : }
960 : }
961 :
962 : pub(crate) struct FinishedCalculationGuard {
963 : inc_on_drop: IntCounter,
964 : }
965 :
966 : impl Drop for FinishedCalculationGuard {
967 36 : fn drop(&mut self) {
968 36 : self.inc_on_drop.inc();
969 36 : }
970 : }
971 :
972 : // context: https://github.com/neondatabase/neon/issues/5963
973 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
974 0 : Lazy::new(|| {
975 0 : register_int_counter!(
976 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
977 0 : "Counter for the following event: walreceiver calls\
978 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
979 0 : )
980 0 : .unwrap()
981 0 : });
982 : }
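
// Example (illustrative sketch; the real driver is the logical size calculation
// task): starting an attempt arms the drop counter, and saving a result disarms it
// so that births/deaths behave like a gauge for finished calculations.
#[allow(dead_code)]
fn initial_logical_size_attempt_sketch(succeeded: bool) {
    let guard = initial_logical_size::START_CALCULATION
        .first(initial_logical_size::StartCircumstances::SkippedConcurrencyLimiter);
    if succeeded {
        // Keep the finished guard alive for as long as the result is retained;
        // dropping it bumps the "deaths" counter.
        let _finished = guard.calculation_result_saved();
    }
    // Dropping `guard` without saving a result bumps the drop-calculation counter.
}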
983 :
984 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
985 0 : register_uint_gauge_vec!(
986 0 : "pageserver_directory_entries_count",
987 0 : "Sum of the entries in pageserver-stored directory listings",
988 0 : &["tenant_id", "shard_id", "timeline_id"]
989 0 : )
990 0 : .expect("failed to define a metric")
991 0 : });
992 :
993 1284 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
994 1284 : register_uint_gauge_vec!(
995 1284 : "pageserver_tenant_states_count",
996 1284 : "Count of tenants per state",
997 1284 : &["state"]
998 1284 : )
999 1284 : .expect("Failed to register pageserver_tenant_states_count metric")
1000 1284 : });
1001 :
1002 : /// A set of broken tenants.
1003 : ///
1004 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
1005 : /// tenant.
1006 60 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
1007 60 : register_uint_gauge_vec!(
1008 60 : "pageserver_broken_tenants_count",
1009 60 : "Set of broken tenants",
1010 60 : &["tenant_id", "shard_id"]
1011 60 : )
1012 60 : .expect("Failed to register pageserver_tenant_states_count metric")
1013 60 : });
1014 :
1015 36 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
1016 36 : register_uint_gauge_vec!(
1017 36 : "pageserver_tenant_synthetic_cached_size_bytes",
1018 36 : "Synthetic size of each tenant in bytes",
1019 36 : &["tenant_id"]
1020 36 : )
1021 36 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
1022 36 : });
1023 :
1024 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
1025 0 : register_histogram_vec!(
1026 0 : "pageserver_eviction_iteration_duration_seconds_global",
1027 0 : "Time spent on a single eviction iteration",
1028 0 : &["period_secs", "threshold_secs"],
1029 0 : STORAGE_OP_BUCKETS.into(),
1030 0 : )
1031 0 : .expect("failed to define a metric")
1032 0 : });
1033 :
1034 1272 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
1035 1272 : register_int_counter_vec!(
1036 1272 : "pageserver_evictions",
1037 1272 : "Number of layers evicted from the pageserver",
1038 1272 : &["tenant_id", "shard_id", "timeline_id"]
1039 1272 : )
1040 1272 : .expect("failed to define a metric")
1041 1272 : });
1042 :
1043 1272 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
1044 1272 : register_int_counter_vec!(
1045 1272 : "pageserver_evictions_with_low_residence_duration",
1046 1272 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
1047 1272 : Residence duration is determined using the `residence_duration_data_source`.",
1048 1272 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
1049 1272 : )
1050 1272 : .expect("failed to define a metric")
1051 1272 : });
1052 :
1053 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
1054 0 : register_int_counter!(
1055 0 : "pageserver_unexpected_ondemand_downloads_count",
1056 0 : "Number of unexpected on-demand downloads. \
1057 0 : We log more context for each increment, so we forgo any labels in this metric.",
1058 0 : )
1059 0 : .expect("failed to define a metric")
1060 0 : });
1061 :
1062 : /// How long did we take to start up? Broken down by labels to describe
1063 : /// different phases of startup.
1064 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
1065 0 : register_gauge_vec!(
1066 0 : "pageserver_startup_duration_seconds",
1067 0 : "Time taken by phases of pageserver startup, in seconds",
1068 0 : &["phase"]
1069 0 : )
1070 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
1071 0 : });
1072 :
1073 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
1074 0 : register_uint_gauge!(
1075 0 : "pageserver_startup_is_loading",
1076 0 : "1 while in initial startup load of tenants, 0 at other times"
1077 0 : )
1078 0 : .expect("Failed to register pageserver_startup_is_loading")
1079 0 : });
1080 :
1081 1248 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
1082 1248 : register_uint_gauge!(
1083 1248 : "pageserver_timeline_ephemeral_bytes",
1084 1248 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
1085 1248 : )
1086 1248 : .expect("Failed to register metric")
1087 1248 : });
1088 :
1089 : /// Metrics related to the lifecycle of a [`crate::tenant::TenantShard`] object: things
1090 : /// like how long it took to load.
1091 : ///
1092 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
1093 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
1094 : /// at a timeline level than tenant level.
1095 : pub(crate) struct TenantMetrics {
1096 : /// How long did tenants take to go from construction to active state?
1097 : pub(crate) activation: Histogram,
1098 : pub(crate) preload: Histogram,
1099 : pub(crate) attach: Histogram,
1100 :
1101 : /// How many tenants are included in the initial startup of the pageserver?
1102 : pub(crate) startup_scheduled: IntCounter,
1103 : pub(crate) startup_complete: IntCounter,
1104 : }
1105 :
1106 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
1107 0 : TenantMetrics {
1108 0 : activation: register_histogram!(
1109 0 : "pageserver_tenant_activation_seconds",
1110 0 : "Time taken by tenants to activate, in seconds",
1111 0 : CRITICAL_OP_BUCKETS.into()
1112 0 : )
1113 0 : .expect("Failed to register metric"),
1114 0 : preload: register_histogram!(
1115 0 : "pageserver_tenant_preload_seconds",
1116 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
1117 0 : CRITICAL_OP_BUCKETS.into()
1118 0 : )
1119 0 : .expect("Failed to register metric"),
1120 0 : attach: register_histogram!(
1121 0 : "pageserver_tenant_attach_seconds",
1122 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
1123 0 : CRITICAL_OP_BUCKETS.into()
1124 0 : )
1125 0 : .expect("Failed to register metric"),
1126 0 : startup_scheduled: register_int_counter!(
1127 0 : "pageserver_tenant_startup_scheduled",
1128 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
1129 0 : ).expect("Failed to register metric"),
1130 0 : startup_complete: register_int_counter!(
1131 0 : "pageserver_tenant_startup_complete",
1132 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
1133 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
1134 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
1135 0 : ).expect("Failed to register metric"),
1136 0 : }
1137 0 : });
1138 :
1139 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
1140 : #[derive(Debug)]
1141 : pub(crate) struct EvictionsWithLowResidenceDuration {
1142 : data_source: &'static str,
1143 : threshold: Duration,
1144 : counter: Option<IntCounter>,
1145 : }
1146 :
1147 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
1148 : data_source: &'static str,
1149 : threshold: Duration,
1150 : }
1151 :
1152 : impl EvictionsWithLowResidenceDurationBuilder {
1153 2784 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
1154 2784 : Self {
1155 2784 : data_source,
1156 2784 : threshold,
1157 2784 : }
1158 2784 : }
1159 :
1160 2784 : fn build(
1161 2784 : &self,
1162 2784 : tenant_id: &str,
1163 2784 : shard_id: &str,
1164 2784 : timeline_id: &str,
1165 2784 : ) -> EvictionsWithLowResidenceDuration {
1166 2784 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
1167 2784 : .get_metric_with_label_values(&[
1168 2784 : tenant_id,
1169 2784 : shard_id,
1170 2784 : timeline_id,
1171 2784 : self.data_source,
1172 2784 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
1173 2784 : ])
1174 2784 : .unwrap();
1175 2784 : EvictionsWithLowResidenceDuration {
1176 2784 : data_source: self.data_source,
1177 2784 : threshold: self.threshold,
1178 2784 : counter: Some(counter),
1179 2784 : }
1180 2784 : }
1181 : }
1182 :
1183 : impl EvictionsWithLowResidenceDuration {
1184 2844 : fn threshold_label_value(threshold: Duration) -> String {
1185 2844 : format!("{}", threshold.as_secs())
1186 2844 : }
1187 :
1188 24 : pub fn observe(&self, observed_value: Duration) {
1189 24 : if observed_value < self.threshold {
1190 24 : self.counter
1191 24 : .as_ref()
1192 24 : .expect("nobody calls this function after `remove_from_vec`")
1193 24 : .inc();
1194 24 : }
1195 24 : }
1196 :
1197 0 : pub fn change_threshold(
1198 0 : &mut self,
1199 0 : tenant_id: &str,
1200 0 : shard_id: &str,
1201 0 : timeline_id: &str,
1202 0 : new_threshold: Duration,
1203 0 : ) {
1204 0 : if new_threshold == self.threshold {
1205 0 : return;
1206 0 : }
1207 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1208 0 : self.data_source,
1209 0 : new_threshold,
1210 0 : )
1211 0 : .build(tenant_id, shard_id, timeline_id);
1212 0 : std::mem::swap(self, &mut with_new);
1213 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1214 0 : }
1215 :
1216 : // This could be a `Drop` impl, but we need the `tenant_id` and `timeline_id`.
1217 60 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1218 60 : let Some(_counter) = self.counter.take() else {
1219 0 : return;
1220 : };
1221 :
1222 60 : let threshold = Self::threshold_label_value(self.threshold);
1223 60 :
1224 60 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1225 60 : tenant_id,
1226 60 : shard_id,
1227 60 : timeline_id,
1228 60 : self.data_source,
1229 60 : &threshold,
1230 60 : ]);
1231 60 :
1232 60 : match removed {
1233 0 : Err(e) => {
1234 0 : // this has been hit in staging as
1235 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1236 0 : // because we can be in the drop path already, don't risk:
1237 0 : // - "double-panic => illegal instruction" or
1238 0 : // - future "drop panic => abort"
1239 0 : //
1240 0 : // so just nag: (the error has the labels)
1241 0 : tracing::warn!(
1242 0 : "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"
1243 : );
1244 : }
1245 : Ok(()) => {
1246 : // to help identify cases where we double-remove the same values, let's log all
1247 : // deletions?
1248 60 : tracing::info!(
1249 0 : "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}",
1250 : self.data_source
1251 : );
1252 : }
1253 : }
1254 60 : }
1255 : }
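
// Example (illustrative sketch with placeholder label values; the real owner is the
// per-timeline metrics struct): build the counter once per timeline, feed it
// residence durations on eviction, and remove the label set on shutdown.
#[allow(dead_code)]
fn low_residence_eviction_sketch() {
    let builder =
        EvictionsWithLowResidenceDurationBuilder::new("residence-source", Duration::from_secs(600));
    let mut metric = builder.build("example-tenant", "0000", "example-timeline");
    // Below the threshold, so this increments the counter.
    metric.observe(Duration::from_secs(30));
    metric.remove("example-tenant", "0000", "example-timeline");
}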
1256 :
1257 : // Metrics collected on disk IO operations
1258 : //
1259 : // Roughly logarithmic scale.
1260 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1261 : 0.000030, // 30 usec
1262 : 0.001000, // 1000 usec
1263 : 0.030, // 30 ms
1264 : 1.000, // 1000 ms
1265 : 30.000, // 30000 ms
1266 : ];
1267 :
1268 : /// VirtualFile fs operation variants.
1269 : ///
1270 : /// Operations:
1271 : /// - open ([`std::fs::OpenOptions::open`])
1272 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1273 : /// - close-by-replace (close by replacement algorithm)
1274 : /// - read (`read_at`)
1275 : /// - write (`write_at`)
1276 : /// - seek (modify internal position or file length query)
1277 : /// - fsync ([`std::fs::File::sync_all`])
1278 : /// - metadata ([`std::fs::File::metadata`])
1279 : #[derive(
1280 0 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1281 : )]
1282 : pub(crate) enum StorageIoOperation {
1283 : Open,
1284 : OpenAfterReplace,
1285 : Close,
1286 : CloseByReplace,
1287 : Read,
1288 : Write,
1289 : Seek,
1290 : Fsync,
1291 : Metadata,
1292 : SetLen,
1293 : }
1294 :
1295 : impl StorageIoOperation {
1296 14520 : pub fn as_str(&self) -> &'static str {
1297 14520 : match self {
1298 1452 : StorageIoOperation::Open => "open",
1299 1452 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1300 1452 : StorageIoOperation::Close => "close",
1301 1452 : StorageIoOperation::CloseByReplace => "close-by-replace",
1302 1452 : StorageIoOperation::Read => "read",
1303 1452 : StorageIoOperation::Write => "write",
1304 1452 : StorageIoOperation::Seek => "seek",
1305 1452 : StorageIoOperation::Fsync => "fsync",
1306 1452 : StorageIoOperation::Metadata => "metadata",
1307 1452 : StorageIoOperation::SetLen => "set_len",
1308 : }
1309 14520 : }
1310 : }
1311 :
1312 : /// Tracks time taken by fs operations near VirtualFile.
1313 : #[derive(Debug)]
1314 : pub(crate) struct StorageIoTime {
1315 : metrics: [Histogram; StorageIoOperation::COUNT],
1316 : }
1317 :
1318 : impl StorageIoTime {
1319 1452 : fn new() -> Self {
1320 1452 : let storage_io_histogram_vec = register_histogram_vec!(
1321 1452 : "pageserver_io_operations_seconds",
1322 1452 : "Time spent in IO operations",
1323 1452 : &["operation"],
1324 1452 : STORAGE_IO_TIME_BUCKETS.into()
1325 1452 : )
1326 1452 : .expect("failed to define a metric");
1327 14520 : let metrics = std::array::from_fn(|i| {
1328 14520 : let op = StorageIoOperation::from_repr(i).unwrap();
1329 14520 : storage_io_histogram_vec
1330 14520 : .get_metric_with_label_values(&[op.as_str()])
1331 14520 : .unwrap()
1332 14520 : });
1333 1452 : Self { metrics }
1334 1452 : }
1335 :
1336 5872455 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1337 5872455 : &self.metrics[op as usize]
1338 5872455 : }
1339 : }
1340 :
1341 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
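
// Example (illustrative sketch; the real callers are in the VirtualFile layer):
// pick the per-operation histogram and observe the wall-clock duration of the
// wrapped filesystem work.
#[allow(dead_code)]
fn time_storage_io_sketch<T>(op: StorageIoOperation, work: impl FnOnce() -> T) -> T {
    let started_at = Instant::now();
    let result = work();
    STORAGE_IO_TIME_METRIC
        .get(op)
        .observe(started_at.elapsed().as_secs_f64());
    result
}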
1342 :
1343 : #[derive(Clone, Copy)]
1344 : #[repr(usize)]
1345 : pub(crate) enum StorageIoSizeOperation {
1346 : Read,
1347 : Write,
1348 : }
1349 :
1350 : impl StorageIoSizeOperation {
1351 : pub(crate) const VARIANTS: &'static [&'static str] = &["read", "write"];
1352 :
1353 9000 : fn as_str(&self) -> &'static str {
1354 9000 : Self::VARIANTS[*self as usize]
1355 9000 : }
1356 : }
1357 :
1358 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1359 1716 : pub(crate) static STORAGE_IO_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
1360 1716 : register_uint_gauge_vec!(
1361 1716 : "pageserver_io_operations_bytes_total",
1362 1716 : "Total amount of bytes read/written in IO operations",
1363 1716 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1364 1716 : )
1365 1716 : .expect("failed to define a metric")
1366 1716 : });
1367 :
1368 : #[derive(Clone, Debug)]
1369 : pub(crate) struct StorageIoSizeMetrics {
1370 : pub read: UIntGauge,
1371 : pub write: UIntGauge,
1372 : }
1373 :
1374 : impl StorageIoSizeMetrics {
1375 4500 : pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self {
1376 4500 : let read = STORAGE_IO_SIZE
1377 4500 : .get_metric_with_label_values(&[
1378 4500 : StorageIoSizeOperation::Read.as_str(),
1379 4500 : tenant_id,
1380 4500 : shard_id,
1381 4500 : timeline_id,
1382 4500 : ])
1383 4500 : .unwrap();
1384 4500 : let write = STORAGE_IO_SIZE
1385 4500 : .get_metric_with_label_values(&[
1386 4500 : StorageIoSizeOperation::Write.as_str(),
1387 4500 : tenant_id,
1388 4500 : shard_id,
1389 4500 : timeline_id,
1390 4500 : ])
1391 4500 : .unwrap();
1392 4500 : Self { read, write }
1393 4500 : }
1394 : }
1395 :
1396 : #[cfg(not(test))]
1397 : pub(crate) mod virtual_file_descriptor_cache {
1398 : use super::*;
1399 :
1400 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1401 0 : register_uint_gauge!(
1402 0 : "pageserver_virtual_file_descriptor_cache_size_max",
1403 0 : "Maximum number of open file descriptors in the cache."
1404 0 : )
1405 0 : .unwrap()
1406 0 : });
1407 :
1408 : // SIZE_CURRENT: derive it like so:
1409 : // ```
1410 : // sum (pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
1411 : // -ignoring(operation)
1412 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
1413 : // ```
1414 : }
1415 :
1416 : #[cfg(not(test))]
1417 : pub(crate) mod virtual_file_io_engine {
1418 : use super::*;
1419 :
1420 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1421 0 : register_uint_gauge_vec!(
1422 0 : "pageserver_virtual_file_io_engine_kind",
1423 0 : "The configured io engine for VirtualFile",
1424 0 : &["kind"],
1425 0 : )
1426 0 : .unwrap()
1427 0 : });
1428 : }
1429 :
1430 : pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
1431 : pub(crate) struct SmgrOpTimerInner {
1432 : global_execution_latency_histo: Histogram,
1433 : per_timeline_execution_latency_histo: Option<Histogram>,
1434 :
1435 : global_batch_wait_time: Histogram,
1436 : per_timeline_batch_wait_time: Histogram,
1437 :
1438 : global_flush_in_progress_micros: IntCounter,
1439 : per_timeline_flush_in_progress_micros: IntCounter,
1440 :
1441 : throttling: Arc<tenant_throttling::Pagestream>,
1442 :
1443 : timings: SmgrOpTimerState,
1444 : }
1445 :
1446 : /// The stages of request processing are represented by the enum variants.
1447 : /// Used as part of [`SmgrOpTimerInner::timings`].
1448 : ///
1449 : /// Request processing calls into the `SmgrOpTimer::observe_*` methods at the
1450 : /// transition points.
1451 : /// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]
1452 : /// to the next state.
1453 : ///
1454 : /// Each request goes through every stage, in all configurations.
1455 : ///
1456 : #[derive(Debug)]
1457 : enum SmgrOpTimerState {
1458 : Received {
1459 : // In the future, we may want to track the full time the request spent
1460 : // inside pageserver process (time spent in kernel buffers can't be tracked).
1461 : // `received_at` would be used for that.
1462 : #[allow(dead_code)]
1463 : received_at: Instant,
1464 : },
1465 : Throttling {
1466 : throttle_started_at: Instant,
1467 : },
1468 : Batching {
1469 : throttle_done_at: Instant,
1470 : },
1471 : Executing {
1472 : execution_started_at: Instant,
1473 : },
1474 : Flushing,
1475 : // NB: when adding observation points, remember to update the Drop impl.
1476 : }
1477 :
1478 : // NB: when adding observation points, remember to update the Drop impl.
1479 : impl SmgrOpTimer {
1480 : /// See [`SmgrOpTimerState`] for more context.
1481 0 : pub(crate) fn observe_throttle_start(&mut self, at: Instant) {
1482 0 : let Some(inner) = self.0.as_mut() else {
1483 0 : return;
1484 : };
1485 0 : let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {
1486 0 : return;
1487 : };
1488 0 : inner.throttling.count_accounted_start.inc();
1489 0 : inner.timings = SmgrOpTimerState::Throttling {
1490 0 : throttle_started_at: at,
1491 0 : };
1492 0 : }
1493 :
1494 : /// See [`SmgrOpTimerState`] for more context.
1495 0 : pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {
1496 0 : let Some(inner) = self.0.as_mut() else {
1497 0 : return;
1498 : };
1499 : let SmgrOpTimerState::Throttling {
1500 0 : throttle_started_at,
1501 0 : } = &inner.timings
1502 : else {
1503 0 : return;
1504 : };
1505 0 : inner.throttling.count_accounted_finish.inc();
1506 0 : match throttle {
1507 0 : ThrottleResult::NotThrottled { end } => {
1508 0 : inner.timings = SmgrOpTimerState::Batching {
1509 0 : throttle_done_at: end,
1510 0 : };
1511 0 : }
1512 0 : ThrottleResult::Throttled { end } => {
1513 0 : // update metrics
1514 0 : inner.throttling.count_throttled.inc();
1515 0 : inner
1516 0 : .throttling
1517 0 : .wait_time
1518 0 : .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());
1519 0 : // state transition
1520 0 : inner.timings = SmgrOpTimerState::Batching {
1521 0 : throttle_done_at: end,
1522 0 : };
1523 0 : }
1524 : }
1525 0 : }
1526 :
1527 : /// See [`SmgrOpTimerState`] for more context.
1528 0 : pub(crate) fn observe_execution_start(&mut self, at: Instant) {
1529 0 : let Some(inner) = self.0.as_mut() else {
1530 0 : return;
1531 : };
1532 0 : let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {
1533 0 : return;
1534 : };
1535 : // update metrics
1536 0 : let batch = at - *throttle_done_at;
1537 0 : inner.global_batch_wait_time.observe(batch.as_secs_f64());
1538 0 : inner
1539 0 : .per_timeline_batch_wait_time
1540 0 : .observe(batch.as_secs_f64());
1541 0 : // state transition
1542 0 : inner.timings = SmgrOpTimerState::Executing {
1543 0 : execution_started_at: at,
1544 0 : }
1545 0 : }
1546 :
1547 : /// For all but the first caller, this is a no-op.
1548 : /// The first caller receives Some, subsequent ones None.
1549 : ///
1550 : /// See [`SmgrOpTimerState`] for more context.
1551 0 : pub(crate) fn observe_execution_end(&mut self, at: Instant) -> Option<SmgrOpFlushInProgress> {
1552 : // NB: unlike the other observe_* methods, this one take()s.
1553 : #[allow(clippy::question_mark)] // maintain similar code pattern.
1554 0 : let Some(mut inner) = self.0.take() else {
1555 0 : return None;
1556 : };
1557 : let SmgrOpTimerState::Executing {
1558 0 : execution_started_at,
1559 0 : } = &inner.timings
1560 : else {
1561 0 : return None;
1562 : };
1563 : // update metrics
1564 0 : let execution = at - *execution_started_at;
1565 0 : inner
1566 0 : .global_execution_latency_histo
1567 0 : .observe(execution.as_secs_f64());
1568 0 : if let Some(per_timeline_execution_latency_histo) =
1569 0 : &inner.per_timeline_execution_latency_histo
1570 0 : {
1571 0 : per_timeline_execution_latency_histo.observe(execution.as_secs_f64());
1572 0 : }
1573 :
1574 : // state transition
1575 0 : inner.timings = SmgrOpTimerState::Flushing;
1576 0 :
1577 0 : // return the flush in progress object which
1578 0 : // will do the remaining metrics updates
1579 0 : let SmgrOpTimerInner {
1580 0 : global_flush_in_progress_micros,
1581 0 : per_timeline_flush_in_progress_micros,
1582 0 : ..
1583 0 : } = inner;
1584 0 : Some(SmgrOpFlushInProgress {
1585 0 : global_micros: global_flush_in_progress_micros,
1586 0 : per_timeline_micros: per_timeline_flush_in_progress_micros,
1587 0 : })
1588 0 : }
1589 : }
1590 :
1591 : /// The last stage of request processing is serializing and flushing the response
1592 : /// into the TCP connection. We want to make slow flushes observable
1593 : /// _while they are occurring_, so this struct provides a wrapper method [`Self::measure`]
1594 : /// to periodically bump the metric.
1595 : ///
1596 : /// If in the future we decide that we're not interested in live updates, we can
1597 : /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
1598 : /// and remove this struct from the code base.
1599 : pub(crate) struct SmgrOpFlushInProgress {
1600 : global_micros: IntCounter,
1601 : per_timeline_micros: IntCounter,
1602 : }
1603 :
1604 : impl Drop for SmgrOpTimer {
1605 0 : fn drop(&mut self) {
1606 0 : // In case of early drop, update any of the remaining metrics with
1607 0 : // observations so that (started,finished) counter pairs balance out
1608 0 : // and all counters on the latency path have the the same number of
1609 0 : // observations.
1610 0 : // It's technically lying and it would be better if each metric had
1611 0 : // a separate label or similar for cancelled requests.
1612 0 : // But we don't have that right now and counter pairs balancing
1613 0 : // out is useful when using the metrics in panels and whatnot.
1614 0 : let now = Instant::now();
1615 0 : self.observe_throttle_start(now);
1616 0 : self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
1617 0 : self.observe_execution_start(now);
1618 0 : let maybe_flush_timer = self.observe_execution_end(now);
1619 0 : drop(maybe_flush_timer);
1620 0 : }
1621 : }
1622 :
1623 : impl SmgrOpFlushInProgress {
1624 : /// The caller must guarantee that `socket_fd` outlives this function.
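/// A hedged usage sketch (not a doctest); `flush_in_progress`, `flush_future` and
/// `socket_fd` are placeholders supplied by the caller, not names defined here:
///
/// ```ignore
/// let flush_started_at = Instant::now();
/// flush_in_progress
///     .measure(flush_started_at, flush_future, socket_fd)
///     .await;
/// ```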
1625 0 : pub(crate) async fn measure<Fut, O>(
1626 0 : self,
1627 0 : started_at: Instant,
1628 0 : mut fut: Fut,
1629 0 : socket_fd: RawFd,
1630 0 : ) -> O
1631 0 : where
1632 0 : Fut: std::future::Future<Output = O>,
1633 0 : {
1634 0 : let mut fut = std::pin::pin!(fut);
1635 0 :
1636 0 : let mut logged = false;
1637 0 : let mut last_counter_increment_at = started_at;
1638 0 : let mut observe_guard = scopeguard::guard(
1639 0 : |is_timeout| {
1640 0 : let now = Instant::now();
1641 0 :
1642 0 : // Increment counter
1643 0 : {
1644 0 : let elapsed_since_last_observe = now - last_counter_increment_at;
1645 0 : self.global_micros
1646 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1647 0 : self.per_timeline_micros
1648 0 : .inc_by(u64::try_from(elapsed_since_last_observe.as_micros()).unwrap());
1649 0 : last_counter_increment_at = now;
1650 0 : }
1651 0 :
1652 0 : // Log on every timeout, and also on completion, but only if we hit at least one timeout.
1653 0 : if is_timeout || logged {
1654 0 : logged = true;
1655 0 : let elapsed_total = now - started_at;
1656 0 : let msg = if is_timeout {
1657 0 : "slow flush ongoing"
1658 : } else {
1659 0 : "slow flush completed or cancelled"
1660 : };
1661 :
1662 0 : let (inq, outq) = {
1663 0 : // SAFETY: caller guarantees that `socket_fd` outlives this function.
1664 0 : #[cfg(target_os = "linux")]
1665 0 : unsafe {
1666 0 : (
1667 0 : utils::linux_socket_ioctl::inq(socket_fd).unwrap_or(-2),
1668 0 : utils::linux_socket_ioctl::outq(socket_fd).unwrap_or(-2),
1669 0 : )
1670 0 : }
1671 0 : #[cfg(not(target_os = "linux"))]
1672 0 : {
1673 0 : _ = socket_fd; // appease unused lint on macOS
1674 0 : (-1, -1)
1675 0 : }
1676 0 : };
1677 0 :
1678 0 : let elapsed_total_secs = format!("{:.6}", elapsed_total.as_secs_f64());
1679 0 : tracing::info!(elapsed_total_secs, inq, outq, msg);
1680 0 : }
1681 0 : },
1682 0 : |mut observe| {
1683 0 : observe(false);
1684 0 : },
1685 0 : );
1686 :
1687 : loop {
1688 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1689 0 : Ok(v) => return v,
1690 0 : Err(_timeout) => {
1691 0 : (*observe_guard)(true);
1692 0 : }
1693 : }
1694 : }
1695 0 : }
1696 : }
1697 :
1698 : #[derive(
1699 : Debug,
1700 : Clone,
1701 : Copy,
1702 : IntoStaticStr,
1703 : strum_macros::EnumCount,
1704 0 : strum_macros::EnumIter,
1705 : strum_macros::FromRepr,
1706 : enum_map::Enum,
1707 : )]
1708 : #[strum(serialize_all = "snake_case")]
1709 : pub enum SmgrQueryType {
1710 : GetRelExists,
1711 : GetRelSize,
1712 : GetPageAtLsn,
1713 : GetDbSize,
1714 : GetSlruSegment,
1715 : #[cfg(feature = "testing")]
1716 : Test,
1717 : }
1718 :
1719 : #[derive(
1720 : Debug,
1721 : Clone,
1722 : Copy,
1723 : IntoStaticStr,
1724 : strum_macros::EnumCount,
1725 540 : strum_macros::EnumIter,
1726 : strum_macros::FromRepr,
1727 : enum_map::Enum,
1728 : )]
1729 : #[strum(serialize_all = "snake_case")]
1730 : pub enum GetPageBatchBreakReason {
1731 : BatchFull,
1732 : NonBatchableRequest,
1733 : NonUniformLsn,
1734 : SamePageAtDifferentLsn,
1735 : NonUniformTimeline,
1736 : ExecutorSteal,
1737 : #[cfg(feature = "testing")]
1738 : NonUniformKey,
1739 : }
1740 :
1741 : pub(crate) struct SmgrQueryTimePerTimeline {
1742 : global_started: [IntCounter; SmgrQueryType::COUNT],
1743 : global_latency: [Histogram; SmgrQueryType::COUNT],
1744 : per_timeline_getpage_started: IntCounter,
1745 : per_timeline_getpage_latency: Histogram,
1746 : global_batch_size: Histogram,
1747 : per_timeline_batch_size: Histogram,
1748 : global_flush_in_progress_micros: IntCounter,
1749 : per_timeline_flush_in_progress_micros: IntCounter,
1750 : global_batch_wait_time: Histogram,
1751 : per_timeline_batch_wait_time: Histogram,
1752 : global_batch_break_reason: [IntCounter; GetPageBatchBreakReason::COUNT],
1753 : per_timeline_batch_break_reason: GetPageBatchBreakReasonTimelineMetrics,
1754 : throttling: Arc<tenant_throttling::Pagestream>,
1755 : }
1756 :
1757 1272 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1758 1272 : register_int_counter_vec!(
1759 1272 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1760 1272 : "pageserver_smgr_query_started_global_count",
1761 1272 : "Number of smgr queries started, aggregated by query type.",
1762 1272 : &["smgr_query_type"],
1763 1272 : )
1764 1272 : .expect("failed to define a metric")
1765 1272 : });
1766 :
1767 1272 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1768 1272 : register_int_counter_vec!(
1769 1272 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1770 1272 : "pageserver_smgr_query_started_count",
1771 1272 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1772 1272 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1773 1272 : )
1774 1272 : .expect("failed to define a metric")
1775 1272 : });
1776 :
1777 : // Alias so all histograms recording per-timeline smgr timings use the same buckets.
1778 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
1779 :
1780 1272 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1781 1272 : register_histogram_vec!(
1782 1272 : "pageserver_smgr_query_seconds",
1783 1272 : "Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
1784 1272 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1785 1272 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1786 1272 : )
1787 1272 : .expect("failed to define a metric")
1788 1272 : });
1789 :
1790 1272 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1791 1272 : [
1792 1272 : 1,
1793 1272 : 10,
1794 1272 : 20,
1795 1272 : 40,
1796 1272 : 60,
1797 1272 : 80,
1798 1272 : 100,
1799 1272 : 200,
1800 1272 : 300,
1801 1272 : 400,
1802 1272 : 500,
1803 1272 : 600,
1804 1272 : 700,
1805 1272 : 800,
1806 1272 : 900,
1807 1272 : 1_000, // 1ms
1808 1272 : 2_000,
1809 1272 : 4_000,
1810 1272 : 6_000,
1811 1272 : 8_000,
1812 1272 : 10_000, // 10ms
1813 1272 : 20_000,
1814 1272 : 40_000,
1815 1272 : 60_000,
1816 1272 : 80_000,
1817 1272 : 100_000,
1818 1272 : 200_000,
1819 1272 : 400_000,
1820 1272 : 600_000,
1821 1272 : 800_000,
1822 1272 : 1_000_000, // 1s
1823 1272 : 2_000_000,
1824 1272 : 4_000_000,
1825 1272 : 6_000_000,
1826 1272 : 8_000_000,
1827 1272 : 10_000_000, // 10s
1828 1272 : 20_000_000,
1829 1272 : 50_000_000,
1830 1272 : 100_000_000,
1831 1272 : 200_000_000,
1832 1272 : 1_000_000_000, // 1000s
1833 1272 : ]
1834 1272 : .into_iter()
1835 1272 : .map(Duration::from_micros)
1836 52152 : .map(|d| d.as_secs_f64())
1837 1272 : .collect()
1838 1272 : });
1839 :
1840 1272 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1841 1272 : register_histogram_vec!(
1842 1272 : "pageserver_smgr_query_seconds_global",
1843 1272 : "Like pageserver_smgr_query_seconds, but aggregated to instance level.",
1844 1272 : &["smgr_query_type"],
1845 1272 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1846 1272 : )
1847 1272 : .expect("failed to define a metric")
1848 1272 : });
1849 :
1850 1272 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1851 1272 : (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
1852 40704 : .map(|v| v.into())
1853 1272 : .collect()
1854 1272 : });
1855 :
1856 1272 : static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1857 1272 : register_histogram!(
1858 1272 : "pageserver_page_service_batch_size_global",
1859 1272 : "Batch size of pageserver page service requests",
1860 1272 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
1861 1272 : )
1862 1272 : .expect("failed to define a metric")
1863 1272 : });
1864 :
1865 1272 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1866 1272 : let mut buckets = Vec::new();
1867 8904 : for i in 0.. {
1868 8904 : let bucket = 1 << i;
1869 8904 : if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
1870 1272 : break;
1871 7632 : }
1872 7632 : buckets.push(bucket.into());
1873 : }
1874 1272 : buckets
1875 1272 : });
1876 :
1877 1272 : static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1878 1272 : register_histogram_vec!(
1879 1272 : "pageserver_page_service_batch_size",
1880 1272 : "Batch size of pageserver page service requests",
1881 1272 : &["tenant_id", "shard_id", "timeline_id"],
1882 1272 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
1883 1272 : )
1884 1272 : .expect("failed to define a metric")
1885 1272 : });
1886 :
1887 1272 : static PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1888 1272 : register_int_counter_vec!(
1889 1272 : // it's a counter, but, name is prepared to extend it to a histogram of queue depth
1890 1272 : "pageserver_page_service_batch_break_reason_global",
1891 1272 : "Reason for breaking batches of get page requests",
1892 1272 : &["reason"],
1893 1272 : )
1894 1272 : .expect("failed to define a metric")
1895 1272 : });
1896 :
1897 : struct GetPageBatchBreakReasonTimelineMetrics {
1898 : map: EnumMap<GetPageBatchBreakReason, IntCounter>,
1899 : }
1900 :
1901 : impl GetPageBatchBreakReasonTimelineMetrics {
1902 2784 : fn new(tenant_id: &str, shard_slug: &str, timeline_id: &str) -> Self {
1903 2784 : GetPageBatchBreakReasonTimelineMetrics {
1904 19488 : map: EnumMap::from_array(std::array::from_fn(|reason_idx| {
1905 19488 : let reason = GetPageBatchBreakReason::from_usize(reason_idx);
1906 19488 : PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.with_label_values(&[
1907 19488 : tenant_id,
1908 19488 : shard_slug,
1909 19488 : timeline_id,
1910 19488 : reason.into(),
1911 19488 : ])
1912 19488 : })),
1913 2784 : }
1914 2784 : }
1915 :
1916 0 : fn inc(&self, reason: GetPageBatchBreakReason) {
1917 0 : self.map[reason].inc()
1918 0 : }
1919 : }
1920 :
1921 1272 : static PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1922 1272 : register_int_counter_vec!(
1923 1272 : "pageserver_page_service_batch_break_reason",
1924 1272 : "Reason for breaking batches of get page requests",
1925 1272 : &["tenant_id", "shard_id", "timeline_id", "reason"],
1926 1272 : )
1927 1272 : .expect("failed to define a metric")
1928 1272 : });
1929 :
1930 0 : pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1931 0 : register_int_gauge_vec!(
1932 0 : "pageserver_page_service_config_max_batch_size",
1933 0 : "Configured maximum batch size for the server-side batching functionality of page_service. \
1934 0 : Labels expose more of the configuration parameters.",
1935 0 : &["mode", "execution", "batching"]
1936 0 : )
1937 0 : .expect("failed to define a metric")
1938 0 : });
1939 :
1940 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
1941 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
1942 0 : let (label_values, value) = match conf {
1943 0 : PageServicePipeliningConfig::Serial => (["serial", "-", "-"], 1),
1944 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
1945 0 : max_batch_size,
1946 0 : execution,
1947 0 : batching,
1948 0 : }) => {
1949 0 : let mode = "pipelined";
1950 0 : let execution = match execution {
1951 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
1952 0 : "concurrent-futures"
1953 : }
1954 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
1955 : };
1956 0 : let batching = match batching {
1957 0 : PageServiceProtocolPipelinedBatchingStrategy::UniformLsn => "uniform-lsn",
1958 0 : PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn => "scattered-lsn",
1959 : };
1960 :
1961 0 : ([mode, execution, batching], max_batch_size.get())
1962 : }
1963 : };
1964 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
1965 0 : .with_label_values(&label_values)
1966 0 : .set(value.try_into().unwrap());
1967 0 : }
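// Worked example (illustrative values; label text comes from the match above): a
// Pipelined config with max_batch_size=32, ConcurrentFutures execution and UniformLsn
// batching is exported as
//   pageserver_page_service_config_max_batch_size{mode="pipelined",execution="concurrent-futures",batching="uniform-lsn"} 32
// while the Serial config is exported with labels {mode="serial",execution="-",batching="-"} and value 1.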
1968 :
1969 1272 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
1970 1272 : register_int_counter_vec!(
1971 1272 : "pageserver_page_service_pagestream_flush_in_progress_micros",
1972 1272 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
1973 1272 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
1974 1272 : easily discoverable in monitoring. \
1975 1272 : Hence, this is NOT a completion latency historgram.",
1976 1272 : &["tenant_id", "shard_id", "timeline_id"],
1977 1272 : )
1978 1272 : .expect("failed to define a metric")
1979 1272 : });
1980 :
1981 1272 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
1982 1272 : register_int_counter!(
1983 1272 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
1984 1272 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
1985 1272 : )
1986 1272 : .expect("failed to define a metric")
1987 1272 : });
1988 :
1989 1272 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1990 1272 : register_histogram_vec!(
1991 1272 : "pageserver_page_service_pagestream_batch_wait_time_seconds",
1992 1272 : "Time a request spent waiting in its batch until the batch moved to throttle&execution.",
1993 1272 : &["tenant_id", "shard_id", "timeline_id"],
1994 1272 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1995 1272 : )
1996 1272 : .expect("failed to define a metric")
1997 1272 : });
1998 :
1999 1272 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
2000 1272 : register_histogram!(
2001 1272 : "pageserver_page_service_pagestream_batch_wait_time_seconds_global",
2002 1272 : "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
2003 1272 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
2004 1272 : )
2005 1272 : .expect("failed to define a metric")
2006 1272 : });
2007 :
2008 : impl SmgrQueryTimePerTimeline {
2009 2784 : pub(crate) fn new(
2010 2784 : tenant_shard_id: &TenantShardId,
2011 2784 : timeline_id: &TimelineId,
2012 2784 : pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,
2013 2784 : ) -> Self {
2014 2784 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2015 2784 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
2016 2784 : let timeline_id = timeline_id.to_string();
2017 16704 : let global_started = std::array::from_fn(|i| {
2018 16704 : let op = SmgrQueryType::from_repr(i).unwrap();
2019 16704 : SMGR_QUERY_STARTED_GLOBAL
2020 16704 : .get_metric_with_label_values(&[op.into()])
2021 16704 : .unwrap()
2022 16704 : });
2023 16704 : let global_latency = std::array::from_fn(|i| {
2024 16704 : let op = SmgrQueryType::from_repr(i).unwrap();
2025 16704 : SMGR_QUERY_TIME_GLOBAL
2026 16704 : .get_metric_with_label_values(&[op.into()])
2027 16704 : .unwrap()
2028 16704 : });
2029 2784 :
2030 2784 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
2031 2784 : .get_metric_with_label_values(&[
2032 2784 : SmgrQueryType::GetPageAtLsn.into(),
2033 2784 : &tenant_id,
2034 2784 : &shard_slug,
2035 2784 : &timeline_id,
2036 2784 : ])
2037 2784 : .unwrap();
2038 2784 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
2039 2784 : .get_metric_with_label_values(&[
2040 2784 : SmgrQueryType::GetPageAtLsn.into(),
2041 2784 : &tenant_id,
2042 2784 : &shard_slug,
2043 2784 : &timeline_id,
2044 2784 : ])
2045 2784 : .unwrap();
2046 2784 :
2047 2784 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
2048 2784 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
2049 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2050 2784 : .unwrap();
2051 2784 :
2052 2784 : let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
2053 2784 : let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
2054 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2055 2784 : .unwrap();
2056 2784 :
2057 19488 : let global_batch_break_reason = std::array::from_fn(|i| {
2058 19488 : let reason = GetPageBatchBreakReason::from_usize(i);
2059 19488 : PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL
2060 19488 : .get_metric_with_label_values(&[reason.into()])
2061 19488 : .unwrap()
2062 19488 : });
2063 2784 : let per_timeline_batch_break_reason =
2064 2784 : GetPageBatchBreakReasonTimelineMetrics::new(&tenant_id, &shard_slug, &timeline_id);
2065 2784 :
2066 2784 : let global_flush_in_progress_micros =
2067 2784 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
2068 2784 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
2069 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
2070 2784 : .unwrap();
2071 2784 :
2072 2784 : Self {
2073 2784 : global_started,
2074 2784 : global_latency,
2075 2784 : per_timeline_getpage_latency,
2076 2784 : per_timeline_getpage_started,
2077 2784 : global_batch_size,
2078 2784 : per_timeline_batch_size,
2079 2784 : global_flush_in_progress_micros,
2080 2784 : per_timeline_flush_in_progress_micros,
2081 2784 : global_batch_wait_time,
2082 2784 : per_timeline_batch_wait_time,
2083 2784 : global_batch_break_reason,
2084 2784 : per_timeline_batch_break_reason,
2085 2784 : throttling: pagestream_throttle_metrics,
2086 2784 : }
2087 2784 : }
2088 0 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
2089 0 : self.global_started[op as usize].inc();
2090 :
2091 0 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
2092 0 : self.per_timeline_getpage_started.inc();
2093 0 : Some(self.per_timeline_getpage_latency.clone())
2094 : } else {
2095 0 : None
2096 : };
2097 :
2098 0 : SmgrOpTimer(Some(SmgrOpTimerInner {
2099 0 : global_execution_latency_histo: self.global_latency[op as usize].clone(),
2100 0 : per_timeline_execution_latency_histo: per_timeline_latency_histo,
2101 0 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
2102 0 : per_timeline_flush_in_progress_micros: self
2103 0 : .per_timeline_flush_in_progress_micros
2104 0 : .clone(),
2105 0 : global_batch_wait_time: self.global_batch_wait_time.clone(),
2106 0 : per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
2107 0 : throttling: self.throttling.clone(),
2108 0 : timings: SmgrOpTimerState::Received { received_at },
2109 0 : }))
2110 0 : }
2111 :
2112 : /// TODO: do something about this? seems odd, we have a similar call on SmgrOpTimer
2113 0 : pub(crate) fn observe_getpage_batch_start(
2114 0 : &self,
2115 0 : batch_size: usize,
2116 0 : break_reason: GetPageBatchBreakReason,
2117 0 : ) {
2118 0 : self.global_batch_size.observe(batch_size as f64);
2119 0 : self.per_timeline_batch_size.observe(batch_size as f64);
2120 0 :
2121 0 : self.global_batch_break_reason[break_reason.into_usize()].inc();
2122 0 : self.per_timeline_batch_break_reason.inc(break_reason);
2123 0 : }
2124 : }
2125 :
2126 : // keep in sync with control plane Go code so that we can validate
2127 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
2128 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
2129 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
2130 0 : [
2131 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
2132 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
2133 0 : ]
2134 0 : .map(|ms| (ms as f64) / 1000.0)
2135 0 : });
2136 :
2137 : pub(crate) struct BasebackupQueryTime {
2138 : ok: Histogram,
2139 : error: Histogram,
2140 : client_error: Histogram,
2141 : }
2142 :
2143 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
2144 0 : let vec = register_histogram_vec!(
2145 0 : "pageserver_basebackup_query_seconds",
2146 0 : "Histogram of basebackup queries durations, by result type",
2147 0 : &["result"],
2148 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
2149 0 : )
2150 0 : .expect("failed to define a metric");
2151 0 : BasebackupQueryTime {
2152 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
2153 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
2154 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
2155 0 : }
2156 0 : });
2157 :
2158 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
2159 : parent: &'a BasebackupQueryTime,
2160 : start: std::time::Instant,
2161 : }
2162 :
2163 : impl BasebackupQueryTime {
2164 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
2165 0 : let start = Instant::now();
2166 0 : BasebackupQueryTimeOngoingRecording {
2167 0 : parent: self,
2168 0 : start,
2169 0 : }
2170 0 : }
2171 : }
2172 :
2173 : impl BasebackupQueryTimeOngoingRecording<'_> {
2174 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
2175 0 : let elapsed = self.start.elapsed().as_secs_f64();
2176 : // If you want to change the categorization of a specific error, also change it in `log_query_error`.
2177 0 : let metric = match res {
2178 0 : Ok(_) => &self.parent.ok,
2179 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
2180 0 : if is_expected_io_error(io_error) =>
2181 0 : {
2182 0 : &self.parent.client_error
2183 : }
2184 0 : Err(_) => &self.parent.error,
2185 : };
2186 0 : metric.observe(elapsed);
2187 0 : }
2188 : }
2189 :
2190 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
2191 0 : register_int_counter_pair_vec!(
2192 0 : "pageserver_live_connections_started",
2193 0 : "Number of network connections that we started handling",
2194 0 : "pageserver_live_connections_finished",
2195 0 : "Number of network connections that we finished handling",
2196 0 : &["pageserver_connection_kind"]
2197 0 : )
2198 0 : .expect("failed to define a metric")
2199 0 : });
2200 :
2201 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
2202 : pub(crate) enum ComputeCommandKind {
2203 : PageStreamV3,
2204 : PageStreamV2,
2205 : Basebackup,
2206 : Fullbackup,
2207 : LeaseLsn,
2208 : }
2209 :
2210 : pub(crate) struct ComputeCommandCounters {
2211 : map: EnumMap<ComputeCommandKind, IntCounter>,
2212 : }
2213 :
2214 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
2215 0 : let inner = register_int_counter_vec!(
2216 0 : "pageserver_compute_commands",
2217 0 : "Number of compute -> pageserver commands processed",
2218 0 : &["command"]
2219 0 : )
2220 0 : .expect("failed to define a metric");
2221 0 :
2222 0 : ComputeCommandCounters {
2223 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
2224 0 : let command = ComputeCommandKind::from_usize(i);
2225 0 : let command_str: &'static str = command.into();
2226 0 : inner.with_label_values(&[command_str])
2227 0 : })),
2228 0 : }
2229 0 : });
2230 :
2231 : impl ComputeCommandCounters {
2232 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
2233 0 : &self.map[command]
2234 0 : }
2235 : }
2236 :
2237 : // remote storage metrics
2238 :
2239 1248 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
2240 1248 : register_int_counter_pair_vec!(
2241 1248 : "pageserver_remote_timeline_client_calls_started",
2242 1248 : "Number of started calls to remote timeline client.",
2243 1248 : "pageserver_remote_timeline_client_calls_finished",
2244 1248 : "Number of finshed calls to remote timeline client.",
2245 1248 : &[
2246 1248 : "tenant_id",
2247 1248 : "shard_id",
2248 1248 : "timeline_id",
2249 1248 : "file_kind",
2250 1248 : "op_kind"
2251 1248 : ],
2252 1248 : )
2253 1248 : .unwrap()
2254 1248 : });
2255 :
2256 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
2257 1236 : Lazy::new(|| {
2258 1236 : register_int_counter_vec!(
2259 1236 : "pageserver_remote_timeline_client_bytes_started",
2260 1236 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2261 1236 : The increment happens when the operation is scheduled.",
2262 1236 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2263 1236 : )
2264 1236 : .expect("failed to define a metric")
2265 1236 : });
2266 :
2267 1236 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
2268 1236 : register_int_counter_vec!(
2269 1236 : "pageserver_remote_timeline_client_bytes_finished",
2270 1236 : "Incremented by the number of bytes associated with a remote timeline client operation. \
2271 1236 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
2272 1236 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
2273 1236 : )
2274 1236 : .expect("failed to define a metric")
2275 1236 : });
2276 :
2277 : pub(crate) struct TenantManagerMetrics {
2278 : tenant_slots_attached: UIntGauge,
2279 : tenant_slots_secondary: UIntGauge,
2280 : tenant_slots_inprogress: UIntGauge,
2281 : pub(crate) tenant_slot_writes: IntCounter,
2282 : pub(crate) unexpected_errors: IntCounter,
2283 : }
2284 :
2285 : impl TenantManagerMetrics {
2286 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
2287 : /// exactly: they track the lifetime of the slots _in the tenant map_.
2288 12 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
2289 12 : match slot {
2290 0 : TenantSlot::Attached(_) => {
2291 0 : self.tenant_slots_attached.inc();
2292 0 : }
2293 0 : TenantSlot::Secondary(_) => {
2294 0 : self.tenant_slots_secondary.inc();
2295 0 : }
2296 12 : TenantSlot::InProgress(_) => {
2297 12 : self.tenant_slots_inprogress.inc();
2298 12 : }
2299 : }
2300 12 : }
2301 :
2302 12 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
2303 12 : match slot {
2304 12 : TenantSlot::Attached(_) => {
2305 12 : self.tenant_slots_attached.dec();
2306 12 : }
2307 0 : TenantSlot::Secondary(_) => {
2308 0 : self.tenant_slots_secondary.dec();
2309 0 : }
2310 0 : TenantSlot::InProgress(_) => {
2311 0 : self.tenant_slots_inprogress.dec();
2312 0 : }
2313 : }
2314 12 : }
2315 :
2316 : #[cfg(all(debug_assertions, not(test)))]
2317 0 : pub(crate) fn slots_total(&self) -> u64 {
2318 0 : self.tenant_slots_attached.get()
2319 0 : + self.tenant_slots_secondary.get()
2320 0 : + self.tenant_slots_inprogress.get()
2321 0 : }
2322 : }
2323 :
2324 12 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
2325 12 : let tenant_slots = register_uint_gauge_vec!(
2326 12 : "pageserver_tenant_manager_slots",
2327 12 : "How many slots currently exist, including all attached, secondary and in-progress operations",
2328 12 : &["mode"]
2329 12 : )
2330 12 : .expect("failed to define a metric");
2331 12 : TenantManagerMetrics {
2332 12 : tenant_slots_attached: tenant_slots
2333 12 : .get_metric_with_label_values(&["attached"])
2334 12 : .unwrap(),
2335 12 : tenant_slots_secondary: tenant_slots
2336 12 : .get_metric_with_label_values(&["secondary"])
2337 12 : .unwrap(),
2338 12 : tenant_slots_inprogress: tenant_slots
2339 12 : .get_metric_with_label_values(&["inprogress"])
2340 12 : .unwrap(),
2341 12 : tenant_slot_writes: register_int_counter!(
2342 12 : "pageserver_tenant_manager_slot_writes",
2343 12 : "Writes to a tenant slot, including all of create/attach/detach/delete"
2344 12 : )
2345 12 : .expect("failed to define a metric"),
2346 12 : unexpected_errors: register_int_counter!(
2347 12 : "pageserver_tenant_manager_unexpected_errors_total",
2348 12 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
2349 12 : )
2350 12 : .expect("failed to define a metric"),
2351 12 : }
2352 12 : });
2353 :
2354 : pub(crate) struct DeletionQueueMetrics {
2355 : pub(crate) keys_submitted: IntCounter,
2356 : pub(crate) keys_dropped: IntCounter,
2357 : pub(crate) keys_executed: IntCounter,
2358 : pub(crate) keys_validated: IntCounter,
2359 : pub(crate) dropped_lsn_updates: IntCounter,
2360 : pub(crate) unexpected_errors: IntCounter,
2361 : pub(crate) remote_errors: IntCounterVec,
2362 : }
2363 189 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
2364 189 : DeletionQueueMetrics{
2365 189 :
2366 189 : keys_submitted: register_int_counter!(
2367 189 : "pageserver_deletion_queue_submitted_total",
2368 189 : "Number of objects submitted for deletion"
2369 189 : )
2370 189 : .expect("failed to define a metric"),
2371 189 :
2372 189 : keys_dropped: register_int_counter!(
2373 189 : "pageserver_deletion_queue_dropped_total",
2374 189 : "Number of object deletions dropped due to stale generation."
2375 189 : )
2376 189 : .expect("failed to define a metric"),
2377 189 :
2378 189 : keys_executed: register_int_counter!(
2379 189 : "pageserver_deletion_queue_executed_total",
2380 189 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
2381 189 : )
2382 189 : .expect("failed to define a metric"),
2383 189 :
2384 189 : keys_validated: register_int_counter!(
2385 189 : "pageserver_deletion_queue_validated_total",
2386 189 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
2387 189 : )
2388 189 : .expect("failed to define a metric"),
2389 189 :
2390 189 : dropped_lsn_updates: register_int_counter!(
2391 189 : "pageserver_deletion_queue_dropped_lsn_updates_total",
2392 189 : "Updates to remote_consistent_lsn dropped due to stale generation number."
2393 189 : )
2394 189 : .expect("failed to define a metric"),
2395 189 : unexpected_errors: register_int_counter!(
2396 189 : "pageserver_deletion_queue_unexpected_errors_total",
2397 189 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
2398 189 : )
2399 189 : .expect("failed to define a metric"),
2400 189 : remote_errors: register_int_counter_vec!(
2401 189 : "pageserver_deletion_queue_remote_errors_total",
2402 189 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
2403 189 : &["op_kind"],
2404 189 : )
2405 189 : .expect("failed to define a metric")
2406 189 : }
2407 189 : });
2408 :
2409 : pub(crate) struct SecondaryModeMetrics {
2410 : pub(crate) upload_heatmap: IntCounter,
2411 : pub(crate) upload_heatmap_errors: IntCounter,
2412 : pub(crate) upload_heatmap_duration: Histogram,
2413 : pub(crate) download_heatmap: IntCounter,
2414 : pub(crate) download_layer: IntCounter,
2415 : }
2416 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2417 0 : SecondaryModeMetrics {
2418 0 : upload_heatmap: register_int_counter!(
2419 0 : "pageserver_secondary_upload_heatmap",
2420 0 : "Number of heatmaps written to remote storage by attached tenants"
2421 0 : )
2422 0 : .expect("failed to define a metric"),
2423 0 : upload_heatmap_errors: register_int_counter!(
2424 0 : "pageserver_secondary_upload_heatmap_errors",
2425 0 : "Failures writing heatmap to remote storage"
2426 0 : )
2427 0 : .expect("failed to define a metric"),
2428 0 : upload_heatmap_duration: register_histogram!(
2429 0 : "pageserver_secondary_upload_heatmap_duration",
2430 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2431 0 : )
2432 0 : .expect("failed to define a metric"),
2433 0 : download_heatmap: register_int_counter!(
2434 0 : "pageserver_secondary_download_heatmap",
2435 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2436 0 : )
2437 0 : .expect("failed to define a metric"),
2438 0 : download_layer: register_int_counter!(
2439 0 : "pageserver_secondary_download_layer",
2440 0 : "Number of downloads of layers by secondary mode locations"
2441 0 : )
2442 0 : .expect("failed to define a metric"),
2443 0 : }
2444 0 : });
2445 :
2446 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2447 0 : register_uint_gauge_vec!(
2448 0 : "pageserver_secondary_resident_physical_size",
2449 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2450 0 : &["tenant_id", "shard_id"]
2451 0 : )
2452 0 : .expect("failed to define a metric")
2453 0 : });
2454 :
2455 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2456 0 : register_uint_gauge!(
2457 0 : "pageserver_utilization_score",
2458 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2459 0 : )
2460 0 : .expect("failed to define a metric")
2461 0 : });
2462 :
2463 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2464 0 : register_uint_gauge_vec!(
2465 0 : "pageserver_secondary_heatmap_total_size",
2466 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2467 0 : &["tenant_id", "shard_id"]
2468 0 : )
2469 0 : .expect("failed to define a metric")
2470 0 : });
2471 :
2472 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2473 : pub enum RemoteOpKind {
2474 : Upload,
2475 : Download,
2476 : Delete,
2477 : }
2478 : impl RemoteOpKind {
2479 92495 : pub fn as_str(&self) -> &'static str {
2480 92495 : match self {
2481 87144 : Self::Upload => "upload",
2482 408 : Self::Download => "download",
2483 4943 : Self::Delete => "delete",
2484 : }
2485 92495 : }
2486 : }
2487 :
2488 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
2489 : pub enum RemoteOpFileKind {
2490 : Layer,
2491 : Index,
2492 : }
2493 : impl RemoteOpFileKind {
2494 92495 : pub fn as_str(&self) -> &'static str {
2495 92495 : match self {
2496 65057 : Self::Layer => "layer",
2497 27438 : Self::Index => "index",
2498 : }
2499 92495 : }
2500 : }
2501 :
2502 1226 : pub(crate) static REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
2503 1226 : register_histogram_vec!(
2504 1226 : "pageserver_remote_timeline_client_seconds_global",
2505 1226 : "Time spent on remote timeline client operations. \
2506 1226 : Grouped by task_kind, file_kind, operation_kind and status. \
2507 1226 : The task_kind is \
2508 1226 : - for layer downloads, populated from RequestContext (primary objective of having the label) \
2509 1226 : - for index downloads, set to 'unknown' \
2510 1226 : - for any upload operation, set to 'RemoteUploadTask' \
2511 1226 : This keeps dimensionality at bay. \
2512 1226 : Does not account for time spent waiting in remote timeline client's queues.",
2513 1226 : &["task_kind", "file_kind", "op_kind", "status"]
2514 1226 : )
2515 1226 : .expect("failed to define a metric")
2516 1226 : });
2517 :
2518 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2519 0 : register_int_counter_vec!(
2520 0 : "pageserver_tenant_task_events",
2521 0 : "Number of task start/stop/fail events.",
2522 0 : &["event"],
2523 0 : )
2524 0 : .expect("Failed to register tenant_task_events metric")
2525 0 : });
2526 :
2527 : pub struct BackgroundLoopSemaphoreMetrics {
2528 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
2529 : durations: EnumMap<BackgroundLoopKind, Histogram>,
2530 : waiting_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2531 : running_tasks: EnumMap<BackgroundLoopKind, IntGauge>,
2532 : }
2533 :
2534 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> =
2535 120 : Lazy::new(|| {
2536 120 : let counters = register_int_counter_pair_vec!(
2537 120 : "pageserver_background_loop_semaphore_wait_start_count",
2538 120 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2539 120 : "pageserver_background_loop_semaphore_wait_finish_count",
2540 120 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2541 120 : &["task"],
2542 120 : )
2543 120 : .unwrap();
2544 120 :
2545 120 : let durations = register_histogram_vec!(
2546 120 : "pageserver_background_loop_semaphore_wait_seconds",
2547 120 : "Seconds spent waiting on background loop semaphore acquisition",
2548 120 : &["task"],
2549 120 : vec![0.01, 1.0, 5.0, 10.0, 30.0, 60.0, 180.0, 300.0, 600.0],
2550 120 : )
2551 120 : .unwrap();
2552 120 :
2553 120 : let waiting_tasks = register_int_gauge_vec!(
2554 120 : "pageserver_background_loop_semaphore_waiting_tasks",
2555 120 : "Number of background loop tasks waiting for semaphore",
2556 120 : &["task"],
2557 120 : )
2558 120 : .unwrap();
2559 120 :
2560 120 : let running_tasks = register_int_gauge_vec!(
2561 120 : "pageserver_background_loop_semaphore_running_tasks",
2562 120 : "Number of background loop tasks running concurrently",
2563 120 : &["task"],
2564 120 : )
2565 120 : .unwrap();
2566 120 :
2567 120 : BackgroundLoopSemaphoreMetrics {
2568 1200 : counters: EnumMap::from_array(std::array::from_fn(|i| {
2569 1200 : let kind = BackgroundLoopKind::from_usize(i);
2570 1200 : counters.with_label_values(&[kind.into()])
2571 1200 : })),
2572 1200 : durations: EnumMap::from_array(std::array::from_fn(|i| {
2573 1200 : let kind = BackgroundLoopKind::from_usize(i);
2574 1200 : durations.with_label_values(&[kind.into()])
2575 1200 : })),
2576 1200 : waiting_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2577 1200 : let kind = BackgroundLoopKind::from_usize(i);
2578 1200 : waiting_tasks.with_label_values(&[kind.into()])
2579 1200 : })),
2580 1200 : running_tasks: EnumMap::from_array(std::array::from_fn(|i| {
2581 1200 : let kind = BackgroundLoopKind::from_usize(i);
2582 1200 : running_tasks.with_label_values(&[kind.into()])
2583 1200 : })),
2584 120 : }
2585 120 : });
2586 :
2587 : impl BackgroundLoopSemaphoreMetrics {
2588 : /// Starts recording semaphore metrics. Call `acquired()` on the returned recorder when the
2589 : /// semaphore is acquired, and drop it when the task completes or is cancelled.
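/// A hedged sketch of the intended pattern; the semaphore itself and the task body
/// are caller-side assumptions, not something defined in this module:
///
/// ```ignore
/// let mut recorder = BACKGROUND_LOOP_SEMAPHORE.record(task_kind);
/// let _permit = semaphore.acquire().await; // caller-owned concurrency limit
/// let _waited: Duration = recorder.acquired();
/// run_task().await;
/// // Dropping `recorder` after `acquired()` decrements `running_tasks`.
/// ```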
2590 2184 : pub(crate) fn record(
2591 2184 : &self,
2592 2184 : task: BackgroundLoopKind,
2593 2184 : ) -> BackgroundLoopSemaphoreMetricsRecorder {
2594 2184 : BackgroundLoopSemaphoreMetricsRecorder::start(self, task)
2595 2184 : }
2596 : }
2597 :
2598 : /// Records metrics for a background task.
2599 : pub struct BackgroundLoopSemaphoreMetricsRecorder<'a> {
2600 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2601 : task: BackgroundLoopKind,
2602 : start: Instant,
2603 : wait_counter_guard: Option<metrics::IntCounterPairGuard>,
2604 : }
2605 :
2606 : impl<'a> BackgroundLoopSemaphoreMetricsRecorder<'a> {
2607 : /// Starts recording semaphore metrics, by recording wait time and incrementing
2608 : /// `wait_start_count` and `waiting_tasks`.
2609 2184 : fn start(metrics: &'a BackgroundLoopSemaphoreMetrics, task: BackgroundLoopKind) -> Self {
2610 2184 : metrics.waiting_tasks[task].inc();
2611 2184 : Self {
2612 2184 : metrics,
2613 2184 : task,
2614 2184 : start: Instant::now(),
2615 2184 : wait_counter_guard: Some(metrics.counters[task].guard()),
2616 2184 : }
2617 2184 : }
2618 :
2619 : /// Signals that the semaphore has been acquired, and updates relevant metrics.
2620 2184 : pub fn acquired(&mut self) -> Duration {
2621 2184 : let waited = self.start.elapsed();
2622 2184 : self.wait_counter_guard.take().expect("already acquired");
2623 2184 : self.metrics.durations[self.task].observe(waited.as_secs_f64());
2624 2184 : self.metrics.waiting_tasks[self.task].dec();
2625 2184 : self.metrics.running_tasks[self.task].inc();
2626 2184 : waited
2627 2184 : }
2628 : }
2629 :
2630 : impl Drop for BackgroundLoopSemaphoreMetricsRecorder<'_> {
2631 : /// The task either completed or was cancelled.
2632 2184 : fn drop(&mut self) {
2633 2184 : if self.wait_counter_guard.take().is_some() {
2634 0 : // Waiting.
2635 0 : self.metrics.durations[self.task].observe(self.start.elapsed().as_secs_f64());
2636 0 : self.metrics.waiting_tasks[self.task].dec();
2637 2184 : } else {
2638 2184 : // Running.
2639 2184 : self.metrics.running_tasks[self.task].dec();
2640 2184 : }
2641 2184 : }
2642 : }
2643 :
2644 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2645 0 : register_int_counter_vec!(
2646 0 : "pageserver_background_loop_period_overrun_count",
2647 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2648 0 : &["task", "period"],
2649 0 : )
2650 0 : .expect("failed to define a metric")
2651 0 : });
2652 :
2653 : // walreceiver metrics
2654 :
2655 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2656 0 : register_int_counter!(
2657 0 : "pageserver_walreceiver_started_connections_total",
2658 0 : "Number of started walreceiver connections"
2659 0 : )
2660 0 : .expect("failed to define a metric")
2661 0 : });
2662 :
2663 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2664 0 : register_int_gauge!(
2665 0 : "pageserver_walreceiver_active_managers",
2666 0 : "Number of active walreceiver managers"
2667 0 : )
2668 0 : .expect("failed to define a metric")
2669 0 : });
2670 :
2671 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2672 0 : register_int_counter_vec!(
2673 0 : "pageserver_walreceiver_switches_total",
2674 0 : "Number of walreceiver manager change_connection calls",
2675 0 : &["reason"]
2676 0 : )
2677 0 : .expect("failed to define a metric")
2678 0 : });
2679 :
2680 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2681 0 : register_int_counter!(
2682 0 : "pageserver_walreceiver_broker_updates_total",
2683 0 : "Number of received broker updates in walreceiver"
2684 0 : )
2685 0 : .expect("failed to define a metric")
2686 0 : });
2687 :
2688 12 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2689 12 : register_int_counter_vec!(
2690 12 : "pageserver_walreceiver_candidates_events_total",
2691 12 : "Number of walreceiver candidate events",
2692 12 : &["event"]
2693 12 : )
2694 12 : .expect("failed to define a metric")
2695 12 : });
2696 :
2697 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2698 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2699 :
2700 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2701 12 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2702 :
2703 : // Metrics collected on WAL redo operations
2704 : //
2705 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2706 : // for access to the postgres process ('wait') since there is only one for
2707 : // each tenant.
2708 :
2709 : /// Time buckets are small because we want to be able to measure the
2710 : /// smallest redo processing times. These buckets allow us to measure down
2711 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
2712 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
2713 : ///
2714 : /// Values up to 1s are recorded because metrics show that we have redo
2715 : /// durations and lock times larger than 0.250s.
2716 : macro_rules! redo_histogram_time_buckets {
2717 : () => {
2718 : vec![
2719 : 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
2720 : 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
2721 : 1.000_000,
2722 : ]
2723 : };
2724 : }
2725 :
2726 : /// While we're at it, also measure the amount of records replayed in each
2727 : /// operation. We have a global 'total replayed' counter, but that's not
2728 : /// as useful as 'what is the skew for how many records we replay in one
2729 : /// operation'.
2730 : macro_rules! redo_histogram_count_buckets {
2731 : () => {
2732 : vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
2733 : };
2734 : }
2735 :
2736 : macro_rules! redo_bytes_histogram_count_buckets {
2737 : () => {
2738 : // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
2739 : // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
2740 : vec![
2741 : 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2742 : 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
2743 : ]
2744 : };
2745 : }
2746 :
2747 : pub(crate) struct WalIngestMetrics {
2748 : pub(crate) bytes_received: IntCounter,
2749 : pub(crate) records_received: IntCounter,
2750 : pub(crate) records_observed: IntCounter,
2751 : pub(crate) records_committed: IntCounter,
2752 : pub(crate) records_filtered: IntCounter,
2753 : pub(crate) values_committed_metadata_images: IntCounter,
2754 : pub(crate) values_committed_metadata_deltas: IntCounter,
2755 : pub(crate) values_committed_data_images: IntCounter,
2756 : pub(crate) values_committed_data_deltas: IntCounter,
2757 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2758 : }
2759 :
2760 : impl WalIngestMetrics {
2761 0 : pub(crate) fn inc_values_committed(&self, stats: &DatadirModificationStats) {
2762 0 : if stats.metadata_images > 0 {
2763 0 : self.values_committed_metadata_images
2764 0 : .inc_by(stats.metadata_images);
2765 0 : }
2766 0 : if stats.metadata_deltas > 0 {
2767 0 : self.values_committed_metadata_deltas
2768 0 : .inc_by(stats.metadata_deltas);
2769 0 : }
2770 0 : if stats.data_images > 0 {
2771 0 : self.values_committed_data_images.inc_by(stats.data_images);
2772 0 : }
2773 0 : if stats.data_deltas > 0 {
2774 0 : self.values_committed_data_deltas.inc_by(stats.data_deltas);
2775 0 : }
2776 0 : }
2777 : }
2778 :
2779 60 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
2780 60 : let values_committed = register_int_counter_vec!(
2781 60 : "pageserver_wal_ingest_values_committed",
2782 60 : "Number of values committed to pageserver storage from WAL records",
2783 60 : &["class", "kind"],
2784 60 : )
2785 60 : .expect("failed to define a metric");
2786 60 :
2787 60 : WalIngestMetrics {
2788 60 : bytes_received: register_int_counter!(
2789 60 : "pageserver_wal_ingest_bytes_received",
2790 60 : "Bytes of WAL ingested from safekeepers",
2791 60 : )
2792 60 : .unwrap(),
2793 60 : records_received: register_int_counter!(
2794 60 : "pageserver_wal_ingest_records_received",
2795 60 : "Number of WAL records received from safekeepers"
2796 60 : )
2797 60 : .expect("failed to define a metric"),
2798 60 : records_observed: register_int_counter!(
2799 60 : "pageserver_wal_ingest_records_observed",
2800 60 : "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
2801 60 : )
2802 60 : .expect("failed to define a metric"),
2803 60 : records_committed: register_int_counter!(
2804 60 : "pageserver_wal_ingest_records_committed",
2805 60 : "Number of WAL records which resulted in writes to pageserver storage"
2806 60 : )
2807 60 : .expect("failed to define a metric"),
2808 60 : records_filtered: register_int_counter!(
2809 60 : "pageserver_wal_ingest_records_filtered",
2810 60 : "Number of WAL records filtered out due to sharding"
2811 60 : )
2812 60 : .expect("failed to define a metric"),
2813 60 : values_committed_metadata_images: values_committed.with_label_values(&["metadata", "image"]),
2814 60 : values_committed_metadata_deltas: values_committed.with_label_values(&["metadata", "delta"]),
2815 60 : values_committed_data_images: values_committed.with_label_values(&["data", "image"]),
2816 60 : values_committed_data_deltas: values_committed.with_label_values(&["data", "delta"]),
2817 60 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2818 60 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2819 60 : "Total number of zero gap blocks written on relation extends"
2820 60 : )
2821 60 : .expect("failed to define a metric"),
2822 60 : }
2823 60 : });
2824 :
2825 1272 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2826 1272 : register_int_counter_vec!(
2827 1272 : "pageserver_timeline_wal_records_received",
2828 1272 : "Number of WAL records received per shard",
2829 1272 : &["tenant_id", "shard_id", "timeline_id"]
2830 1272 : )
2831 1272 : .expect("failed to define a metric")
2832 1272 : });
2833 :
2834 36 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2835 36 : register_histogram!(
2836 36 : "pageserver_wal_redo_seconds",
2837 36 : "Time spent on WAL redo",
2838 36 : redo_histogram_time_buckets!()
2839 36 : )
2840 36 : .expect("failed to define a metric")
2841 36 : });
2842 :
2843 36 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2844 36 : register_histogram!(
2845 36 : "pageserver_wal_redo_records_histogram",
2846 36 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2847 36 : redo_histogram_count_buckets!(),
2848 36 : )
2849 36 : .expect("failed to define a metric")
2850 36 : });
2851 :
2852 36 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2853 36 : register_histogram!(
2854 36 : "pageserver_wal_redo_bytes_histogram",
2855 36 : "Histogram of number of records replayed per redo sent to Postgres",
2856 36 : redo_bytes_histogram_count_buckets!(),
2857 36 : )
2858 36 : .expect("failed to define a metric")
2859 36 : });
2860 :
2861 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2862 36 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2863 36 : register_int_counter!(
2864 36 : "pageserver_replayed_wal_records_total",
2865 36 : "Number of WAL records replayed in WAL redo process"
2866 36 : )
2867 36 : .unwrap()
2868 36 : });
2869 :
2870 : #[rustfmt::skip]
2871 48 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2872 48 : register_histogram!(
2873 48 : "pageserver_wal_redo_process_launch_duration",
2874 48 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2875 48 : vec![
2876 48 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2877 48 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2878 48 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2879 48 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2880 48 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2881 48 : ],
2882 48 : )
2883 48 : .expect("failed to define a metric")
2884 48 : });
2885 :
2886 : pub(crate) struct WalRedoProcessCounters {
2887 : pub(crate) started: IntCounter,
2888 : pub(crate) killed_by_cause: EnumMap<WalRedoKillCause, IntCounter>,
2889 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2890 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2891 : }
2892 :
2893 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2894 : pub(crate) enum WalRedoKillCause {
2895 : WalRedoProcessDrop,
2896 : NoLeakChildDrop,
2897 : Startup,
2898 : }
2899 :
2900 : impl Default for WalRedoProcessCounters {
2901 48 : fn default() -> Self {
2902 48 : let started = register_int_counter!(
2903 48 : "pageserver_wal_redo_process_started_total",
2904 48 : "Number of WAL redo processes started",
2905 48 : )
2906 48 : .unwrap();
2907 48 :
2908 48 : let killed = register_int_counter_vec!(
2909 48 : "pageserver_wal_redo_process_stopped_total",
2910 48 : "Number of WAL redo processes stopped",
2911 48 : &["cause"],
2912 48 : )
2913 48 : .unwrap();
2914 48 :
2915 48 : let active_stderr_logger_tasks_started = register_int_counter!(
2916 48 : "pageserver_walredo_stderr_logger_tasks_started_total",
2917 48 : "Number of active walredo stderr logger tasks that have started",
2918 48 : )
2919 48 : .unwrap();
2920 48 :
2921 48 : let active_stderr_logger_tasks_finished = register_int_counter!(
2922 48 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2923 48 : "Number of active walredo stderr logger tasks that have finished",
2924 48 : )
2925 48 : .unwrap();
2926 48 :
2927 48 : Self {
2928 48 : started,
2929 144 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2930 144 : let cause = WalRedoKillCause::from_usize(i);
2931 144 : let cause_str: &'static str = cause.into();
2932 144 : killed.with_label_values(&[cause_str])
2933 144 : })),
2934 48 : active_stderr_logger_tasks_started,
2935 48 : active_stderr_logger_tasks_finished,
2936 48 : }
2937 48 : }
2938 : }
2939 :
2940 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2941 : Lazy::new(WalRedoProcessCounters::default);
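// Illustrative sketch (not part of the original source): the expected way to account for a
// WAL redo process being started and later stopped for a particular cause.
#[allow(dead_code)]
fn example_record_walredo_process_lifecycle(kill_cause: WalRedoKillCause) {
    WAL_REDO_PROCESS_COUNTERS.started.inc();
    // ... the process runs, then exits or is killed ...
    WAL_REDO_PROCESS_COUNTERS.killed_by_cause[kill_cause].inc();
}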
2942 :
2943 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2944 : pub(crate) struct StorageTimeMetricsTimer {
2945 : metrics: StorageTimeMetrics,
2946 : start: Instant,
2947 : }
2948 :
2949 : impl StorageTimeMetricsTimer {
2950 13032 : fn new(metrics: StorageTimeMetrics) -> Self {
2951 13032 : Self {
2952 13032 : metrics,
2953 13032 : start: Instant::now(),
2954 13032 : }
2955 13032 : }
2956 :
2957 : /// Returns the elapsed duration of the timer.
2958 13032 : pub fn elapsed(&self) -> Duration {
2959 13032 : self.start.elapsed()
2960 13032 : }
2961 :
2962 : /// Record the time from creation to now and return it.
2963 13032 : pub fn stop_and_record(self) -> Duration {
2964 13032 : let duration = self.elapsed();
2965 13032 : let seconds = duration.as_secs_f64();
2966 13032 : self.metrics.timeline_sum.inc_by(seconds);
2967 13032 : self.metrics.timeline_count.inc();
2968 13032 : self.metrics.global_histogram.observe(seconds);
2969 13032 : duration
2970 13032 : }
2971 :
2972 : /// Turns this timer into one that always records -- usually this means recording
2973 : /// regardless of whether an early `?` return path was taken in a function.
2974 120 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2975 120 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2976 120 : }
2977 : }
2978 :
2979 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2980 :
2981 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2982 120 : fn drop(&mut self) {
2983 120 : if let Some(inner) = self.0.take() {
2984 120 : inner.stop_and_record();
2985 120 : }
2986 120 : }
2987 : }
2988 :
2989 : impl AlwaysRecordingStorageTimeMetricsTimer {
2990 : /// Returns the elapsed duration of the timer.
2991 0 : pub fn elapsed(&self) -> Duration {
2992 0 : self.0.as_ref().expect("not dropped yet").elapsed()
2993 0 : }
2994 : }
2995 :
2996 : /// Timing facilities for a globally histogrammed metric, complemented by a per-tenant and
2997 : /// per-timeline total sum and count.
2998 : #[derive(Clone, Debug)]
2999 : pub(crate) struct StorageTimeMetrics {
3000 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
3001 : timeline_sum: Counter,
3002 : /// Number of operations, per operation, tenant_id and timeline_id
3003 : timeline_count: IntCounter,
3004 : /// Global histogram having only the "operation" label.
3005 : global_histogram: Histogram,
3006 : }
3007 :
3008 : impl StorageTimeMetrics {
3009 25056 : pub fn new(
3010 25056 : operation: StorageTimeOperation,
3011 25056 : tenant_id: &str,
3012 25056 : shard_id: &str,
3013 25056 : timeline_id: &str,
3014 25056 : ) -> Self {
3015 25056 : let operation: &'static str = operation.into();
3016 25056 :
3017 25056 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
3018 25056 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
3019 25056 : .unwrap();
3020 25056 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
3021 25056 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
3022 25056 : .unwrap();
3023 25056 : let global_histogram = STORAGE_TIME_GLOBAL
3024 25056 : .get_metric_with_label_values(&[operation])
3025 25056 : .unwrap();
3026 25056 :
3027 25056 : StorageTimeMetrics {
3028 25056 : timeline_sum,
3029 25056 : timeline_count,
3030 25056 : global_histogram,
3031 25056 : }
3032 25056 : }
3033 :
3034 : /// Starts timing a new operation.
3035 : ///
3036 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
3037 13032 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
3038 13032 : StorageTimeMetricsTimer::new(self.clone())
3039 13032 : }
3040 : }
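// Illustrative sketch (not part of the original source): typical use of the timers above.
// `do_work` is a hypothetical stand-in for the measured operation.
#[allow(dead_code)]
fn example_storage_time_usage(metrics: &StorageTimeMetrics, do_work: impl Fn()) {
    // Explicit recording: sum, count and the global histogram are updated on stop.
    let timer = metrics.start_timer();
    do_work();
    let _elapsed = timer.stop_and_record();

    // Recording even on early-return paths: the guard records when it is dropped.
    let _guard = metrics.start_timer().record_on_drop();
    do_work();
}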
3041 :
3042 : pub(crate) struct TimelineMetrics {
3043 : tenant_id: String,
3044 : shard_id: String,
3045 : timeline_id: String,
3046 : pub flush_time_histo: StorageTimeMetrics,
3047 : pub flush_delay_histo: StorageTimeMetrics,
3048 : pub compact_time_histo: StorageTimeMetrics,
3049 : pub create_images_time_histo: StorageTimeMetrics,
3050 : pub logical_size_histo: StorageTimeMetrics,
3051 : pub imitate_logical_size_histo: StorageTimeMetrics,
3052 : pub load_layer_map_histo: StorageTimeMetrics,
3053 : pub garbage_collect_histo: StorageTimeMetrics,
3054 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
3055 : pub last_record_lsn_gauge: IntGauge,
3056 : pub disk_consistent_lsn_gauge: IntGauge,
3057 : pub pitr_history_size: UIntGauge,
3058 : pub archival_size: UIntGauge,
3059 : pub layers_per_read: Histogram,
3060 : pub standby_horizon_gauge: IntGauge,
3061 : pub resident_physical_size_gauge: UIntGauge,
3062 : pub visible_physical_size_gauge: UIntGauge,
3063 : /// copy of LayeredTimeline.current_logical_size
3064 : pub current_logical_size_gauge: UIntGauge,
3065 : pub aux_file_size_gauge: IntGauge,
3066 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
3067 : pub evictions: IntCounter,
3068 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
3069 : /// Number of valid LSN leases.
3070 : pub valid_lsn_lease_count_gauge: UIntGauge,
3071 : pub wal_records_received: IntCounter,
3072 : pub storage_io_size: StorageIoSizeMetrics,
3073 : pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter,
3074 : pub wait_lsn_start_finish_counterpair: IntCounterPair,
3075 : pub wait_ondemand_download_time: wait_ondemand_download_time::WaitOndemandDownloadTimeSum,
3076 : shutdown: std::sync::atomic::AtomicBool,
3077 : }
3078 :
3079 : impl TimelineMetrics {
3080 2784 : pub fn new(
3081 2784 : tenant_shard_id: &TenantShardId,
3082 2784 : timeline_id_raw: &TimelineId,
3083 2784 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
3084 2784 : ) -> Self {
3085 2784 : let tenant_id = tenant_shard_id.tenant_id.to_string();
3086 2784 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
3087 2784 : let timeline_id = timeline_id_raw.to_string();
3088 2784 : let flush_time_histo = StorageTimeMetrics::new(
3089 2784 : StorageTimeOperation::LayerFlush,
3090 2784 : &tenant_id,
3091 2784 : &shard_id,
3092 2784 : &timeline_id,
3093 2784 : );
3094 2784 : let flush_delay_histo = StorageTimeMetrics::new(
3095 2784 : StorageTimeOperation::LayerFlushDelay,
3096 2784 : &tenant_id,
3097 2784 : &shard_id,
3098 2784 : &timeline_id,
3099 2784 : );
3100 2784 : let compact_time_histo = StorageTimeMetrics::new(
3101 2784 : StorageTimeOperation::Compact,
3102 2784 : &tenant_id,
3103 2784 : &shard_id,
3104 2784 : &timeline_id,
3105 2784 : );
3106 2784 : let create_images_time_histo = StorageTimeMetrics::new(
3107 2784 : StorageTimeOperation::CreateImages,
3108 2784 : &tenant_id,
3109 2784 : &shard_id,
3110 2784 : &timeline_id,
3111 2784 : );
3112 2784 : let logical_size_histo = StorageTimeMetrics::new(
3113 2784 : StorageTimeOperation::LogicalSize,
3114 2784 : &tenant_id,
3115 2784 : &shard_id,
3116 2784 : &timeline_id,
3117 2784 : );
3118 2784 : let imitate_logical_size_histo = StorageTimeMetrics::new(
3119 2784 : StorageTimeOperation::ImitateLogicalSize,
3120 2784 : &tenant_id,
3121 2784 : &shard_id,
3122 2784 : &timeline_id,
3123 2784 : );
3124 2784 : let load_layer_map_histo = StorageTimeMetrics::new(
3125 2784 : StorageTimeOperation::LoadLayerMap,
3126 2784 : &tenant_id,
3127 2784 : &shard_id,
3128 2784 : &timeline_id,
3129 2784 : );
3130 2784 : let garbage_collect_histo = StorageTimeMetrics::new(
3131 2784 : StorageTimeOperation::Gc,
3132 2784 : &tenant_id,
3133 2784 : &shard_id,
3134 2784 : &timeline_id,
3135 2784 : );
3136 2784 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
3137 2784 : StorageTimeOperation::FindGcCutoffs,
3138 2784 : &tenant_id,
3139 2784 : &shard_id,
3140 2784 : &timeline_id,
3141 2784 : );
3142 2784 : let last_record_lsn_gauge = LAST_RECORD_LSN
3143 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3144 2784 : .unwrap();
3145 2784 :
3146 2784 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
3147 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3148 2784 : .unwrap();
3149 2784 :
3150 2784 : let pitr_history_size = PITR_HISTORY_SIZE
3151 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3152 2784 : .unwrap();
3153 2784 :
3154 2784 : let archival_size = TIMELINE_ARCHIVE_SIZE
3155 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3156 2784 : .unwrap();
3157 2784 :
3158 2784 : let layers_per_read = LAYERS_PER_READ
3159 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3160 2784 : .unwrap();
3161 2784 :
3162 2784 : let standby_horizon_gauge = STANDBY_HORIZON
3163 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3164 2784 : .unwrap();
3165 2784 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
3166 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3167 2784 : .unwrap();
3168 2784 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
3169 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3170 2784 : .unwrap();
3171 2784 : // TODO: we shouldn't expose this metric
3172 2784 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
3173 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3174 2784 : .unwrap();
3175 2784 : let aux_file_size_gauge = AUX_FILE_SIZE
3176 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3177 2784 : .unwrap();
3178 2784 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
3179 2784 : let directory_entries_count_gauge_closure = {
3180 2784 : let tenant_shard_id = *tenant_shard_id;
3181 2784 : let timeline_id_raw = *timeline_id_raw;
3182 0 : move || {
3183 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
3184 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
3185 0 : let timeline_id = timeline_id_raw.to_string();
3186 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
3187 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3188 0 : .unwrap();
3189 0 : gauge
3190 0 : }
3191 : };
3192 2784 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
3193 2784 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
3194 2784 : let evictions = EVICTIONS
3195 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3196 2784 : .unwrap();
3197 2784 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
3198 2784 : .build(&tenant_id, &shard_id, &timeline_id);
3199 2784 :
3200 2784 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
3201 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3202 2784 : .unwrap();
3203 2784 :
3204 2784 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
3205 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3206 2784 : .unwrap();
3207 2784 :
3208 2784 : let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id);
3209 2784 :
3210 2784 : let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter {
3211 2784 : global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(),
3212 2784 : per_tenant: WAIT_LSN_IN_PROGRESS_MICROS
3213 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3214 2784 : .unwrap(),
3215 2784 : };
3216 2784 :
3217 2784 : let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR
3218 2784 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
3219 2784 : .unwrap();
3220 2784 :
3221 2784 : let wait_ondemand_download_time =
3222 2784 : wait_ondemand_download_time::WaitOndemandDownloadTimeSum::new(
3223 2784 : &tenant_id,
3224 2784 : &shard_id,
3225 2784 : &timeline_id,
3226 2784 : );
3227 2784 :
3228 2784 : TimelineMetrics {
3229 2784 : tenant_id,
3230 2784 : shard_id,
3231 2784 : timeline_id,
3232 2784 : flush_time_histo,
3233 2784 : flush_delay_histo,
3234 2784 : compact_time_histo,
3235 2784 : create_images_time_histo,
3236 2784 : logical_size_histo,
3237 2784 : imitate_logical_size_histo,
3238 2784 : garbage_collect_histo,
3239 2784 : find_gc_cutoffs_histo,
3240 2784 : load_layer_map_histo,
3241 2784 : last_record_lsn_gauge,
3242 2784 : disk_consistent_lsn_gauge,
3243 2784 : pitr_history_size,
3244 2784 : archival_size,
3245 2784 : layers_per_read,
3246 2784 : standby_horizon_gauge,
3247 2784 : resident_physical_size_gauge,
3248 2784 : visible_physical_size_gauge,
3249 2784 : current_logical_size_gauge,
3250 2784 : aux_file_size_gauge,
3251 2784 : directory_entries_count_gauge,
3252 2784 : evictions,
3253 2784 : evictions_with_low_residence_duration: std::sync::RwLock::new(
3254 2784 : evictions_with_low_residence_duration,
3255 2784 : ),
3256 2784 : storage_io_size,
3257 2784 : valid_lsn_lease_count_gauge,
3258 2784 : wal_records_received,
3259 2784 : wait_lsn_in_progress_micros,
3260 2784 : wait_lsn_start_finish_counterpair,
3261 2784 : wait_ondemand_download_time,
3262 2784 : shutdown: std::sync::atomic::AtomicBool::default(),
3263 2784 : }
3264 2784 : }
3265 :
3266 9504 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
3267 9504 : self.resident_physical_size_add(sz);
3268 9504 : }
3269 :
3270 3268 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
3271 3268 : self.resident_physical_size_gauge.sub(sz);
3272 3268 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
3273 3268 : }
3274 :
3275 10320 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
3276 10320 : self.resident_physical_size_gauge.add(sz);
3277 10320 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
3278 10320 : }
3279 :
3280 60 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
3281 60 : self.resident_physical_size_gauge.get()
3282 60 : }
3283 :
3284 : /// Generates TIMELINE_LAYER labels for a persistent layer.
3285 15868 : fn make_layer_labels(&self, layer_desc: &PersistentLayerDesc) -> [&str; 5] {
3286 15868 : let level = match LayerMap::is_l0(&layer_desc.key_range, layer_desc.is_delta()) {
3287 8542 : true => LayerLevel::L0,
3288 7326 : false => LayerLevel::L1,
3289 : };
3290 15868 : let kind = match layer_desc.is_delta() {
3291 13132 : true => LayerKind::Delta,
3292 2736 : false => LayerKind::Image,
3293 : };
3294 15868 : [
3295 15868 : &self.tenant_id,
3296 15868 : &self.shard_id,
3297 15868 : &self.timeline_id,
3298 15868 : level.into(),
3299 15868 : kind.into(),
3300 15868 : ]
3301 15868 : }
3302 :
3303 : /// Generates TIMELINE_LAYER labels for a frozen ephemeral layer.
3304 14208 : fn make_frozen_layer_labels(&self, _layer: &InMemoryLayer) -> [&str; 5] {
3305 14208 : [
3306 14208 : &self.tenant_id,
3307 14208 : &self.shard_id,
3308 14208 : &self.timeline_id,
3309 14208 : LayerLevel::Frozen.into(),
3310 14208 : LayerKind::Delta.into(), // by definition
3311 14208 : ]
3312 14208 : }
3313 :
3314 : /// Removes a frozen ephemeral layer from TIMELINE_LAYER metrics.
3315 7104 : pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
3316 7104 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3317 7104 : let labels = self.make_frozen_layer_labels(layer);
3318 7104 : let size = layer.try_len().expect("frozen layer should have no writer");
3319 7104 : TIMELINE_LAYER_COUNT
3320 7104 : .get_metric_with_label_values(&labels)
3321 7104 : .unwrap()
3322 7104 : .dec();
3323 7104 : TIMELINE_LAYER_SIZE
3324 7104 : .get_metric_with_label_values(&labels)
3325 7104 : .unwrap()
3326 7104 : .sub(size);
3327 7104 : }
3328 :
3329 : /// Adds a frozen ephemeral layer to TIMELINE_LAYER metrics.
3330 7104 : pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
3331 7104 : assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
3332 7104 : let labels = self.make_frozen_layer_labels(layer);
3333 7104 : let size = layer.try_len().expect("frozen layer should have no writer");
3334 7104 : TIMELINE_LAYER_COUNT
3335 7104 : .get_metric_with_label_values(&labels)
3336 7104 : .unwrap()
3337 7104 : .inc();
3338 7104 : TIMELINE_LAYER_SIZE
3339 7104 : .get_metric_with_label_values(&labels)
3340 7104 : .unwrap()
3341 7104 : .add(size);
3342 7104 : }
3343 :
3344 : /// Removes a persistent layer from TIMELINE_LAYER metrics.
3345 4156 : pub fn dec_layer(&self, layer_desc: &PersistentLayerDesc) {
3346 4156 : let labels = self.make_layer_labels(layer_desc);
3347 4156 : TIMELINE_LAYER_COUNT
3348 4156 : .get_metric_with_label_values(&labels)
3349 4156 : .unwrap()
3350 4156 : .dec();
3351 4156 : TIMELINE_LAYER_SIZE
3352 4156 : .get_metric_with_label_values(&labels)
3353 4156 : .unwrap()
3354 4156 : .sub(layer_desc.file_size);
3355 4156 : }
3356 :
3357 : /// Adds a persistent layer to TIMELINE_LAYER metrics.
3358 11712 : pub fn inc_layer(&self, layer_desc: &PersistentLayerDesc) {
3359 11712 : let labels = self.make_layer_labels(layer_desc);
3360 11712 : TIMELINE_LAYER_COUNT
3361 11712 : .get_metric_with_label_values(&labels)
3362 11712 : .unwrap()
3363 11712 : .inc();
3364 11712 : TIMELINE_LAYER_SIZE
3365 11712 : .get_metric_with_label_values(&labels)
3366 11712 : .unwrap()
3367 11712 : .add(layer_desc.file_size);
3368 11712 : }
3369 :
3370 60 : pub(crate) fn shutdown(&self) {
3371 60 : let was_shutdown = self
3372 60 : .shutdown
3373 60 : .swap(true, std::sync::atomic::Ordering::Relaxed);
3374 60 :
3375 60 : if was_shutdown {
3376 : // this happens on tenant deletion because tenant first shuts down timelines, then
3377 : // invokes timeline deletion which first shuts down the timeline again.
3378 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
3379 0 : return;
3380 60 : }
3381 60 :
3382 60 : let tenant_id = &self.tenant_id;
3383 60 : let timeline_id = &self.timeline_id;
3384 60 : let shard_id = &self.shard_id;
3385 60 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3386 60 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3387 60 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3388 60 : {
3389 60 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
3390 60 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3391 60 : }
3392 60 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3393 60 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3394 60 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
3395 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3396 60 : }
3397 :
3398 60 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3399 60 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3400 :
3401 240 : for ref level in LayerLevel::iter() {
3402 540 : for ref kind in LayerKind::iter() {
3403 360 : let labels: [&str; 5] =
3404 360 : [tenant_id, shard_id, timeline_id, level.into(), kind.into()];
3405 360 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&labels);
3406 360 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&labels);
3407 360 : }
3408 : }
3409 :
3410 60 : let _ = LAYERS_PER_READ.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3411 60 :
3412 60 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3413 60 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3414 60 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3415 60 :
3416 60 : self.evictions_with_low_residence_duration
3417 60 : .write()
3418 60 : .unwrap()
3419 60 : .remove(tenant_id, shard_id, timeline_id);
3420 :
3421 : // The following metrics are born outside of the TimelineMetrics lifecycle but are still
3422 : // removed at the end of it. The idea is that such metrics outlive the entity during whose
3423 : // lifetime they are observed: e.g., the smgr metrics shall outlive an individual smgr
3424 : // connection, but not the timeline.
3425 :
3426 600 : for op in StorageTimeOperation::VARIANTS {
3427 540 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
3428 540 : op,
3429 540 : tenant_id,
3430 540 : shard_id,
3431 540 : timeline_id,
3432 540 : ]);
3433 540 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
3434 540 : op,
3435 540 : tenant_id,
3436 540 : shard_id,
3437 540 : timeline_id,
3438 540 : ]);
3439 540 : }
3440 :
3441 180 : for op in StorageIoSizeOperation::VARIANTS {
3442 120 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
3443 120 : }
3444 :
3445 : let _ =
3446 60 : WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3447 60 :
3448 60 : {
3449 60 : let mut res = [Ok(()), Ok(())];
3450 60 : WAIT_LSN_START_FINISH_COUNTERPAIR
3451 60 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]);
3452 60 : }
3453 60 :
3454 60 : wait_ondemand_download_time::shutdown_timeline(tenant_id, shard_id, timeline_id);
3455 60 :
3456 60 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
3457 60 : SmgrQueryType::GetPageAtLsn.into(),
3458 60 : tenant_id,
3459 60 : shard_id,
3460 60 : timeline_id,
3461 60 : ]);
3462 60 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
3463 60 : SmgrQueryType::GetPageAtLsn.into(),
3464 60 : tenant_id,
3465 60 : shard_id,
3466 60 : timeline_id,
3467 60 : ]);
3468 60 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
3469 60 : tenant_id,
3470 60 : shard_id,
3471 60 : timeline_id,
3472 60 : ]);
3473 60 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
3474 60 : tenant_id,
3475 60 : shard_id,
3476 60 : timeline_id,
3477 60 : ]);
3478 60 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
3479 60 : tenant_id,
3480 60 : shard_id,
3481 60 : timeline_id,
3482 60 : ]);
3483 60 : let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
3484 60 : tenant_id,
3485 60 : shard_id,
3486 60 : timeline_id,
3487 60 : ]);
3488 :
3489 480 : for reason in GetPageBatchBreakReason::iter() {
3490 420 : let _ = PAGE_SERVICE_BATCH_BREAK_REASON_PER_TENANT_TIMELINE.remove_label_values(&[
3491 420 : tenant_id,
3492 420 : shard_id,
3493 420 : timeline_id,
3494 420 : reason.into(),
3495 420 : ]);
3496 420 : }
3497 60 : }
3498 : }
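// Illustrative sketch (not part of the original source): how layer lifecycle accounting is
// expected to drive the TIMELINE_LAYER gauges via the methods above.
#[allow(dead_code)]
fn example_layer_accounting(metrics: &TimelineMetrics, desc: &PersistentLayerDesc) {
    metrics.inc_layer(desc); // layer added to the layer map: count and size go up
    // ... the layer lives in the timeline ...
    metrics.dec_layer(desc); // layer deleted or replaced: count and size go back down
}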
3499 :
3500 36 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3501 36 : // Only shard zero deals in synthetic sizes
3502 36 : if tenant_shard_id.is_shard_zero() {
3503 36 : let tid = tenant_shard_id.tenant_id.to_string();
3504 36 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
3505 36 : }
3506 :
3507 36 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
3508 36 :
3509 36 : // we leave the BROKEN_TENANTS_SET entry if any
3510 36 : }
3511 :
3512 : /// Maintain a per timeline gauge in addition to the global gauge.
3513 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
3514 : last_set: AtomicU64,
3515 : gauge: UIntGauge,
3516 : }
3517 :
3518 : impl PerTimelineRemotePhysicalSizeGauge {
3519 2844 : fn new(per_timeline_gauge: UIntGauge) -> Self {
3520 2844 : Self {
3521 2844 : last_set: AtomicU64::new(0),
3522 2844 : gauge: per_timeline_gauge,
3523 2844 : }
3524 2844 : }
3525 11689 : pub(crate) fn set(&self, sz: u64) {
3526 11689 : self.gauge.set(sz);
3527 11689 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
3528 11689 : if sz < prev {
3529 204 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
3530 11485 : } else {
3531 11485 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
3532 11485 : };
3533 11689 : }
3534 12 : pub(crate) fn get(&self) -> u64 {
3535 12 : self.gauge.get()
3536 12 : }
3537 : }
3538 :
3539 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
3540 120 : fn drop(&mut self) {
3541 120 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
3542 120 : }
3543 : }
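// Illustrative sketch (not part of the original source): `set` keeps the global gauge in
// sync by applying only the delta between consecutive per-timeline values, and dropping the
// gauge subtracts whatever was last set.
#[allow(dead_code)]
fn example_remote_physical_size_gauge(gauge: &PerTimelineRemotePhysicalSizeGauge) {
    gauge.set(100); // per-timeline gauge = 100, REMOTE_PHYSICAL_SIZE_GLOBAL += 100
    gauge.set(40); // per-timeline gauge = 40, REMOTE_PHYSICAL_SIZE_GLOBAL -= 60
    assert_eq!(gauge.get(), 40);
}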
3544 :
3545 : pub(crate) struct RemoteTimelineClientMetrics {
3546 : tenant_id: String,
3547 : shard_id: String,
3548 : timeline_id: String,
3549 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
3550 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
3551 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3552 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3553 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
3554 : }
3555 :
3556 : impl RemoteTimelineClientMetrics {
3557 2844 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
3558 2844 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
3559 2844 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
3560 2844 : let timeline_id_str = timeline_id.to_string();
3561 2844 :
3562 2844 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
3563 2844 : REMOTE_PHYSICAL_SIZE
3564 2844 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3565 2844 : .unwrap(),
3566 2844 : );
3567 2844 :
3568 2844 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
3569 2844 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3570 2844 : .unwrap();
3571 2844 :
3572 2844 : RemoteTimelineClientMetrics {
3573 2844 : tenant_id: tenant_id_str,
3574 2844 : shard_id: shard_id_str,
3575 2844 : timeline_id: timeline_id_str,
3576 2844 : calls: Mutex::new(HashMap::default()),
3577 2844 : bytes_started_counter: Mutex::new(HashMap::default()),
3578 2844 : bytes_finished_counter: Mutex::new(HashMap::default()),
3579 2844 : remote_physical_size_gauge,
3580 2844 : projected_remote_consistent_lsn_gauge,
3581 2844 : }
3582 2844 : }
3583 :
3584 18483 : pub fn remote_operation_time(
3585 18483 : &self,
3586 18483 : task_kind: Option<TaskKind>,
3587 18483 : file_kind: &RemoteOpFileKind,
3588 18483 : op_kind: &RemoteOpKind,
3589 18483 : status: &'static str,
3590 18483 : ) -> Histogram {
3591 18483 : REMOTE_TIMELINE_CLIENT_COMPLETION_LATENCY
3592 18483 : .get_metric_with_label_values(&[
3593 18483 : task_kind.as_ref().map(|tk| tk.into()).unwrap_or("unknown"),
3594 18483 : file_kind.as_str(),
3595 18483 : op_kind.as_str(),
3596 18483 : status,
3597 18483 : ])
3598 18483 : .unwrap()
3599 18483 : }
3600 :
3601 43398 : fn calls_counter_pair(
3602 43398 : &self,
3603 43398 : file_kind: &RemoteOpFileKind,
3604 43398 : op_kind: &RemoteOpKind,
3605 43398 : ) -> IntCounterPair {
3606 43398 : let mut guard = self.calls.lock().unwrap();
3607 43398 : let key = (file_kind.as_str(), op_kind.as_str());
3608 43398 : let metric = guard.entry(key).or_insert_with(move || {
3609 5096 : REMOTE_TIMELINE_CLIENT_CALLS
3610 5096 : .get_metric_with_label_values(&[
3611 5096 : &self.tenant_id,
3612 5096 : &self.shard_id,
3613 5096 : &self.timeline_id,
3614 5096 : key.0,
3615 5096 : key.1,
3616 5096 : ])
3617 5096 : .unwrap()
3618 43398 : });
3619 43398 : metric.clone()
3620 43398 : }
3621 :
3622 10584 : fn bytes_started_counter(
3623 10584 : &self,
3624 10584 : file_kind: &RemoteOpFileKind,
3625 10584 : op_kind: &RemoteOpKind,
3626 10584 : ) -> IntCounter {
3627 10584 : let mut guard = self.bytes_started_counter.lock().unwrap();
3628 10584 : let key = (file_kind.as_str(), op_kind.as_str());
3629 10584 : let metric = guard.entry(key).or_insert_with(move || {
3630 2004 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3631 2004 : .get_metric_with_label_values(&[
3632 2004 : &self.tenant_id,
3633 2004 : &self.shard_id,
3634 2004 : &self.timeline_id,
3635 2004 : key.0,
3636 2004 : key.1,
3637 2004 : ])
3638 2004 : .unwrap()
3639 10584 : });
3640 10584 : metric.clone()
3641 10584 : }
3642 :
3643 19958 : fn bytes_finished_counter(
3644 19958 : &self,
3645 19958 : file_kind: &RemoteOpFileKind,
3646 19958 : op_kind: &RemoteOpKind,
3647 19958 : ) -> IntCounter {
3648 19958 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3649 19958 : let key = (file_kind.as_str(), op_kind.as_str());
3650 19958 : let metric = guard.entry(key).or_insert_with(move || {
3651 2004 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3652 2004 : .get_metric_with_label_values(&[
3653 2004 : &self.tenant_id,
3654 2004 : &self.shard_id,
3655 2004 : &self.timeline_id,
3656 2004 : key.0,
3657 2004 : key.1,
3658 2004 : ])
3659 2004 : .unwrap()
3660 19958 : });
3661 19958 : metric.clone()
3662 19958 : }
3663 : }
3664 :
3665 : #[cfg(test)]
3666 : impl RemoteTimelineClientMetrics {
3667 36 : pub fn get_bytes_started_counter_value(
3668 36 : &self,
3669 36 : file_kind: &RemoteOpFileKind,
3670 36 : op_kind: &RemoteOpKind,
3671 36 : ) -> Option<u64> {
3672 36 : let guard = self.bytes_started_counter.lock().unwrap();
3673 36 : let key = (file_kind.as_str(), op_kind.as_str());
3674 36 : guard.get(&key).map(|counter| counter.get())
3675 36 : }
3676 :
3677 36 : pub fn get_bytes_finished_counter_value(
3678 36 : &self,
3679 36 : file_kind: &RemoteOpFileKind,
3680 36 : op_kind: &RemoteOpKind,
3681 36 : ) -> Option<u64> {
3682 36 : let guard = self.bytes_finished_counter.lock().unwrap();
3683 36 : let key = (file_kind.as_str(), op_kind.as_str());
3684 36 : guard.get(&key).map(|counter| counter.get())
3685 36 : }
3686 : }
3687 :
3688 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3689 : #[must_use]
3690 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3691 : /// Decremented on drop.
3692 : calls_counter_pair: Option<IntCounterPair>,
3693 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
3694 : bytes_finished: Option<(IntCounter, u64)>,
3695 : }
3696 :
3697 : impl RemoteTimelineClientCallMetricGuard {
3698 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
3699 : /// The caller vouches to do the metric updates manually.
3700 23020 : pub fn will_decrement_manually(mut self) {
3701 23020 : let RemoteTimelineClientCallMetricGuard {
3702 23020 : calls_counter_pair,
3703 23020 : bytes_finished,
3704 23020 : } = &mut self;
3705 23020 : calls_counter_pair.take();
3706 23020 : bytes_finished.take();
3707 23020 : }
3708 : }
3709 :
3710 : impl Drop for RemoteTimelineClientCallMetricGuard {
3711 23224 : fn drop(&mut self) {
3712 23224 : let RemoteTimelineClientCallMetricGuard {
3713 23224 : calls_counter_pair,
3714 23224 : bytes_finished,
3715 23224 : } = self;
3716 23224 : if let Some(guard) = calls_counter_pair.take() {
3717 204 : guard.dec();
3718 23020 : }
3719 23224 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3720 0 : bytes_finished_metric.inc_by(*value);
3721 23224 : }
3722 23224 : }
3723 : }
3724 :
3725 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
3726 : /// track the byte size of this call in applicable metric(s).
3727 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
3728 : /// Do not account for this call's byte size in any metrics.
3729 : /// The `reason` field is there to make the call sites self-documenting
3730 : /// about why they don't need the metric.
3731 : DontTrackSize { reason: &'static str },
3732 : /// Track the byte size of the call in applicable metric(s).
3733 : Bytes(u64),
3734 : }
3735 :
3736 : impl RemoteTimelineClientMetrics {
3737 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3738 : ///
3739 : /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
3740 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
3741 : /// is more suitable.
3742 : /// Never do both.
3743 23224 : pub(crate) fn call_begin(
3744 23224 : &self,
3745 23224 : file_kind: &RemoteOpFileKind,
3746 23224 : op_kind: &RemoteOpKind,
3747 23224 : size: RemoteTimelineClientMetricsCallTrackSize,
3748 23224 : ) -> RemoteTimelineClientCallMetricGuard {
3749 23224 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3750 23224 : calls_counter_pair.inc();
3751 :
3752 23224 : let bytes_finished = match size {
3753 12640 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3754 12640 : // nothing to do
3755 12640 : None
3756 : }
3757 10584 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3758 10584 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3759 10584 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3760 10584 : Some((finished_counter, size))
3761 : }
3762 : };
3763 23224 : RemoteTimelineClientCallMetricGuard {
3764 23224 : calls_counter_pair: Some(calls_counter_pair),
3765 23224 : bytes_finished,
3766 23224 : }
3767 23224 : }
3768 :
3769 : /// Manually update the metrics that track completions, instead of using the guard object.
3770 : /// Using the guard object is generally preferable.
3771 : /// See [`call_begin`](Self::call_begin) for more context.
3772 20174 : pub(crate) fn call_end(
3773 20174 : &self,
3774 20174 : file_kind: &RemoteOpFileKind,
3775 20174 : op_kind: &RemoteOpKind,
3776 20174 : size: RemoteTimelineClientMetricsCallTrackSize,
3777 20174 : ) {
3778 20174 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3779 20174 : calls_counter_pair.dec();
3780 20174 : match size {
3781 10800 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3782 9374 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3783 9374 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3784 9374 : }
3785 : }
3786 20174 : }
3787 : }
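// Illustrative sketch (not part of the original source): the two supported ways of pairing
// `call_begin` with completion accounting, per the doc comments above.
#[allow(dead_code)]
fn example_remote_client_call_accounting(
    metrics: &RemoteTimelineClientMetrics,
    file_kind: &RemoteOpFileKind,
    op_kind: &RemoteOpKind,
) {
    // 1. Guard-based: dropping the guard decrements the in-progress pair and, for
    //    size-tracked calls, bumps the bytes_finished counter.
    {
        let _guard = metrics.call_begin(
            file_kind,
            op_kind,
            RemoteTimelineClientMetricsCallTrackSize::Bytes(4096),
        );
        // ... perform the upload ...
    }

    // 2. Manual: opt out of the guard's bookkeeping and call `call_end` yourself later.
    let guard = metrics.call_begin(
        file_kind,
        op_kind,
        RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {
            reason: "illustration only",
        },
    );
    guard.will_decrement_manually();
    // ... once the operation has completed ...
    metrics.call_end(
        file_kind,
        op_kind,
        RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {
            reason: "illustration only",
        },
    );
}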
3788 :
3789 : impl Drop for RemoteTimelineClientMetrics {
3790 120 : fn drop(&mut self) {
3791 120 : let RemoteTimelineClientMetrics {
3792 120 : tenant_id,
3793 120 : shard_id,
3794 120 : timeline_id,
3795 120 : remote_physical_size_gauge,
3796 120 : calls,
3797 120 : bytes_started_counter,
3798 120 : bytes_finished_counter,
3799 120 : projected_remote_consistent_lsn_gauge,
3800 120 : } = self;
3801 144 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3802 144 : let mut res = [Ok(()), Ok(())];
3803 144 : REMOTE_TIMELINE_CLIENT_CALLS
3804 144 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3805 144 : // don't care about results
3806 144 : }
3807 120 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3808 36 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3809 36 : tenant_id,
3810 36 : shard_id,
3811 36 : timeline_id,
3812 36 : a,
3813 36 : b,
3814 36 : ]);
3815 36 : }
3816 120 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3817 36 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3818 36 : tenant_id,
3819 36 : shard_id,
3820 36 : timeline_id,
3821 36 : a,
3822 36 : b,
3823 36 : ]);
3824 36 : }
3825 120 : {
3826 120 : let _ = remote_physical_size_gauge; // used to avoid an 'unused' warning in the destructuring above
3827 120 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3828 120 : }
3829 120 : {
3830 120 : let _ = projected_remote_consistent_lsn_gauge;
3831 120 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3832 120 : tenant_id,
3833 120 : shard_id,
3834 120 : timeline_id,
3835 120 : ]);
3836 120 : }
3837 120 : }
3838 : }
3839 :
3840 : /// Extension trait for futures that measures the time spent by a remote storage operation
3841 : /// and records the duration and success/failure as a prometheus metric.
3842 : pub(crate) trait MeasureRemoteOp<O, E>: Sized + Future<Output = Result<O, E>> {
3843 19406 : async fn measure_remote_op(
3844 19406 : self,
3845 19406 : task_kind: Option<TaskKind>, // not all caller contexts have a RequestContext / TaskKind handy
3846 19406 : file_kind: RemoteOpFileKind,
3847 19406 : op: RemoteOpKind,
3848 19406 : metrics: Arc<RemoteTimelineClientMetrics>,
3849 19406 : ) -> Result<O, E> {
3850 19406 : let start = Instant::now();
3851 19406 : let res = self.await;
3852 18483 : let duration = start.elapsed();
3853 18483 : let status = if res.is_ok() { &"success" } else { &"failure" };
3854 18483 : metrics
3855 18483 : .remote_operation_time(task_kind, &file_kind, &op, status)
3856 18483 : .observe(duration.as_secs_f64());
3857 18483 : res
3858 18483 : }
3859 : }
3860 :
3861 : impl<Fut, O, E> MeasureRemoteOp<O, E> for Fut where Fut: Sized + Future<Output = Result<O, E>> {}
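// Illustrative sketch (not part of the original source): wrapping an arbitrary remote
// storage future so its latency and outcome are recorded. Passing `None` for the task kind
// mirrors callers that have no RequestContext / TaskKind handy.
#[allow(dead_code)]
async fn example_measure_remote_op<F, O, E>(
    fut: F,
    file_kind: RemoteOpFileKind,
    op_kind: RemoteOpKind,
    metrics: Arc<RemoteTimelineClientMetrics>,
) -> Result<O, E>
where
    F: Future<Output = Result<O, E>>,
{
    fut.measure_remote_op(None, file_kind, op_kind, metrics).await
}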
3862 :
3863 : pub mod tokio_epoll_uring {
3864 : use std::collections::HashMap;
3865 : use std::sync::{Arc, Mutex};
3866 :
3867 : use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter};
3868 : use once_cell::sync::Lazy;
3869 :
3870 : /// Shared storage for tokio-epoll-uring thread local metrics.
3871 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3872 732 : Lazy::new(|| {
3873 732 : let slots_submission_queue_depth = register_histogram!(
3874 732 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3875 732 : "The slots waiters queue depth of each tokio_epoll_uring system",
3876 732 : vec![
3877 732 : 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0
3878 732 : ],
3879 732 : )
3880 732 : .expect("failed to define a metric");
3881 732 : ThreadLocalMetricsStorage {
3882 732 : observers: Mutex::new(HashMap::new()),
3883 732 : slots_submission_queue_depth,
3884 732 : }
3885 732 : });
3886 :
3887 : pub struct ThreadLocalMetricsStorage {
3888 : /// List of thread local metrics observers.
3889 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3890 : /// A histogram shared between all thread local systems
3891 : /// for collecting slots submission queue depth.
3892 : slots_submission_queue_depth: Histogram,
3893 : }
3894 :
3895 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3896 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3897 : ///
3898 : /// The System makes observations into [`Self`] and periodically, the collector
3899 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3900 : ///
3901 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3902 : /// But except for the periodic flush, the lock is uncontended so there's no waiting
3903 : /// for cache coherence protocol to get an exclusive cache line.
3904 : pub struct ThreadLocalMetrics {
3905 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3906 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3907 : }
3908 :
3909 : impl ThreadLocalMetricsStorage {
3910 : /// Registers a new thread local system. Returns a thread local metrics observer.
3911 3213 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3912 3213 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3913 3213 : self.slots_submission_queue_depth.local(),
3914 3213 : ));
3915 3213 : let mut g = self.observers.lock().unwrap();
3916 3213 : g.insert(id, Arc::clone(&per_system_metrics));
3917 3213 : per_system_metrics
3918 3213 : }
3919 :
3920 : /// Removes metrics observer for a thread local system.
3921 : /// This should be called before dropping a thread local system.
3922 732 : pub fn remove_system(&self, id: u64) {
3923 732 : let mut g = self.observers.lock().unwrap();
3924 732 : g.remove(&id);
3925 732 : }
3926 :
3927 : /// Flush all thread local metrics to the shared storage.
3928 0 : pub fn flush_thread_local_metrics(&self) {
3929 0 : let g = self.observers.lock().unwrap();
3930 0 : g.values().for_each(|local| {
3931 0 : local.flush();
3932 0 : });
3933 0 : }
3934 : }
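    // Illustrative sketch (not part of the original source): expected lifecycle of a
    // thread-local system's metrics observer. `system_id` is a hypothetical unique id
    // assigned by the caller.
    #[allow(dead_code)]
    fn example_thread_local_metrics_lifecycle(system_id: u64) {
        let observer = THREAD_LOCAL_METRICS_STORAGE.register_system(system_id);
        // ... hand `observer` to the thread-local tokio_epoll_uring system as its
        // PerSystemMetrics implementation ...
        drop(observer);
        // Unregister before tearing the system down so the periodic flush stops visiting it.
        THREAD_LOCAL_METRICS_STORAGE.remove_system(system_id);
    }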
3935 :
3936 : impl ThreadLocalMetrics {
3937 3213 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3938 3213 : ThreadLocalMetrics {
3939 3213 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3940 3213 : }
3941 3213 : }
3942 :
3943 : /// Flushes the thread local metrics to shared aggregator.
3944 0 : pub fn flush(&self) {
3945 0 : let Self {
3946 0 : slots_submission_queue_depth,
3947 0 : } = self;
3948 0 : slots_submission_queue_depth.lock().unwrap().flush();
3949 0 : }
3950 : }
3951 :
3952 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3953 2358900 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3954 2358900 : let Self {
3955 2358900 : slots_submission_queue_depth,
3956 2358900 : } = self;
3957 2358900 : slots_submission_queue_depth
3958 2358900 : .lock()
3959 2358900 : .unwrap()
3960 2358900 : .observe(queue_depth as f64);
3961 2358900 : }
3962 : }
3963 :
3964 : pub struct Collector {
3965 : descs: Vec<metrics::core::Desc>,
3966 : systems_created: UIntGauge,
3967 : systems_destroyed: UIntGauge,
3968 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
3969 : }
3970 :
3971 : impl metrics::core::Collector for Collector {
3972 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3973 0 : self.descs.iter().collect()
3974 0 : }
3975 :
3976 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3977 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3978 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3979 0 : systems_created,
3980 0 : systems_destroyed,
3981 0 : } = tokio_epoll_uring::metrics::global();
3982 0 : self.systems_created.set(systems_created);
3983 0 : mfs.extend(self.systems_created.collect());
3984 0 : self.systems_destroyed.set(systems_destroyed);
3985 0 : mfs.extend(self.systems_destroyed.collect());
3986 0 :
3987 0 : self.thread_local_metrics_storage
3988 0 : .flush_thread_local_metrics();
3989 0 :
3990 0 : mfs.extend(
3991 0 : self.thread_local_metrics_storage
3992 0 : .slots_submission_queue_depth
3993 0 : .collect(),
3994 0 : );
3995 0 : mfs
3996 0 : }
3997 : }
3998 :
3999 : impl Collector {
4000 : const NMETRICS: usize = 3;
4001 :
4002 : #[allow(clippy::new_without_default)]
4003 0 : pub fn new() -> Self {
4004 0 : let mut descs = Vec::new();
4005 0 :
4006 0 : let systems_created = UIntGauge::new(
4007 0 : "pageserver_tokio_epoll_uring_systems_created",
4008 0 : "counter of tokio-epoll-uring systems that were created",
4009 0 : )
4010 0 : .unwrap();
4011 0 : descs.extend(
4012 0 : metrics::core::Collector::desc(&systems_created)
4013 0 : .into_iter()
4014 0 : .cloned(),
4015 0 : );
4016 0 :
4017 0 : let systems_destroyed = UIntGauge::new(
4018 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
4019 0 : "counter of tokio-epoll-uring systems that were destroyed",
4020 0 : )
4021 0 : .unwrap();
4022 0 : descs.extend(
4023 0 : metrics::core::Collector::desc(&systems_destroyed)
4024 0 : .into_iter()
4025 0 : .cloned(),
4026 0 : );
4027 0 :
4028 0 : Self {
4029 0 : descs,
4030 0 : systems_created,
4031 0 : systems_destroyed,
4032 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
4033 0 : }
4034 0 : }
4035 : }
4036 :
4037 732 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
4038 732 : register_int_counter!(
4039 732 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
4040 732 : "Number of times where thread_local_system creation spanned multiple executor threads",
4041 732 : )
4042 732 : .unwrap()
4043 732 : });
4044 :
4045 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
4046 0 : register_int_counter!(
4047 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
4048 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
4049 0 : )
4050 0 : .unwrap()
4051 0 : });
4052 : }
4053 :
4054 : pub(crate) struct GlobalAndPerTenantIntCounter {
4055 : global: IntCounter,
4056 : per_tenant: IntCounter,
4057 : }
4058 :
4059 : impl GlobalAndPerTenantIntCounter {
4060 : #[inline(always)]
4061 0 : pub(crate) fn inc(&self) {
4062 0 : self.inc_by(1)
4063 0 : }
4064 : #[inline(always)]
4065 1352896 : pub(crate) fn inc_by(&self, n: u64) {
4066 1352896 : self.global.inc_by(n);
4067 1352896 : self.per_tenant.inc_by(n);
4068 1352896 : }
4069 : }
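// Illustrative sketch (not part of the original source): a single call updates both the
// global and the per-tenant counter, so the two time series always move in lockstep.
#[allow(dead_code)]
fn example_global_and_per_tenant(counter: &GlobalAndPerTenantIntCounter) {
    counter.inc_by(42); // adds 42 to both underlying IntCounters
}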
4070 :
4071 : pub(crate) mod tenant_throttling {
4072 : use metrics::register_int_counter_vec;
4073 : use once_cell::sync::Lazy;
4074 : use utils::shard::TenantShardId;
4075 :
4076 : use super::GlobalAndPerTenantIntCounter;
4077 :
4078 : pub(crate) struct Metrics<const KIND: usize> {
4079 : pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
4080 : pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,
4081 : pub(super) wait_time: GlobalAndPerTenantIntCounter,
4082 : pub(super) count_throttled: GlobalAndPerTenantIntCounter,
4083 : }
4084 :
4085 1284 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4086 1284 : register_int_counter_vec!(
4087 1284 : "pageserver_tenant_throttling_count_accounted_start_global",
4088 1284 : "Count of tenant throttling starts, by kind of throttle.",
4089 1284 : &["kind"]
4090 1284 : )
4091 1284 : .unwrap()
4092 1284 : });
4093 1284 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4094 1284 : register_int_counter_vec!(
4095 1284 : "pageserver_tenant_throttling_count_accounted_start",
4096 1284 : "Count of tenant throttling starts, by kind of throttle.",
4097 1284 : &["kind", "tenant_id", "shard_id"]
4098 1284 : )
4099 1284 : .unwrap()
4100 1284 : });
4101 1284 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4102 1284 : register_int_counter_vec!(
4103 1284 : "pageserver_tenant_throttling_count_accounted_finish_global",
4104 1284 : "Count of tenant throttling finishes, by kind of throttle.",
4105 1284 : &["kind"]
4106 1284 : )
4107 1284 : .unwrap()
4108 1284 : });
4109 1284 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4110 1284 : register_int_counter_vec!(
4111 1284 : "pageserver_tenant_throttling_count_accounted_finish",
4112 1284 : "Count of tenant throttling finishes, by kind of throttle.",
4113 1284 : &["kind", "tenant_id", "shard_id"]
4114 1284 : )
4115 1284 : .unwrap()
4116 1284 : });
4117 1284 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4118 1284 : register_int_counter_vec!(
4119 1284 : "pageserver_tenant_throttling_wait_usecs_sum_global",
4120 1284 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
4121 1284 : &["kind"]
4122 1284 : )
4123 1284 : .unwrap()
4124 1284 : });
4125 1284 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4126 1284 : register_int_counter_vec!(
4127 1284 : "pageserver_tenant_throttling_wait_usecs_sum",
4128 1284 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
4129 1284 : &["kind", "tenant_id", "shard_id"]
4130 1284 : )
4131 1284 : .unwrap()
4132 1284 : });
4133 :
4134 1284 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4135 1284 : register_int_counter_vec!(
4136 1284 : "pageserver_tenant_throttling_count_global",
4137 1284 : "Count of tenant throttlings, by kind of throttle.",
4138 1284 : &["kind"]
4139 1284 : )
4140 1284 : .unwrap()
4141 1284 : });
4142 1284 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
4143 1284 : register_int_counter_vec!(
4144 1284 : "pageserver_tenant_throttling_count",
4145 1284 : "Count of tenant throttlings, by kind of throttle.",
4146 1284 : &["kind", "tenant_id", "shard_id"]
4147 1284 : )
4148 1284 : .unwrap()
4149 1284 : });
4150 :
4151 : const KINDS: &[&str] = &["pagestream"];
4152 : pub type Pagestream = Metrics<0>;
4153 :
4154 : impl<const KIND: usize> Metrics<KIND> {
4155 1392 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
4156 1392 : let per_tenant_label_values = &[
4157 1392 : KINDS[KIND],
4158 1392 : &tenant_shard_id.tenant_id.to_string(),
4159 1392 : &tenant_shard_id.shard_slug().to_string(),
4160 1392 : ];
4161 1392 : Metrics {
4162 1392 : count_accounted_start: {
4163 1392 : GlobalAndPerTenantIntCounter {
4164 1392 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
4165 1392 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
4166 1392 : .with_label_values(per_tenant_label_values),
4167 1392 : }
4168 1392 : },
4169 1392 : count_accounted_finish: {
4170 1392 : GlobalAndPerTenantIntCounter {
4171 1392 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
4172 1392 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
4173 1392 : .with_label_values(per_tenant_label_values),
4174 1392 : }
4175 1392 : },
4176 1392 : wait_time: {
4177 1392 : GlobalAndPerTenantIntCounter {
4178 1392 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
4179 1392 : per_tenant: WAIT_USECS_PER_TENANT
4180 1392 : .with_label_values(per_tenant_label_values),
4181 1392 : }
4182 1392 : },
4183 1392 : count_throttled: {
4184 1392 : GlobalAndPerTenantIntCounter {
4185 1392 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
4186 1392 : per_tenant: WAIT_COUNT_PER_TENANT
4187 1392 : .with_label_values(per_tenant_label_values),
4188 1392 : }
4189 1392 : },
4190 1392 : }
4191 1392 : }
4192 : }
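    // Illustrative sketch (not part of the original source): constructing the pagestream
    // throttle metrics for a tenant shard and recording one throttled request. The wait
    // value is an arbitrary example.
    #[allow(dead_code)]
    fn example_pagestream_throttling(tenant_shard_id: &TenantShardId) {
        let metrics = Pagestream::new(tenant_shard_id);
        metrics.count_accounted_start.inc();
        metrics.count_throttled.inc();
        metrics.wait_time.inc_by(1_500); // microseconds spent waiting
        metrics.count_accounted_finish.inc();
    }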
4193 :
4194 0 : pub(crate) fn preinitialize_global_metrics() {
4195 0 : Lazy::force(&COUNT_ACCOUNTED_START);
4196 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
4197 0 : Lazy::force(&WAIT_USECS);
4198 0 : Lazy::force(&WAIT_COUNT);
4199 0 : }
4200 :
4201 36 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
4202 144 : for m in &[
4203 36 : &COUNT_ACCOUNTED_START_PER_TENANT,
4204 36 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
4205 36 : &WAIT_USECS_PER_TENANT,
4206 36 : &WAIT_COUNT_PER_TENANT,
4207 36 : ] {
4208 288 : for kind in KINDS {
4209 144 : let _ = m.remove_label_values(&[
4210 144 : kind,
4211 144 : &tenant_shard_id.tenant_id.to_string(),
4212 144 : &tenant_shard_id.shard_slug().to_string(),
4213 144 : ]);
4214 144 : }
4215 : }
4216 36 : }
4217 : }
4218 :
4219 : pub(crate) mod disk_usage_based_eviction {
4220 : use super::*;
4221 :
4222 : pub(crate) struct Metrics {
4223 : pub(crate) tenant_collection_time: Histogram,
4224 : pub(crate) tenant_layer_count: Histogram,
4225 : pub(crate) layers_collected: IntCounter,
4226 : pub(crate) layers_selected: IntCounter,
4227 : pub(crate) layers_evicted: IntCounter,
4228 : }
4229 :
4230 : impl Default for Metrics {
4231 0 : fn default() -> Self {
4232 0 : let tenant_collection_time = register_histogram!(
4233 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
4234 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
4235 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
4236 0 : )
4237 0 : .unwrap();
4238 0 :
4239 0 : let tenant_layer_count = register_histogram!(
4240 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
4241 0 : "Amount of layers gathered from a tenant",
4242 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
4243 0 : )
4244 0 : .unwrap();
4245 0 :
4246 0 : let layers_collected = register_int_counter!(
4247 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
4248 0 : "Amount of layers collected"
4249 0 : )
4250 0 : .unwrap();
4251 0 :
4252 0 : let layers_selected = register_int_counter!(
4253 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
4254 0 : "Amount of layers selected"
4255 0 : )
4256 0 : .unwrap();
4257 0 :
4258 0 : let layers_evicted = register_int_counter!(
4259 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
4260 0 : "Amount of layers successfully evicted"
4261 0 : )
4262 0 : .unwrap();
4263 0 :
4264 0 : Self {
4265 0 : tenant_collection_time,
4266 0 : tenant_layer_count,
4267 0 : layers_collected,
4268 0 : layers_selected,
4269 0 : layers_evicted,
4270 0 : }
4271 0 : }
4272 : }
4273 :
4274 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
4275 : }
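// Illustrative sketch (not part of the original source): how the disk-usage-based eviction
// task is expected to feed the counters above while collecting and evicting layers.
#[allow(dead_code)]
fn example_disk_usage_eviction_accounting(collected: u64, selected: u64, evicted: u64) {
    let m = &disk_usage_based_eviction::METRICS;
    m.layers_collected.inc_by(collected);
    m.layers_selected.inc_by(selected);
    m.layers_evicted.inc_by(evicted);
}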
4276 :
4277 1248 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
4278 1248 : register_uint_gauge_vec!(
4279 1248 : "pageserver_tokio_executor_thread_configured_count",
4280 1248 : "Total number of configued tokio executor threads in the process.
4281 1248 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
4282 1248 : &["setup"],
4283 1248 : )
4284 1248 : .unwrap()
4285 1248 : });
4286 :
4287 1248 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
4288 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
4289 1248 : let _guard = SERIALIZE.lock().unwrap();
4290 1248 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
4291 1248 : TOKIO_EXECUTOR_THREAD_COUNT
4292 1248 : .get_metric_with_label_values(&[setup])
4293 1248 : .unwrap()
4294 1248 : .set(u64::try_from(num_threads.get()).unwrap());
4295 1248 : }
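// Illustrative sketch (not part of the original source): how runtime startup would report
// its thread configuration. The label value "multiple-runtimes" is an assumption for
// illustration only.
#[allow(dead_code)]
fn example_report_tokio_runtime_setup() {
    let num_threads = NonZeroUsize::new(8).unwrap();
    set_tokio_runtime_setup("multiple-runtimes", num_threads);
}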
4296 :
4297 0 : static PAGESERVER_CONFIG_IGNORED_ITEMS: Lazy<UIntGaugeVec> = Lazy::new(|| {
4298 0 : register_uint_gauge_vec!(
4299 0 : "pageserver_config_ignored_items",
4300 0 : "TOML items present in the on-disk configuration file but ignored by the pageserver config parser.\
4301 0 : The `item` label is the dot-separated path of the ignored item in the on-disk configuration file.\
4302 0 : The value for an unknown config item is always 1.\
4303 0 : There is a special label value \"\", which is 0, so that there is always a metric exposed (simplifies dashboards).",
4304 0 : &["item"]
4305 0 : )
4306 0 : .unwrap()
4307 0 : });
4308 :
4309 0 : pub fn preinitialize_metrics(
4310 0 : conf: &'static PageServerConf,
4311 0 : ignored: config::ignored_fields::Paths,
4312 0 : ) {
4313 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
4314 0 :
4315 0 : PAGESERVER_CONFIG_IGNORED_ITEMS
4316 0 : .with_label_values(&[""])
4317 0 : .set(0);
4318 0 : for path in &ignored.paths {
4319 0 : PAGESERVER_CONFIG_IGNORED_ITEMS
4320 0 : .with_label_values(&[path])
4321 0 : .set(1);
4322 0 : }
4323 :
4324 : // Python tests need these, and we alert on some of them.
4325 : //
4326 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
4327 : // order:
4328 : // - global metrics reside in a Lazy<PageserverMetrics>
4329 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
4330 : // - could move the statics into TimelineMetrics::new()?
4331 :
4332 : // counters
4333 0 : [
4334 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
4335 0 : &WALRECEIVER_STARTED_CONNECTIONS,
4336 0 : &WALRECEIVER_BROKER_UPDATES,
4337 0 : &WALRECEIVER_CANDIDATES_ADDED,
4338 0 : &WALRECEIVER_CANDIDATES_REMOVED,
4339 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
4340 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
4341 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
4342 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
4343 0 : &CIRCUIT_BREAKERS_BROKEN,
4344 0 : &CIRCUIT_BREAKERS_UNBROKEN,
4345 0 : &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
4346 0 : &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS,
4347 0 : ]
4348 0 : .into_iter()
4349 0 : .for_each(|c| {
4350 0 : Lazy::force(c);
4351 0 : });
4352 0 :
4353 0 : // Deletion queue stats
4354 0 : Lazy::force(&DELETION_QUEUE);
4355 0 :
4356 0 : // Tenant stats
4357 0 : Lazy::force(&TENANT);
4358 0 :
4359 0 : // Tenant manager stats
4360 0 : Lazy::force(&TENANT_MANAGER);
4361 0 :
4362 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
4363 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
4364 :
4365 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
4366 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
4367 0 : // values from last restart.
4368 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
4369 0 : }
4370 :
4371 : // countervecs
4372 0 : [
4373 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
4374 0 : &SMGR_QUERY_STARTED_GLOBAL,
4375 0 : &PAGE_SERVICE_BATCH_BREAK_REASON_GLOBAL,
4376 0 : ]
4377 0 : .into_iter()
4378 0 : .for_each(|c| {
4379 0 : Lazy::force(c);
4380 0 : });
4381 0 :
4382 0 : // gauges
4383 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
4384 0 :
4385 0 : // histograms
4386 0 : [
4387 0 : &LAYERS_PER_READ_GLOBAL,
4388 0 : &LAYERS_PER_READ_BATCH_GLOBAL,
4389 0 : &LAYERS_PER_READ_AMORTIZED_GLOBAL,
4390 0 : &DELTAS_PER_READ_GLOBAL,
4391 0 : &WAIT_LSN_TIME,
4392 0 : &WAL_REDO_TIME,
4393 0 : &WAL_REDO_RECORDS_HISTOGRAM,
4394 0 : &WAL_REDO_BYTES_HISTOGRAM,
4395 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
4396 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
4397 0 : &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
4398 0 : ]
4399 0 : .into_iter()
4400 0 : .for_each(|h| {
4401 0 : Lazy::force(h);
4402 0 : });
4403 0 :
4404 0 : // Custom
4405 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
4406 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
4407 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
4408 0 :
4409 0 : tenant_throttling::preinitialize_global_metrics();
4410 0 : wait_ondemand_download_time::preinitialize_global_metrics();
4411 0 : }
|