Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::config::{
11 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
12 : PageServiceProtocolPipelinedExecutionStrategy,
13 : };
14 : use pageserver_api::shard::TenantShardId;
15 : use postgres_backend::{is_expected_io_error, QueryError};
16 : use pq_proto::framed::ConnectionError;
17 : use strum::{EnumCount, VariantNames};
18 : use strum_macros::{IntoStaticStr, VariantNames};
19 : use tracing::warn;
20 : use utils::id::TimelineId;
21 :
/// Prometheus histogram buckets (in seconds) for operations in the critical
/// path, i.e. operations that directly affect the latency of user queries.
///
/// The buckets cover the microsecond and millisecond range in decades and
/// extend far enough up to distinguish "bad" from "really bad".
const CRITICAL_OP_BUCKETS: &[f64] = &[
    0.000_001, // 1 us
    0.000_010, // 10 us
    0.000_100, // 100 us
    0.001_000, // 1 ms
    0.010_000, // 10 ms
    0.100_000, // 100 ms
    1.0,       // 1 s
    10.0,      // 10 s
    100.0,     // 100 s
];
34 :
/// Kinds of operations on the storage repository for which metrics are collected.
///
/// Every variant carries an explicit `serialize` string (with spaces), so the
/// enum-level `serialize_all` only matters for variants added without one.
/// NOTE(review): presumably these strings are the `operation` label values of
/// the `pageserver_storage_operations_*` metrics; confirm against callers.
#[derive(Debug, VariantNames, IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "layer flush")]
    LayerFlush,

    #[strum(serialize = "compact")]
    Compact,

    #[strum(serialize = "create images")]
    CreateImages,

    #[strum(serialize = "logical size")]
    LogicalSize,

    #[strum(serialize = "imitate logical size")]
    ImitateLogicalSize,

    #[strum(serialize = "load layer map")]
    LoadLayerMap,

    #[strum(serialize = "gc")]
    Gc,

    #[strum(serialize = "find gc cutoffs")]
    FindGcCutoffs,
}
63 :
64 172 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
65 172 : register_counter_vec!(
66 172 : "pageserver_storage_operations_seconds_sum",
67 172 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
68 172 : &["operation", "tenant_id", "shard_id", "timeline_id"],
69 172 : )
70 172 : .expect("failed to define a metric")
71 172 : });
72 :
73 172 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
74 172 : register_int_counter_vec!(
75 172 : "pageserver_storage_operations_seconds_count",
76 172 : "Count of storage operations with operation, tenant and timeline dimensions",
77 172 : &["operation", "tenant_id", "shard_id", "timeline_id"],
78 172 : )
79 172 : .expect("failed to define a metric")
80 172 : });
81 :
/// Histogram buckets (in seconds) for background operations like compaction,
/// GC and size calculation: decades from 10 ms up to 1000 s.
const STORAGE_OP_BUCKETS: &[f64] = &[
    0.010, // 10 ms
    0.100, // 100 ms
    1.0,
    10.0,
    100.0,
    1000.0,
];
84 :
85 172 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
86 172 : register_histogram_vec!(
87 172 : "pageserver_storage_operations_seconds_global",
88 172 : "Time spent on storage operations",
89 172 : &["operation"],
90 172 : STORAGE_OP_BUCKETS.into(),
91 172 : )
92 172 : .expect("failed to define a metric")
93 172 : });
94 :
95 0 : pub(crate) static READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
96 0 : register_histogram!(
97 0 : "pageserver_layers_visited_per_read_global",
98 0 : "Number of layers visited to reconstruct one key",
99 0 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
100 0 : )
101 0 : .expect("failed to define a metric")
102 0 : });
103 :
104 168 : pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
105 168 : register_histogram!(
106 168 : "pageserver_layers_visited_per_vectored_read_global",
107 168 : "Average number of layers visited to reconstruct one key",
108 168 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
109 168 : )
110 168 : .expect("failed to define a metric")
111 168 : });
112 :
/// Distinguishes singular from vectored gets.
///
/// The static string of each variant is used as the `get_kind` label value
/// when the reconstruct-time histograms in this file are created.
#[derive(
    Clone, Copy, enum_map::Enum, strum_macros::EnumString, strum_macros::Display, IntoStaticStr,
)]
pub(crate) enum GetKind {
    Singular,
    Vectored,
}
121 :
/// Pair of histograms, one per [`GetKind`]; select one with `for_get_kind`.
pub(crate) struct ReconstructTimeMetrics {
    // Histogram for `GetKind::Singular`.
    singular: Histogram,
    // Histogram for `GetKind::Vectored`.
    vectored: Histogram,
}
126 :
127 170 : pub(crate) static RECONSTRUCT_TIME: Lazy<ReconstructTimeMetrics> = Lazy::new(|| {
128 170 : let inner = register_histogram_vec!(
129 170 : "pageserver_getpage_reconstruct_seconds",
130 170 : "Time spent in reconstruct_value (reconstruct a page from deltas)",
131 170 : &["get_kind"],
132 170 : CRITICAL_OP_BUCKETS.into(),
133 170 : )
134 170 : .expect("failed to define a metric");
135 170 :
136 170 : ReconstructTimeMetrics {
137 170 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
138 170 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
139 170 : }
140 170 : });
141 :
142 : impl ReconstructTimeMetrics {
143 626532 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
144 626532 : match get_kind {
145 626096 : GetKind::Singular => &self.singular,
146 436 : GetKind::Vectored => &self.vectored,
147 : }
148 626532 : }
149 : }
150 :
/// Pair of histograms, one per [`GetKind`]; select one with `for_get_kind`.
pub(crate) struct ReconstructDataTimeMetrics {
    // Histogram for `GetKind::Singular`.
    singular: Histogram,
    // Histogram for `GetKind::Vectored`.
    vectored: Histogram,
}
155 :
156 : impl ReconstructDataTimeMetrics {
157 626548 : pub(crate) fn for_get_kind(&self, get_kind: GetKind) -> &Histogram {
158 626548 : match get_kind {
159 626112 : GetKind::Singular => &self.singular,
160 436 : GetKind::Vectored => &self.vectored,
161 : }
162 626548 : }
163 : }
164 :
165 170 : pub(crate) static GET_RECONSTRUCT_DATA_TIME: Lazy<ReconstructDataTimeMetrics> = Lazy::new(|| {
166 170 : let inner = register_histogram_vec!(
167 170 : "pageserver_getpage_get_reconstruct_data_seconds",
168 170 : "Time spent in get_reconstruct_value_data",
169 170 : &["get_kind"],
170 170 : CRITICAL_OP_BUCKETS.into(),
171 170 : )
172 170 : .expect("failed to define a metric");
173 170 :
174 170 : ReconstructDataTimeMetrics {
175 170 : singular: inner.with_label_values(&[GetKind::Singular.into()]),
176 170 : vectored: inner.with_label_values(&[GetKind::Vectored.into()]),
177 170 : }
178 170 : });
179 :
/// Per-`TaskKind` latency histograms for `get_vectored`.
pub(crate) struct GetVectoredLatency {
    // `None` for task kinds that are not in `TRACKED_TASK_KINDS`.
    map: EnumMap<TaskKind, Option<Histogram>>,
}
183 :
/// Per-`TaskKind` latency histograms for scans.
#[allow(dead_code)]
pub(crate) struct ScanLatency {
    // `None` for task kinds that are not in `TRACKED_TASK_KINDS`.
    map: EnumMap<TaskKind, Option<Histogram>>,
}
188 :
189 : impl GetVectoredLatency {
190 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
191 : // cardinality of the metric.
192 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
193 :
194 19510 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
195 19510 : self.map[task_kind].as_ref()
196 19510 : }
197 : }
198 :
impl ScanLatency {
    // Only these task kinds are tracked for scan latency. All other kinds are filtered
    // out to reduce the total cardinality of the metric.
    const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];

    /// Returns the histogram for `task_kind`, or `None` if that kind is not tracked.
    pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
        self.map[task_kind].as_ref()
    }
}
208 :
/// An in-flight latency measurement that reports into `parent` when `observe` is called.
pub(crate) struct ScanLatencyOngoingRecording<'a> {
    // Histogram that receives the elapsed time.
    parent: &'a Histogram,
    // Point in time at which the recording was started.
    start: std::time::Instant,
}
213 :
214 : impl<'a> ScanLatencyOngoingRecording<'a> {
215 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
216 0 : let start = Instant::now();
217 0 : ScanLatencyOngoingRecording { parent, start }
218 0 : }
219 :
220 0 : pub(crate) fn observe(self) {
221 0 : let elapsed = self.start.elapsed();
222 0 : self.parent.observe(elapsed.as_secs_f64());
223 0 : }
224 : }
225 :
226 164 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
227 164 : let inner = register_histogram_vec!(
228 164 : "pageserver_get_vectored_seconds",
229 164 : "Time spent in get_vectored.",
230 164 : &["task_kind"],
231 164 : CRITICAL_OP_BUCKETS.into(),
232 164 : )
233 164 : .expect("failed to define a metric");
234 164 :
235 164 : GetVectoredLatency {
236 5084 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
237 5084 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
238 5084 :
239 5084 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
240 328 : let task_kind = task_kind.into();
241 328 : Some(inner.with_label_values(&[task_kind]))
242 : } else {
243 4756 : None
244 : }
245 5084 : })),
246 164 : }
247 164 : });
248 :
249 4 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
250 4 : let inner = register_histogram_vec!(
251 4 : "pageserver_scan_seconds",
252 4 : "Time spent in scan.",
253 4 : &["task_kind"],
254 4 : CRITICAL_OP_BUCKETS.into(),
255 4 : )
256 4 : .expect("failed to define a metric");
257 4 :
258 4 : ScanLatency {
259 124 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
260 124 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
261 124 :
262 124 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
263 4 : let task_kind = task_kind.into();
264 4 : Some(inner.with_label_values(&[task_kind]))
265 : } else {
266 120 : None
267 : }
268 124 : })),
269 4 : }
270 4 : });
271 :
/// Page cache read counters for one (task kind, page content kind) combination.
pub(crate) struct PageCacheMetricsForTaskKind {
    // Child of `pageserver_page_cache_read_accesses_total` with key_kind "immutable".
    pub read_accesses_immutable: IntCounter,
    // Child of `pageserver_page_cache_read_hits_total` with key_kind "immutable".
    pub read_hits_immutable: IntCounter,
}
276 :
/// Page cache read counters, indexed first by `TaskKind` and then by `PageContentKind`.
pub(crate) struct PageCacheMetrics {
    map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
}
280 :
281 88 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
282 88 : register_int_counter_vec!(
283 88 : "pageserver_page_cache_read_hits_total",
284 88 : "Number of read accesses to the page cache that hit",
285 88 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
286 88 : )
287 88 : .expect("failed to define a metric")
288 88 : });
289 :
290 88 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
291 88 : register_int_counter_vec!(
292 88 : "pageserver_page_cache_read_accesses_total",
293 88 : "Number of read accesses to the page cache",
294 88 : &["task_kind", "key_kind", "content_kind"]
295 88 : )
296 88 : .expect("failed to define a metric")
297 88 : });
298 :
299 88 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
300 2728 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
301 2728 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
302 2728 : let task_kind: &'static str = task_kind.into();
303 21824 : EnumMap::from_array(std::array::from_fn(|content_kind| {
304 21824 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
305 21824 : let content_kind: &'static str = content_kind.into();
306 21824 : PageCacheMetricsForTaskKind {
307 21824 : read_accesses_immutable: {
308 21824 : PAGE_CACHE_READ_ACCESSES
309 21824 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
310 21824 : .unwrap()
311 21824 : },
312 21824 :
313 21824 : read_hits_immutable: {
314 21824 : PAGE_CACHE_READ_HITS
315 21824 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
316 21824 : .unwrap()
317 21824 : },
318 21824 : }
319 21824 : }))
320 2728 : })),
321 88 : });
322 :
323 : impl PageCacheMetrics {
324 970878 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
325 970878 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
326 970878 : }
327 : }
328 :
/// Gauges describing the size of the page cache.
pub(crate) struct PageCacheSizeMetrics {
    // Gauge `pageserver_page_cache_size_max_bytes`.
    pub max_bytes: UIntGauge,

    // Child of `pageserver_page_cache_size_current_bytes` with key_kind "immutable".
    pub current_bytes_immutable: UIntGauge,
}
334 :
335 88 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
336 88 : register_uint_gauge_vec!(
337 88 : "pageserver_page_cache_size_current_bytes",
338 88 : "Current size of the page cache in bytes, by key kind",
339 88 : &["key_kind"]
340 88 : )
341 88 : .expect("failed to define a metric")
342 88 : });
343 :
344 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
345 88 : Lazy::new(|| PageCacheSizeMetrics {
346 88 : max_bytes: {
347 88 : register_uint_gauge!(
348 88 : "pageserver_page_cache_size_max_bytes",
349 88 : "Maximum size of the page cache in bytes"
350 88 : )
351 88 : .expect("failed to define a metric")
352 88 : },
353 88 : current_bytes_immutable: {
354 88 : PAGE_CACHE_SIZE_CURRENT_BYTES
355 88 : .get_metric_with_label_values(&["immutable"])
356 88 : .unwrap()
357 88 : },
358 88 : });
359 :
360 : pub(crate) mod page_cache_eviction_metrics {
361 : use std::num::NonZeroUsize;
362 :
363 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
364 : use once_cell::sync::Lazy;
365 :
366 : #[derive(Clone, Copy)]
367 : pub(crate) enum Outcome {
368 : FoundSlotUnused { iters: NonZeroUsize },
369 : FoundSlotEvicted { iters: NonZeroUsize },
370 : ItersExceeded { iters: NonZeroUsize },
371 : }
372 :
373 88 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
374 88 : register_int_counter_vec!(
375 88 : "pageserver_page_cache_find_victim_iters_total",
376 88 : "Counter for the number of iterations in the find_victim loop",
377 88 : &["outcome"],
378 88 : )
379 88 : .expect("failed to define a metric")
380 88 : });
381 :
382 88 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
383 88 : register_int_counter_vec!(
384 88 : "pageserver_page_cache_find_victim_calls",
385 88 : "Incremented at the end of each find_victim() call.\
386 88 : Filter by outcome to get e.g., eviction rate.",
387 88 : &["outcome"]
388 88 : )
389 88 : .unwrap()
390 88 : });
391 :
392 32089 : pub(crate) fn observe(outcome: Outcome) {
393 : macro_rules! dry {
394 : ($label:literal, $iters:expr) => {{
395 : static LABEL: &'static str = $label;
396 : static ITERS_TOTAL: Lazy<IntCounter> =
397 108 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
398 : static CALLS: Lazy<IntCounter> =
399 108 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
400 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
401 : CALLS.inc();
402 : }};
403 : }
404 32089 : match outcome {
405 1560 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
406 30529 : Outcome::FoundSlotEvicted { iters } => {
407 30529 : dry!("found_evicted", iters)
408 : }
409 0 : Outcome::ItersExceeded { iters } => {
410 0 : dry!("err_iters_exceeded", iters);
411 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
412 0 : }
413 : }
414 32089 : }
415 : }
416 :
417 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
418 0 : register_int_counter_vec!(
419 0 : "page_cache_errors_total",
420 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
421 0 : &["error_kind"]
422 0 : )
423 0 : .expect("failed to define a metric")
424 0 : });
425 :
/// Error kinds counted by `page_cache_errors_inc`; the kebab-case name of the
/// variant is used as the `error_kind` label value.
#[derive(IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum PageCacheErrorKind {
    AcquirePinnedSlotTimeout,
    EvictIterLimit,
}
432 :
433 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
434 0 : PAGE_CACHE_ERRORS
435 0 : .get_metric_with_label_values(&[error_kind.into()])
436 0 : .unwrap()
437 0 : .inc();
438 0 : }
439 :
440 18 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
441 18 : register_histogram!(
442 18 : "pageserver_wait_lsn_seconds",
443 18 : "Time spent waiting for WAL to arrive",
444 18 : CRITICAL_OP_BUCKETS.into(),
445 18 : )
446 18 : .expect("failed to define a metric")
447 18 : });
448 :
449 172 : static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
450 172 : register_gauge_vec!(
451 172 : "pageserver_flush_wait_upload_seconds",
452 172 : "Time spent waiting for preceding uploads during layer flush",
453 172 : &["tenant_id", "shard_id", "timeline_id"]
454 172 : )
455 172 : .expect("failed to define a metric")
456 172 : });
457 :
458 172 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
459 172 : register_int_gauge_vec!(
460 172 : "pageserver_last_record_lsn",
461 172 : "Last record LSN grouped by timeline",
462 172 : &["tenant_id", "shard_id", "timeline_id"]
463 172 : )
464 172 : .expect("failed to define a metric")
465 172 : });
466 :
467 172 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
468 172 : register_int_gauge_vec!(
469 172 : "pageserver_disk_consistent_lsn",
470 172 : "Disk consistent LSN grouped by timeline",
471 172 : &["tenant_id", "shard_id", "timeline_id"]
472 172 : )
473 172 : .expect("failed to define a metric")
474 172 : });
475 :
476 172 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
477 172 : register_uint_gauge_vec!(
478 172 : "pageserver_projected_remote_consistent_lsn",
479 172 : "Projected remote consistent LSN grouped by timeline",
480 172 : &["tenant_id", "shard_id", "timeline_id"]
481 172 : )
482 172 : .expect("failed to define a metric")
483 172 : });
484 :
485 172 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
486 172 : register_uint_gauge_vec!(
487 172 : "pageserver_pitr_history_size",
488 172 : "Data written since PITR cutoff on this timeline",
489 172 : &["tenant_id", "shard_id", "timeline_id"]
490 172 : )
491 172 : .expect("failed to define a metric")
492 172 : });
493 :
/// Layer kinds distinguished in metrics; serialized in kebab-case.
/// NOTE(review): presumably the `kind` label value of the
/// `pageserver_layer_bytes` / `pageserver_layer_count` gauges; confirm against callers.
#[derive(strum_macros::EnumString, strum_macros::Display, strum_macros::IntoStaticStr)]
#[strum(serialize_all = "kebab_case")]
pub(crate) enum MetricLayerKind {
    Delta,
    Image,
}
500 :
501 172 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
502 172 : register_uint_gauge_vec!(
503 172 : "pageserver_layer_bytes",
504 172 : "Sum of layer physical sizes in bytes",
505 172 : &["tenant_id", "shard_id", "timeline_id", "kind"]
506 172 : )
507 172 : .expect("failed to define a metric")
508 172 : });
509 :
510 172 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
511 172 : register_uint_gauge_vec!(
512 172 : "pageserver_layer_count",
513 172 : "Number of layers that exist",
514 172 : &["tenant_id", "shard_id", "timeline_id", "kind"]
515 172 : )
516 172 : .expect("failed to define a metric")
517 172 : });
518 :
519 172 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
520 172 : register_uint_gauge_vec!(
521 172 : "pageserver_archive_size",
522 172 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
523 172 : &["tenant_id", "shard_id", "timeline_id"]
524 172 : )
525 172 : .expect("failed to define a metric")
526 172 : });
527 :
528 172 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
529 172 : register_int_gauge_vec!(
530 172 : "pageserver_standby_horizon",
531 172 : "Standby apply LSN for which GC is hold off, by timeline.",
532 172 : &["tenant_id", "shard_id", "timeline_id"]
533 172 : )
534 172 : .expect("failed to define a metric")
535 172 : });
536 :
537 172 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
538 172 : register_uint_gauge_vec!(
539 172 : "pageserver_resident_physical_size",
540 172 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
541 172 : &["tenant_id", "shard_id", "timeline_id"]
542 172 : )
543 172 : .expect("failed to define a metric")
544 172 : });
545 :
546 172 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
547 172 : register_uint_gauge_vec!(
548 172 : "pageserver_visible_physical_size",
549 172 : "The size of the layer files present in the pageserver's filesystem.",
550 172 : &["tenant_id", "shard_id", "timeline_id"]
551 172 : )
552 172 : .expect("failed to define a metric")
553 172 : });
554 :
555 168 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
556 168 : register_uint_gauge!(
557 168 : "pageserver_resident_physical_size_global",
558 168 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
559 168 : )
560 168 : .expect("failed to define a metric")
561 168 : });
562 :
563 172 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
564 172 : register_uint_gauge_vec!(
565 172 : "pageserver_remote_physical_size",
566 172 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
567 172 : // Corollary: If any files are missing from the index part, they won't be included here.
568 172 : &["tenant_id", "shard_id", "timeline_id"]
569 172 : )
570 172 : .expect("failed to define a metric")
571 172 : });
572 :
573 172 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
574 172 : register_uint_gauge!(
575 172 : "pageserver_remote_physical_size_global",
576 172 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
577 172 : )
578 172 : .expect("failed to define a metric")
579 172 : });
580 :
581 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
582 4 : register_int_counter!(
583 4 : "pageserver_remote_ondemand_downloaded_layers_total",
584 4 : "Total on-demand downloaded layers"
585 4 : )
586 4 : .unwrap()
587 4 : });
588 :
589 4 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
590 4 : register_int_counter!(
591 4 : "pageserver_remote_ondemand_downloaded_bytes_total",
592 4 : "Total bytes of layers on-demand downloaded",
593 4 : )
594 4 : .unwrap()
595 4 : });
596 :
597 172 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
598 172 : register_uint_gauge_vec!(
599 172 : "pageserver_current_logical_size",
600 172 : "Current logical size grouped by timeline",
601 172 : &["tenant_id", "shard_id", "timeline_id"]
602 172 : )
603 172 : .expect("failed to define current logical size metric")
604 172 : });
605 :
606 172 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
607 172 : register_int_gauge_vec!(
608 172 : "pageserver_aux_file_estimated_size",
609 172 : "The size of all aux files for a timeline in aux file v2 store.",
610 172 : &["tenant_id", "shard_id", "timeline_id"]
611 172 : )
612 172 : .expect("failed to define a metric")
613 172 : });
614 :
615 172 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
616 172 : register_uint_gauge_vec!(
617 172 : "pageserver_valid_lsn_lease_count",
618 172 : "The number of valid leases after refreshing gc info.",
619 172 : &["tenant_id", "shard_id", "timeline_id"],
620 172 : )
621 172 : .expect("failed to define a metric")
622 172 : });
623 :
624 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
625 0 : register_int_counter!(
626 0 : "pageserver_circuit_breaker_broken",
627 0 : "How many times a circuit breaker has broken"
628 0 : )
629 0 : .expect("failed to define a metric")
630 0 : });
631 :
632 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
633 0 : register_int_counter!(
634 0 : "pageserver_circuit_breaker_unbroken",
635 0 : "How many times a circuit breaker has been un-broken (recovered)"
636 0 : )
637 0 : .expect("failed to define a metric")
638 0 : });
639 :
640 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
641 164 : register_int_counter!(
642 164 : "pageserver_compression_image_in_bytes_total",
643 164 : "Size of data written into image layers before compression"
644 164 : )
645 164 : .expect("failed to define a metric")
646 164 : });
647 :
648 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
649 164 : register_int_counter!(
650 164 : "pageserver_compression_image_in_bytes_considered",
651 164 : "Size of potentially compressible data written into image layers before compression"
652 164 : )
653 164 : .expect("failed to define a metric")
654 164 : });
655 :
656 164 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
657 164 : register_int_counter!(
658 164 : "pageserver_compression_image_in_bytes_chosen",
659 164 : "Size of data whose compressed form was written into image layers"
660 164 : )
661 164 : .expect("failed to define a metric")
662 164 : });
663 :
664 164 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
665 164 : register_int_counter!(
666 164 : "pageserver_compression_image_out_bytes_total",
667 164 : "Size of compressed image layer written"
668 164 : )
669 164 : .expect("failed to define a metric")
670 164 : });
671 :
672 10 : pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
673 10 : register_uint_gauge!(
674 10 : "pageserver_relsize_cache_entries",
675 10 : "Number of entries in the relation size cache",
676 10 : )
677 10 : .expect("failed to define a metric")
678 10 : });
679 :
680 10 : pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
681 10 : register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
682 10 : .expect("failed to define a metric")
683 10 : });
684 :
685 10 : pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
686 10 : register_int_counter!(
687 10 : "pageserver_relsize_cache_misses",
688 10 : "Relation size cache misses",
689 10 : )
690 10 : .expect("failed to define a metric")
691 10 : });
692 :
693 4 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
694 4 : register_int_counter!(
695 4 : "pageserver_relsize_cache_misses_old",
696 4 : "Relation size cache misses where the lookup LSN is older than the last relation update"
697 4 : )
698 4 : .expect("failed to define a metric")
699 4 : });
700 :
701 : pub(crate) mod initial_logical_size {
702 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
703 : use once_cell::sync::Lazy;
704 :
705 : pub(crate) struct StartCalculation(IntCounterVec);
706 172 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
707 172 : StartCalculation(
708 172 : register_int_counter_vec!(
709 172 : "pageserver_initial_logical_size_start_calculation",
710 172 : "Incremented each time we start an initial logical size calculation attempt. \
711 172 : The `circumstances` label provides some additional details.",
712 172 : &["attempt", "circumstances"]
713 172 : )
714 172 : .unwrap(),
715 172 : )
716 172 : });
717 :
718 : struct DropCalculation {
719 : first: IntCounter,
720 : retry: IntCounter,
721 : }
722 :
723 172 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
724 172 : let vec = register_int_counter_vec!(
725 172 : "pageserver_initial_logical_size_drop_calculation",
726 172 : "Incremented each time we abort a started size calculation attmpt.",
727 172 : &["attempt"]
728 172 : )
729 172 : .unwrap();
730 172 : DropCalculation {
731 172 : first: vec.with_label_values(&["first"]),
732 172 : retry: vec.with_label_values(&["retry"]),
733 172 : }
734 172 : });
735 :
736 : pub(crate) struct Calculated {
737 : pub(crate) births: IntCounter,
738 : pub(crate) deaths: IntCounter,
739 : }
740 :
741 172 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
742 172 : births: register_int_counter!(
743 172 : "pageserver_initial_logical_size_finish_calculation",
744 172 : "Incremented every time we finish calculation of initial logical size.\
745 172 : If everything is working well, this should happen at most once per Timeline object."
746 172 : )
747 172 : .unwrap(),
748 172 : deaths: register_int_counter!(
749 172 : "pageserver_initial_logical_size_drop_finished_calculation",
750 172 : "Incremented when we drop a finished initial logical size calculation result.\
751 172 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
752 172 : )
753 172 : .unwrap(),
754 172 : });
755 :
756 : pub(crate) struct OngoingCalculationGuard {
757 : inc_drop_calculation: Option<IntCounter>,
758 : }
759 :
760 184 : #[derive(strum_macros::IntoStaticStr)]
761 : pub(crate) enum StartCircumstances {
762 : EmptyInitial,
763 : SkippedConcurrencyLimiter,
764 : AfterBackgroundTasksRateLimit,
765 : }
766 :
767 : impl StartCalculation {
768 184 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
769 184 : let circumstances_label: &'static str = circumstances.into();
770 184 : self.0
771 184 : .with_label_values(&["first", circumstances_label])
772 184 : .inc();
773 184 : OngoingCalculationGuard {
774 184 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
775 184 : }
776 184 : }
777 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
778 0 : let circumstances_label: &'static str = circumstances.into();
779 0 : self.0
780 0 : .with_label_values(&["retry", circumstances_label])
781 0 : .inc();
782 0 : OngoingCalculationGuard {
783 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
784 0 : }
785 0 : }
786 : }
787 :
788 : impl Drop for OngoingCalculationGuard {
789 184 : fn drop(&mut self) {
790 184 : if let Some(counter) = self.inc_drop_calculation.take() {
791 0 : counter.inc();
792 184 : }
793 184 : }
794 : }
795 :
796 : impl OngoingCalculationGuard {
797 184 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
798 184 : drop(self.inc_drop_calculation.take());
799 184 : CALCULATED.births.inc();
800 184 : FinishedCalculationGuard {
801 184 : inc_on_drop: CALCULATED.deaths.clone(),
802 184 : }
803 184 : }
804 : }
805 :
806 : pub(crate) struct FinishedCalculationGuard {
807 : inc_on_drop: IntCounter,
808 : }
809 :
810 : impl Drop for FinishedCalculationGuard {
811 6 : fn drop(&mut self) {
812 6 : self.inc_on_drop.inc();
813 6 : }
814 : }
815 :
816 : // context: https://github.com/neondatabase/neon/issues/5963
817 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
818 0 : Lazy::new(|| {
819 0 : register_int_counter!(
820 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
821 0 : "Counter for the following event: walreceiver calls\
822 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
823 0 : )
824 0 : .unwrap()
825 0 : });
826 : }
827 :
828 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
829 0 : register_uint_gauge_vec!(
830 0 : "pageserver_directory_entries_count",
831 0 : "Sum of the entries in pageserver-stored directory listings",
832 0 : &["tenant_id", "shard_id", "timeline_id"]
833 0 : )
834 0 : .expect("failed to define a metric")
835 0 : });
836 :
837 174 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
838 174 : register_uint_gauge_vec!(
839 174 : "pageserver_tenant_states_count",
840 174 : "Count of tenants per state",
841 174 : &["state"]
842 174 : )
843 174 : .expect("Failed to register pageserver_tenant_states_count metric")
844 174 : });
845 :
846 : /// A set of broken tenants.
847 : ///
848 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries per each broken
849 : /// tenant.
850 10 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
851 10 : register_uint_gauge_vec!(
852 10 : "pageserver_broken_tenants_count",
853 10 : "Set of broken tenants",
854 10 : &["tenant_id", "shard_id"]
855 10 : )
856 10 : .expect("Failed to register pageserver_tenant_states_count metric")
857 10 : });
858 :
/// Gauge vector `pageserver_tenant_synthetic_cached_size_bytes`: synthetic size
/// in bytes, one series per `tenant_id`.
pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
        "pageserver_tenant_synthetic_cached_size_bytes",
        "Synthetic size of each tenant in bytes",
        &["tenant_id"]
    )
    .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
});
867 :
/// Histogram vector `pageserver_eviction_iteration_duration_seconds_global`:
/// duration of a single eviction iteration, labelled by the configured
/// `period_secs` and `threshold_secs`. Uses `STORAGE_OP_BUCKETS`.
pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_eviction_iteration_duration_seconds_global",
        "Time spent on a single eviction iteration",
        &["period_secs", "threshold_secs"],
        STORAGE_OP_BUCKETS.into(),
    )
    .expect("failed to define a metric")
});
877 :
/// Counter vector `pageserver_evictions`: layers evicted, one series per
/// tenant / shard / timeline.
static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_evictions",
        "Number of layers evicted from the pageserver",
        &["tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});
886 :
/// Counter vector `pageserver_evictions_with_low_residence_duration`.
///
/// Besides tenant / shard / timeline, each series is keyed by the
/// `residence_duration_data_source` and by `low_threshold_secs`; a series is
/// looked up and removed through [`EvictionsWithLowResidenceDuration`].
static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_evictions_with_low_residence_duration",
        "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
         Residence duration is determined using the `residence_duration_data_source`.",
        &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
    )
    .expect("failed to define a metric")
});
896 :
/// Label-less counter `pageserver_unexpected_ondemand_downloads_count`.
pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_unexpected_ondemand_downloads_count",
        "Number of unexpected on-demand downloads. \
         We log more context for each increment, so, forgo any labels in this metric.",
    )
    .expect("failed to define a metric")
});
905 :
/// How long did we take to start up?  Broken down by labels to describe
/// different phases of startup.
///
/// Registered as the gauge vector `pageserver_startup_duration_seconds` with a
/// single `phase` label.
pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
    register_gauge_vec!(
        "pageserver_startup_duration_seconds",
        "Time taken by phases of pageserver startup, in seconds",
        &["phase"]
    )
    .expect("Failed to register pageserver_startup_duration_seconds metric")
});
916 :
/// Gauge `pageserver_startup_is_loading`; per its help text it is 1 during the
/// initial startup load of tenants and 0 otherwise.
pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
    register_uint_gauge!(
        "pageserver_startup_is_loading",
        "1 while in initial startup load of tenants, 0 at other times"
    )
    .expect("Failed to register pageserver_startup_is_loading")
});
924 :
/// Process-wide gauge `pageserver_timeline_ephemeral_bytes` (no labels).
pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
    register_uint_gauge!(
        "pageserver_timeline_ephemeral_bytes",
        "Total number of bytes in ephemeral layers, summed for all timelines.  Approximate, lazily updated."
    )
    .expect("Failed to register metric")
});
932 :
/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
/// like how long it took to load.
///
/// Note that these are process-global metrics, _not_ per-tenant metrics.  Per-tenant
/// metrics are rather expensive, and usually fine grained stuff makes more sense
/// at a timeline level than tenant level.
pub(crate) struct TenantMetrics {
    /// How long did tenants take to go from construction to active state?
    pub(crate) activation: Histogram,
    /// Time taken by tenants to load remote metadata on startup/attach.
    pub(crate) preload: Histogram,
    /// Time taken by tenants to initialize once remote metadata is loaded.
    pub(crate) attach: Histogram,

    /// How many tenants are included in the initial startup of the pageserver?
    pub(crate) startup_scheduled: IntCounter,
    /// How many of those tenants have completed warm-up or were activated
    /// on-demand during initial startup.
    pub(crate) startup_complete: IntCounter,
}
949 :
950 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
951 0 : TenantMetrics {
952 0 : activation: register_histogram!(
953 0 : "pageserver_tenant_activation_seconds",
954 0 : "Time taken by tenants to activate, in seconds",
955 0 : CRITICAL_OP_BUCKETS.into()
956 0 : )
957 0 : .expect("Failed to register metric"),
958 0 : preload: register_histogram!(
959 0 : "pageserver_tenant_preload_seconds",
960 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
961 0 : CRITICAL_OP_BUCKETS.into()
962 0 : )
963 0 : .expect("Failed to register metric"),
964 0 : attach: register_histogram!(
965 0 : "pageserver_tenant_attach_seconds",
966 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
967 0 : CRITICAL_OP_BUCKETS.into()
968 0 : )
969 0 : .expect("Failed to register metric"),
970 0 : startup_scheduled: register_int_counter!(
971 0 : "pageserver_tenant_startup_scheduled",
972 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
973 0 : ).expect("Failed to register metric"),
974 0 : startup_complete: register_int_counter!(
975 0 : "pageserver_tenant_startup_complete",
976 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
977 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
978 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
979 0 : ).expect("Failed to register metric"),
980 0 : }
981 0 : });
982 :
/// Each `Timeline`'s  [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub(crate) struct EvictionsWithLowResidenceDuration {
    /// Value of the `residence_duration_data_source` label.
    data_source: &'static str,
    /// Observations strictly below this duration are counted; also rendered
    /// (in whole seconds) as the `low_threshold_secs` label.
    threshold: Duration,
    /// The labelled counter; `None` once `remove` has run.
    counter: Option<IntCounter>,
}
990 :
991 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
992 : data_source: &'static str,
993 : threshold: Duration,
994 : }
995 :
996 : impl EvictionsWithLowResidenceDurationBuilder {
997 418 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
998 418 : Self {
999 418 : data_source,
1000 418 : threshold,
1001 418 : }
1002 418 : }
1003 :
1004 418 : fn build(
1005 418 : &self,
1006 418 : tenant_id: &str,
1007 418 : shard_id: &str,
1008 418 : timeline_id: &str,
1009 418 : ) -> EvictionsWithLowResidenceDuration {
1010 418 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
1011 418 : .get_metric_with_label_values(&[
1012 418 : tenant_id,
1013 418 : shard_id,
1014 418 : timeline_id,
1015 418 : self.data_source,
1016 418 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
1017 418 : ])
1018 418 : .unwrap();
1019 418 : EvictionsWithLowResidenceDuration {
1020 418 : data_source: self.data_source,
1021 418 : threshold: self.threshold,
1022 418 : counter: Some(counter),
1023 418 : }
1024 418 : }
1025 : }
1026 :
1027 : impl EvictionsWithLowResidenceDuration {
1028 428 : fn threshold_label_value(threshold: Duration) -> String {
1029 428 : format!("{}", threshold.as_secs())
1030 428 : }
1031 :
1032 4 : pub fn observe(&self, observed_value: Duration) {
1033 4 : if observed_value < self.threshold {
1034 4 : self.counter
1035 4 : .as_ref()
1036 4 : .expect("nobody calls this function after `remove_from_vec`")
1037 4 : .inc();
1038 4 : }
1039 4 : }
1040 :
1041 0 : pub fn change_threshold(
1042 0 : &mut self,
1043 0 : tenant_id: &str,
1044 0 : shard_id: &str,
1045 0 : timeline_id: &str,
1046 0 : new_threshold: Duration,
1047 0 : ) {
1048 0 : if new_threshold == self.threshold {
1049 0 : return;
1050 0 : }
1051 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1052 0 : self.data_source,
1053 0 : new_threshold,
1054 0 : )
1055 0 : .build(tenant_id, shard_id, timeline_id);
1056 0 : std::mem::swap(self, &mut with_new);
1057 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1058 0 : }
1059 :
1060 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
1061 10 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1062 10 : let Some(_counter) = self.counter.take() else {
1063 0 : return;
1064 : };
1065 :
1066 10 : let threshold = Self::threshold_label_value(self.threshold);
1067 10 :
1068 10 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1069 10 : tenant_id,
1070 10 : shard_id,
1071 10 : timeline_id,
1072 10 : self.data_source,
1073 10 : &threshold,
1074 10 : ]);
1075 10 :
1076 10 : match removed {
1077 0 : Err(e) => {
1078 0 : // this has been hit in staging as
1079 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1080 0 : // because we can be in the drop path already, don't risk:
1081 0 : // - "double-panic => illegal instruction" or
1082 0 : // - future "drop panick => abort"
1083 0 : //
1084 0 : // so just nag: (the error has the labels)
1085 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
1086 : }
1087 : Ok(()) => {
1088 : // to help identify cases where we double-remove the same values, let's log all
1089 : // deletions?
1090 10 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
1091 : }
1092 : }
1093 10 : }
1094 : }
1095 :
// Metrics collected on disk IO operations
//
// Roughly logarithmic scale.
// Bucket upper bounds are in seconds; each step is a factor of ~30.
const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
    0.000030, // 30 usec
    0.001000, // 1000 usec
    0.030,    // 30 ms
    1.000,    // 1000 ms
    30.000,   // 30000 ms
];
1106 :
/// VirtualFile fs operation variants.
///
/// Operations:
/// - open ([`std::fs::OpenOptions::open`])
/// - open-after-replace (label of `OpenAfterReplace`; presumably an open that
///   follows a close-by-replace — TODO confirm at the call site)
/// - close (dropping [`crate::virtual_file::VirtualFile`])
/// - close-by-replace (close by replacement algorithm)
/// - read (`read_at`)
/// - write (`write_at`)
/// - seek (modify internal position or file length query)
/// - fsync ([`std::fs::File::sync_all`])
/// - metadata ([`std::fs::File::metadata`])
///
/// The discriminants double as indices into `StorageIoTime::metrics`.
#[derive(
    Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
)]
pub(crate) enum StorageIoOperation {
    Open,
    OpenAfterReplace,
    Close,
    CloseByReplace,
    Read,
    Write,
    Seek,
    Fsync,
    Metadata,
}
1132 :
1133 : impl StorageIoOperation {
1134 1818 : pub fn as_str(&self) -> &'static str {
1135 1818 : match self {
1136 202 : StorageIoOperation::Open => "open",
1137 202 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1138 202 : StorageIoOperation::Close => "close",
1139 202 : StorageIoOperation::CloseByReplace => "close-by-replace",
1140 202 : StorageIoOperation::Read => "read",
1141 202 : StorageIoOperation::Write => "write",
1142 202 : StorageIoOperation::Seek => "seek",
1143 202 : StorageIoOperation::Fsync => "fsync",
1144 202 : StorageIoOperation::Metadata => "metadata",
1145 : }
1146 1818 : }
1147 : }
1148 :
1149 : /// Tracks time taken by fs operations near VirtualFile.
1150 : #[derive(Debug)]
1151 : pub(crate) struct StorageIoTime {
1152 : metrics: [Histogram; StorageIoOperation::COUNT],
1153 : }
1154 :
1155 : impl StorageIoTime {
1156 202 : fn new() -> Self {
1157 202 : let storage_io_histogram_vec = register_histogram_vec!(
1158 202 : "pageserver_io_operations_seconds",
1159 202 : "Time spent in IO operations",
1160 202 : &["operation"],
1161 202 : STORAGE_IO_TIME_BUCKETS.into()
1162 202 : )
1163 202 : .expect("failed to define a metric");
1164 1818 : let metrics = std::array::from_fn(|i| {
1165 1818 : let op = StorageIoOperation::from_repr(i).unwrap();
1166 1818 : storage_io_histogram_vec
1167 1818 : .get_metric_with_label_values(&[op.as_str()])
1168 1818 : .unwrap()
1169 1818 : });
1170 202 : Self { metrics }
1171 202 : }
1172 :
1173 2002612 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1174 2002612 : &self.metrics[op as usize]
1175 2002612 : }
1176 : }
1177 :
/// Lazily-initialised global [`StorageIoTime`].
pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1179 :
// NOTE(review): presumably the `operation` label values used with `STORAGE_IO_SIZE` — confirm at the use site.
const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
1181 :
// Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
/// Gauge vector `pageserver_io_operations_bytes_total`, keyed by `operation`
/// plus tenant / shard / timeline.
pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_io_operations_bytes_total",
        "Total amount of bytes read/written in IO operations",
        &["operation", "tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});
1191 :
/// Metrics for the VirtualFile descriptor cache. Compiled out of test builds.
#[cfg(not(test))]
pub(crate) mod virtual_file_descriptor_cache {
    use super::*;

    /// Gauge `pageserver_virtual_file_descriptor_cache_size_max`.
    pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
        register_uint_gauge!(
            "pageserver_virtual_file_descriptor_cache_size_max",
            "Maximum number of open file descriptors in the cache."
        )
        .unwrap()
    });

    // SIZE_CURRENT: derive it like so:
    // ```
    // sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
    // -ignoring(operation)
    // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
    // ```
}
1211 :
/// Metrics describing the VirtualFile IO engine. Compiled out of test builds.
#[cfg(not(test))]
pub(crate) mod virtual_file_io_engine {
    use super::*;

    /// Gauge vector `pageserver_virtual_file_io_engine_kind`, labelled by `kind`.
    pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
        register_uint_gauge_vec!(
            "pageserver_virtual_file_io_engine_kind",
            "The configured io engine for VirtualFile",
            &["kind"],
        )
        .unwrap()
    });
}
1225 :
/// Timer for a single smgr operation.
///
/// The inner value is taken (leaving `None`) when the operation's latency is
/// recorded, which happens at most once: either explicitly or on drop.
pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
/// State carried by a running [`SmgrOpTimer`].
pub(crate) struct SmgrOpTimerInner {
    global_latency_histo: Histogram,

    // Optional because not all op types are tracked per-timeline
    per_timeline_latency_histo: Option<Histogram>,

    // Handed over to `SmgrOpFlushInProgress` once the op itself is done.
    global_flush_in_progress_micros: IntCounter,
    per_timeline_flush_in_progress_micros: IntCounter,

    // When the operation started.
    start: Instant,
    // Accumulated throttling time; subtracted from the observed latency.
    throttled: Duration,
    op: SmgrQueryType,
}
1240 :
/// Accounts the time spent flushing a response, see `measure`.
pub(crate) struct SmgrOpFlushInProgress {
    /// Point in time up to which elapsed time has already been added to the counters.
    base: Instant,
    global_micros: IntCounter,
    per_timeline_micros: IntCounter,
}
1246 :
impl SmgrOpTimer {
    /// Adds `throttle` (if any) to the time that will be subtracted from the
    /// observed latency.
    ///
    /// Panics if the timer has already been consumed.
    pub(crate) fn deduct_throttle(&mut self, throttle: &Option<Duration>) {
        let Some(throttle) = throttle else {
            return;
        };
        let inner = self.0.as_mut().expect("other public methods consume self");
        inner.throttled += *throttle;
    }

    /// Records the operation's latency and returns a [`SmgrOpFlushInProgress`]
    /// whose `base` is the instant at which the latency was recorded.
    pub(crate) fn observe_smgr_op_completion_and_start_flushing(mut self) -> SmgrOpFlushInProgress {
        let (flush_start, inner) = self
            .smgr_op_end()
            .expect("this method consume self, and the only other caller is drop handler");
        let SmgrOpTimerInner {
            global_flush_in_progress_micros,
            per_timeline_flush_in_progress_micros,
            ..
        } = inner;
        SmgrOpFlushInProgress {
            base: flush_start,
            global_micros: global_flush_in_progress_micros,
            per_timeline_micros: per_timeline_flush_in_progress_micros,
        }
    }

    /// Returns `None` if this method has already been called, `Some` otherwise.
    ///
    /// On the first call, observes `now - start - throttled` (in seconds) on
    /// the global histogram and, if present, on the per-timeline histogram.
    fn smgr_op_end(&mut self) -> Option<(Instant, SmgrOpTimerInner)> {
        let inner = self.0.take()?;

        let now = Instant::now();
        let elapsed = now - inner.start;

        let elapsed = match elapsed.checked_sub(inner.throttled) {
            Some(elapsed) => elapsed,
            None => {
                // Throttled time exceeds wall-clock time: warn, rate-limited
                // per query type (one `RateLimit` per `SmgrQueryType`).
                use utils::rate_limit::RateLimit;
                static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
                    Lazy::new(|| {
                        Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
                            RateLimit::new(Duration::from_secs(10))
                        })))
                    });
                let mut guard = LOGGED.lock().unwrap();
                let rate_limit = &mut guard[inner.op];
                rate_limit.call(|| {
                    warn!(op=?inner.op, ?elapsed, ?inner.throttled, "implementation error: time spent throttled exceeds total request wall clock time");
                });
                elapsed // un-throttled time, more info than just saturating to 0
            }
        };

        let elapsed = elapsed.as_secs_f64();

        inner.global_latency_histo.observe(elapsed);
        if let Some(per_timeline_getpage_histo) = &inner.per_timeline_latency_histo {
            per_timeline_getpage_histo.observe(elapsed);
        }

        Some((now, inner))
    }
}
1308 :
impl Drop for SmgrOpTimer {
    /// Records the latency if that has not happened yet; `smgr_op_end` returns
    /// `None` (and does nothing) when it was already called.
    fn drop(&mut self) {
        self.smgr_op_end();
    }
}
1314 :
1315 : impl SmgrOpFlushInProgress {
1316 0 : pub(crate) async fn measure<Fut, O>(mut self, mut fut: Fut) -> O
1317 0 : where
1318 0 : Fut: std::future::Future<Output = O>,
1319 0 : {
1320 0 : let mut fut = std::pin::pin!(fut);
1321 0 :
1322 0 : let now = Instant::now();
1323 0 : // Whenever observe_guard gets called, or dropped,
1324 0 : // it adds the time elapsed since its last call to metrics.
1325 0 : // Last call is tracked in `now`.
1326 0 : let mut observe_guard = scopeguard::guard(
1327 0 : || {
1328 0 : let elapsed = now - self.base;
1329 0 : self.global_micros
1330 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1331 0 : self.per_timeline_micros
1332 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1333 0 : self.base = now;
1334 0 : },
1335 0 : |mut observe| {
1336 0 : observe();
1337 0 : },
1338 0 : );
1339 :
1340 : loop {
1341 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1342 0 : Ok(v) => return v,
1343 0 : Err(_timeout) => {
1344 0 : (*observe_guard)();
1345 0 : }
1346 : }
1347 : }
1348 0 : }
1349 : }
1350 :
/// Kinds of smgr queries that are tracked.
///
/// `IntoStaticStr` with `serialize_all = "snake_case"` yields the
/// `smgr_query_type` label values (e.g. `get_page_at_lsn`); the discriminants
/// are used as array indices in [`SmgrQueryTimePerTimeline`].
#[derive(
    Debug,
    Clone,
    Copy,
    IntoStaticStr,
    strum_macros::EnumCount,
    strum_macros::EnumIter,
    strum_macros::FromRepr,
    enum_map::Enum,
)]
#[strum(serialize_all = "snake_case")]
pub enum SmgrQueryType {
    GetRelExists,
    GetRelSize,
    GetPageAtLsn,
    GetDbSize,
    GetSlruSegment,
}
1369 :
/// Pre-resolved metric handles for one timeline's smgr queries.
#[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline {
    // Indexed by `SmgrQueryType as usize`.
    global_started: [IntCounter; SmgrQueryType::COUNT],
    global_latency: [Histogram; SmgrQueryType::COUNT],
    // Per-timeline series exist only for `GetPageAtLsn`.
    per_timeline_getpage_started: IntCounter,
    per_timeline_getpage_latency: Histogram,
    global_batch_size: Histogram,
    per_timeline_batch_size: Histogram,
    global_flush_in_progress_micros: IntCounter,
    per_timeline_flush_in_progress_micros: IntCounter,
}
1381 :
/// Counter vector `pageserver_smgr_query_started_global_count`, labelled by
/// `smgr_query_type`.
static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        // it's a counter, but, name is prepared to extend it to a histogram of queue depth
        "pageserver_smgr_query_started_global_count",
        "Number of smgr queries started, aggregated by query type.",
        &["smgr_query_type"],
    )
    .expect("failed to define a metric")
});
1391 :
/// Counter vector `pageserver_smgr_query_started_count`, labelled by
/// `smgr_query_type` plus tenant / shard / timeline.
static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        // it's a counter, but, name is prepared to extend it to a histogram of queue depth
        "pageserver_smgr_query_started_count",
        "Number of smgr queries started, aggregated by query type and tenant/timeline.",
        &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
    )
    .expect("failed to define a metric")
});
1401 :
1402 174 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1403 174 : register_histogram_vec!(
1404 174 : "pageserver_smgr_query_seconds",
1405 174 : "Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
1406 174 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1407 174 : CRITICAL_OP_BUCKETS.into(),
1408 174 : )
1409 174 : .expect("failed to define a metric")
1410 174 : });
1411 :
1412 174 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1413 174 : [
1414 174 : 1,
1415 174 : 10,
1416 174 : 20,
1417 174 : 40,
1418 174 : 60,
1419 174 : 80,
1420 174 : 100,
1421 174 : 200,
1422 174 : 300,
1423 174 : 400,
1424 174 : 500,
1425 174 : 600,
1426 174 : 700,
1427 174 : 800,
1428 174 : 900,
1429 174 : 1_000, // 1ms
1430 174 : 2_000,
1431 174 : 4_000,
1432 174 : 6_000,
1433 174 : 8_000,
1434 174 : 10_000, // 10ms
1435 174 : 20_000,
1436 174 : 40_000,
1437 174 : 60_000,
1438 174 : 80_000,
1439 174 : 100_000,
1440 174 : 200_000,
1441 174 : 400_000,
1442 174 : 600_000,
1443 174 : 800_000,
1444 174 : 1_000_000, // 1s
1445 174 : 2_000_000,
1446 174 : 4_000_000,
1447 174 : 6_000_000,
1448 174 : 8_000_000,
1449 174 : 10_000_000, // 10s
1450 174 : 20_000_000,
1451 174 : 50_000_000,
1452 174 : 100_000_000,
1453 174 : 200_000_000,
1454 174 : 1_000_000_000, // 1000s
1455 174 : ]
1456 174 : .into_iter()
1457 174 : .map(Duration::from_micros)
1458 7134 : .map(|d| d.as_secs_f64())
1459 174 : .collect()
1460 174 : });
1461 :
/// Histogram vector `pageserver_smgr_query_seconds_global`, labelled by
/// `smgr_query_type`, using [`SMGR_QUERY_TIME_GLOBAL_BUCKETS`].
static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_smgr_query_seconds_global",
        "Time spent on smgr query handling, aggregated by query type.",
        &["smgr_query_type"],
        SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
    )
    .expect("failed to define a metric")
});
1471 :
1472 174 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1473 174 : (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
1474 5568 : .map(|v| v.into())
1475 174 : .collect()
1476 174 : });
1477 :
/// Process-wide histogram `pageserver_page_service_batch_size_global`, using
/// [`PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL`].
static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "pageserver_page_service_batch_size_global",
        "Batch size of pageserver page service requests",
        PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
    )
    .expect("failed to define a metric")
});
1486 :
1487 174 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1488 174 : let mut buckets = Vec::new();
1489 1218 : for i in 0.. {
1490 1218 : let bucket = 1 << i;
1491 1218 : if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
1492 174 : break;
1493 1044 : }
1494 1044 : buckets.push(bucket.into());
1495 : }
1496 174 : buckets
1497 174 : });
1498 :
/// Histogram vector `pageserver_page_service_batch_size`, one series per
/// tenant / shard / timeline, using
/// [`PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE`].
static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_page_service_batch_size",
        "Batch size of pageserver page service requests",
        &["tenant_id", "shard_id", "timeline_id"],
        PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
    )
    .expect("failed to define a metric")
});
1508 :
/// Gauge vector `pageserver_page_service_config_max_batch_size`, labelled by
/// `mode` and `execution`; written by `set_page_service_config_max_batch_size`.
pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "pageserver_page_service_config_max_batch_size",
        "Configured maximum batch size for the server-side batching functionality of page_service. \
         Labels expose more of the configuration parameters.",
        &["mode", "execution"]
    )
    .expect("failed to define a metric")
});
1518 :
1519 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
1520 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
1521 0 : let (label_values, value) = match conf {
1522 0 : PageServicePipeliningConfig::Serial => (["serial", "-"], 1),
1523 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
1524 0 : max_batch_size,
1525 0 : execution,
1526 0 : }) => {
1527 0 : let mode = "pipelined";
1528 0 : let execution = match execution {
1529 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
1530 0 : "concurrent-futures"
1531 : }
1532 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
1533 : };
1534 0 : ([mode, execution], max_batch_size.get())
1535 : }
1536 : };
1537 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
1538 0 : .with_label_values(&label_values)
1539 0 : .set(value.try_into().unwrap());
1540 0 : }
1541 :
1542 174 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
1543 174 : register_int_counter_vec!(
1544 174 : "pageserver_page_service_pagestream_flush_in_progress_micros",
1545 174 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
1546 174 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
1547 174 : easily discoverable in monitoring. \
1548 174 : Hence, this is NOT a completion latency historgram.",
1549 174 : &["tenant_id", "shard_id", "timeline_id"],
1550 174 : )
1551 174 : .expect("failed to define a metric")
1552 174 : });
1553 :
1554 174 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
1555 174 : register_int_counter!(
1556 174 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
1557 174 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
1558 174 : )
1559 174 : .expect("failed to define a metric")
1560 174 : });
1561 :
1562 : impl SmgrQueryTimePerTimeline {
1563 428 : pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
1564 428 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1565 428 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1566 428 : let timeline_id = timeline_id.to_string();
1567 2140 : let global_started = std::array::from_fn(|i| {
1568 2140 : let op = SmgrQueryType::from_repr(i).unwrap();
1569 2140 : SMGR_QUERY_STARTED_GLOBAL
1570 2140 : .get_metric_with_label_values(&[op.into()])
1571 2140 : .unwrap()
1572 2140 : });
1573 2140 : let global_latency = std::array::from_fn(|i| {
1574 2140 : let op = SmgrQueryType::from_repr(i).unwrap();
1575 2140 : SMGR_QUERY_TIME_GLOBAL
1576 2140 : .get_metric_with_label_values(&[op.into()])
1577 2140 : .unwrap()
1578 2140 : });
1579 428 :
1580 428 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
1581 428 : .get_metric_with_label_values(&[
1582 428 : SmgrQueryType::GetPageAtLsn.into(),
1583 428 : &tenant_id,
1584 428 : &shard_slug,
1585 428 : &timeline_id,
1586 428 : ])
1587 428 : .unwrap();
1588 428 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1589 428 : .get_metric_with_label_values(&[
1590 428 : SmgrQueryType::GetPageAtLsn.into(),
1591 428 : &tenant_id,
1592 428 : &shard_slug,
1593 428 : &timeline_id,
1594 428 : ])
1595 428 : .unwrap();
1596 428 :
1597 428 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
1598 428 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
1599 428 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1600 428 : .unwrap();
1601 428 :
1602 428 : let global_flush_in_progress_micros =
1603 428 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
1604 428 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
1605 428 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1606 428 : .unwrap();
1607 428 :
1608 428 : Self {
1609 428 : global_started,
1610 428 : global_latency,
1611 428 : per_timeline_getpage_latency,
1612 428 : per_timeline_getpage_started,
1613 428 : global_batch_size,
1614 428 : per_timeline_batch_size,
1615 428 : global_flush_in_progress_micros,
1616 428 : per_timeline_flush_in_progress_micros,
1617 428 : }
1618 428 : }
1619 10 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, started_at: Instant) -> SmgrOpTimer {
1620 10 : self.global_started[op as usize].inc();
1621 :
1622 10 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1623 2 : self.per_timeline_getpage_started.inc();
1624 2 : Some(self.per_timeline_getpage_latency.clone())
1625 : } else {
1626 8 : None
1627 : };
1628 :
1629 10 : SmgrOpTimer(Some(SmgrOpTimerInner {
1630 10 : global_latency_histo: self.global_latency[op as usize].clone(),
1631 10 : per_timeline_latency_histo,
1632 10 : start: started_at,
1633 10 : op,
1634 10 : throttled: Duration::ZERO,
1635 10 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
1636 10 : per_timeline_flush_in_progress_micros: self
1637 10 : .per_timeline_flush_in_progress_micros
1638 10 : .clone(),
1639 10 : }))
1640 10 : }
1641 :
1642 0 : pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
1643 0 : self.global_batch_size.observe(batch_size as f64);
1644 0 : self.per_timeline_batch_size.observe(batch_size as f64);
1645 0 : }
1646 : }
1647 :
#[cfg(test)]
mod smgr_query_time_tests {
    use std::time::Instant;

    use pageserver_api::shard::TenantShardId;
    use strum::IntoEnumIterator;
    use utils::id::{TenantId, TimelineId};

    // Regression test, we used hard-coded string constants before using an enum.
    // Pins the exact label value produced for every `SmgrQueryType` variant.
    #[test]
    fn op_label_name() {
        use super::SmgrQueryType::*;
        let expect: [(super::SmgrQueryType, &'static str); 5] = [
            (GetRelExists, "get_rel_exists"),
            (GetRelSize, "get_rel_size"),
            (GetPageAtLsn, "get_page_at_lsn"),
            (GetDbSize, "get_db_size"),
            (GetSlruSegment, "get_slru_segment"),
        ];
        for (op, expect) in expect {
            let actual: &'static str = op.into();
            assert_eq!(actual, expect);
        }
    }

    // For every op type: starting and dropping a timer must add a sample to the
    // global histogram, and to the per-timeline histogram only for getpage.
    #[test]
    fn basic() {
        let ops: Vec<_> = super::SmgrQueryType::iter().collect();

        for op in &ops {
            // Fresh ids per iteration, so the per-timeline series starts at zero.
            let tenant_id = TenantId::generate();
            let timeline_id = TimelineId::generate();
            let metrics = super::SmgrQueryTimePerTimeline::new(
                &TenantShardId::unsharded(tenant_id),
                &timeline_id,
            );

            // (sum of global sample counts over all ops, per-timeline getpage sample count)
            let get_counts = || {
                let global: u64 = ops
                    .iter()
                    .map(|op| metrics.global_latency[*op as usize].get_sample_count())
                    .sum();
                (
                    global,
                    metrics.per_timeline_getpage_latency.get_sample_count(),
                )
            };

            let (pre_global, pre_per_tenant_timeline) = get_counts();
            assert_eq!(pre_per_tenant_timeline, 0);

            // Dropping the timer is what records the observation.
            let timer = metrics.start_smgr_op(*op, Instant::now());
            drop(timer);

            let (post_global, post_per_tenant_timeline) = get_counts();
            if matches!(op, super::SmgrQueryType::GetPageAtLsn) {
                // getpage ops are tracked per-timeline, others aren't
                assert_eq!(post_per_tenant_timeline, 1);
            } else {
                assert_eq!(post_per_tenant_timeline, 0);
            }
            assert!(post_global > pre_global);
        }
    }
}
1713 :
// keep in sync with control plane Go code so that we can validate
// compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
//
// The table is written in milliseconds and converted to seconds.
static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
    // Go code uses milliseconds. Variable is called `computeStartupBuckets`
    [
        5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
        1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
    ]
    .map(|ms| (ms as f64) / 1000.0)
});
1724 :
/// Duration histograms for basebackup queries, one per result category.
///
/// The category is picked in [`BasebackupQueryTimeOngoingRecording::observe`].
pub(crate) struct BasebackupQueryTime {
    /// Queries that returned `Ok`.
    ok: Histogram,
    /// Queries that failed with any error not classified as a client error.
    error: Histogram,
    /// Queries that failed with a disconnect caused by an expected I/O error.
    client_error: Histogram,
}
1730 :
1731 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1732 0 : let vec = register_histogram_vec!(
1733 0 : "pageserver_basebackup_query_seconds",
1734 0 : "Histogram of basebackup queries durations, by result type",
1735 0 : &["result"],
1736 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1737 0 : )
1738 0 : .expect("failed to define a metric");
1739 0 : BasebackupQueryTime {
1740 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1741 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1742 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
1743 0 : }
1744 0 : });
1745 :
/// An in-flight basebackup duration measurement, created by
/// [`BasebackupQueryTime::start_recording`] and finished by calling `observe`.
pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
    /// Histograms the elapsed time will be recorded into.
    parent: &'a BasebackupQueryTime,
    /// Instant at which the recording was started.
    start: std::time::Instant,
}
1750 :
1751 : impl BasebackupQueryTime {
1752 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
1753 0 : let start = Instant::now();
1754 0 : BasebackupQueryTimeOngoingRecording {
1755 0 : parent: self,
1756 0 : start,
1757 0 : }
1758 0 : }
1759 : }
1760 :
1761 : impl BasebackupQueryTimeOngoingRecording<'_> {
1762 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
1763 0 : let elapsed = self.start.elapsed().as_secs_f64();
1764 : // If you want to change categorize of a specific error, also change it in `log_query_error`.
1765 0 : let metric = match res {
1766 0 : Ok(_) => &self.parent.ok,
1767 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
1768 0 : if is_expected_io_error(io_error) =>
1769 0 : {
1770 0 : &self.parent.client_error
1771 : }
1772 0 : Err(_) => &self.parent.error,
1773 : };
1774 0 : metric.observe(elapsed);
1775 0 : }
1776 : }
1777 :
1778 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1779 0 : register_int_counter_pair_vec!(
1780 0 : "pageserver_live_connections_started",
1781 0 : "Number of network connections that we started handling",
1782 0 : "pageserver_live_connections_finished",
1783 0 : "Number of network connections that we finished handling",
1784 0 : &["pageserver_connection_kind"]
1785 0 : )
1786 0 : .expect("failed to define a metric")
1787 0 : });
1788 :
/// Kinds of compute -> pageserver commands that are counted separately in
/// [`COMPUTE_COMMANDS_COUNTERS`].
///
/// The variant name (via `IntoStaticStr`) is used as the `command` label value.
#[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
pub(crate) enum ComputeCommandKind {
    PageStreamV2,
    Basebackup,
    Fullbackup,
    LeaseLsn,
}
1796 :
/// One counter per [`ComputeCommandKind`]; look one up with `for_command`.
pub(crate) struct ComputeCommandCounters {
    /// Counter for each command kind, indexed by the kind itself.
    map: EnumMap<ComputeCommandKind, IntCounter>,
}
1800 :
1801 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
1802 0 : let inner = register_int_counter_vec!(
1803 0 : "pageserver_compute_commands",
1804 0 : "Number of compute -> pageserver commands processed",
1805 0 : &["command"]
1806 0 : )
1807 0 : .expect("failed to define a metric");
1808 0 :
1809 0 : ComputeCommandCounters {
1810 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
1811 0 : let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
1812 0 : let command_str: &'static str = command.into();
1813 0 : inner.with_label_values(&[command_str])
1814 0 : })),
1815 0 : }
1816 0 : });
1817 :
1818 : impl ComputeCommandCounters {
1819 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
1820 0 : &self.map[command]
1821 0 : }
1822 : }
1823 :
1824 : // remote storage metrics
1825 :
1826 168 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1827 168 : register_int_counter_pair_vec!(
1828 168 : "pageserver_remote_timeline_client_calls_started",
1829 168 : "Number of started calls to remote timeline client.",
1830 168 : "pageserver_remote_timeline_client_calls_finished",
1831 168 : "Number of finshed calls to remote timeline client.",
1832 168 : &[
1833 168 : "tenant_id",
1834 168 : "shard_id",
1835 168 : "timeline_id",
1836 168 : "file_kind",
1837 168 : "op_kind"
1838 168 : ],
1839 168 : )
1840 168 : .unwrap()
1841 168 : });
1842 :
1843 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1844 166 : Lazy::new(|| {
1845 166 : register_int_counter_vec!(
1846 166 : "pageserver_remote_timeline_client_bytes_started",
1847 166 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1848 166 : The increment happens when the operation is scheduled.",
1849 166 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1850 166 : )
1851 166 : .expect("failed to define a metric")
1852 166 : });
1853 :
1854 166 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1855 166 : register_int_counter_vec!(
1856 166 : "pageserver_remote_timeline_client_bytes_finished",
1857 166 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1858 166 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1859 166 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1860 166 : )
1861 166 : .expect("failed to define a metric")
1862 166 : });
1863 :
/// Metrics of the tenant manager: slot gauges per mode plus counters for slot
/// writes and unexpected conditions.
pub(crate) struct TenantManagerMetrics {
    /// Gauge adjusted for `TenantSlot::Attached` slots.
    tenant_slots_attached: UIntGauge,
    /// Gauge adjusted for `TenantSlot::Secondary` slots.
    tenant_slots_secondary: UIntGauge,
    /// Gauge adjusted for `TenantSlot::InProgress` slots.
    tenant_slots_inprogress: UIntGauge,
    /// Counter of writes to a tenant slot.
    pub(crate) tenant_slot_writes: IntCounter,
    /// Counter of unexpected conditions encountered.
    pub(crate) unexpected_errors: IntCounter,
}
1871 :
1872 : impl TenantManagerMetrics {
1873 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
1874 : /// exactly: they track the lifetime of the slots _in the tenant map_.
1875 2 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
1876 2 : match slot {
1877 0 : TenantSlot::Attached(_) => {
1878 0 : self.tenant_slots_attached.inc();
1879 0 : }
1880 0 : TenantSlot::Secondary(_) => {
1881 0 : self.tenant_slots_secondary.inc();
1882 0 : }
1883 2 : TenantSlot::InProgress(_) => {
1884 2 : self.tenant_slots_inprogress.inc();
1885 2 : }
1886 : }
1887 2 : }
1888 :
1889 2 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
1890 2 : match slot {
1891 2 : TenantSlot::Attached(_) => {
1892 2 : self.tenant_slots_attached.dec();
1893 2 : }
1894 0 : TenantSlot::Secondary(_) => {
1895 0 : self.tenant_slots_secondary.dec();
1896 0 : }
1897 0 : TenantSlot::InProgress(_) => {
1898 0 : self.tenant_slots_inprogress.dec();
1899 0 : }
1900 : }
1901 2 : }
1902 :
1903 : #[cfg(all(debug_assertions, not(test)))]
1904 0 : pub(crate) fn slots_total(&self) -> u64 {
1905 0 : self.tenant_slots_attached.get()
1906 0 : + self.tenant_slots_secondary.get()
1907 0 : + self.tenant_slots_inprogress.get()
1908 0 : }
1909 : }
1910 :
1911 2 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1912 2 : let tenant_slots = register_uint_gauge_vec!(
1913 2 : "pageserver_tenant_manager_slots",
1914 2 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1915 2 : &["mode"]
1916 2 : )
1917 2 : .expect("failed to define a metric");
1918 2 : TenantManagerMetrics {
1919 2 : tenant_slots_attached: tenant_slots
1920 2 : .get_metric_with_label_values(&["attached"])
1921 2 : .unwrap(),
1922 2 : tenant_slots_secondary: tenant_slots
1923 2 : .get_metric_with_label_values(&["secondary"])
1924 2 : .unwrap(),
1925 2 : tenant_slots_inprogress: tenant_slots
1926 2 : .get_metric_with_label_values(&["inprogress"])
1927 2 : .unwrap(),
1928 2 : tenant_slot_writes: register_int_counter!(
1929 2 : "pageserver_tenant_manager_slot_writes",
1930 2 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1931 2 : )
1932 2 : .expect("failed to define a metric"),
1933 2 : unexpected_errors: register_int_counter!(
1934 2 : "pageserver_tenant_manager_unexpected_errors_total",
1935 2 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1936 2 : )
1937 2 : .expect("failed to define a metric"),
1938 2 : }
1939 2 : });
1940 :
/// Counters describing the deletion queue; see [`DELETION_QUEUE`] for the
/// metric names and help texts.
pub(crate) struct DeletionQueueMetrics {
    /// Objects submitted for deletion.
    pub(crate) keys_submitted: IntCounter,
    /// Object deletions dropped due to stale generation.
    pub(crate) keys_dropped: IntCounter,
    /// Objects actually deleted.
    pub(crate) keys_executed: IntCounter,
    /// Keys validated for deletion.
    pub(crate) keys_validated: IntCounter,
    /// Updates to remote_consistent_lsn dropped due to stale generation number.
    pub(crate) dropped_lsn_updates: IntCounter,
    /// Unexpected conditions that may stall the queue.
    pub(crate) unexpected_errors: IntCounter,
    /// Retryable remote I/O errors, labelled by `op_kind`.
    pub(crate) remote_errors: IntCounterVec,
}
1950 32 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1951 32 : DeletionQueueMetrics{
1952 32 :
1953 32 : keys_submitted: register_int_counter!(
1954 32 : "pageserver_deletion_queue_submitted_total",
1955 32 : "Number of objects submitted for deletion"
1956 32 : )
1957 32 : .expect("failed to define a metric"),
1958 32 :
1959 32 : keys_dropped: register_int_counter!(
1960 32 : "pageserver_deletion_queue_dropped_total",
1961 32 : "Number of object deletions dropped due to stale generation."
1962 32 : )
1963 32 : .expect("failed to define a metric"),
1964 32 :
1965 32 : keys_executed: register_int_counter!(
1966 32 : "pageserver_deletion_queue_executed_total",
1967 32 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1968 32 : )
1969 32 : .expect("failed to define a metric"),
1970 32 :
1971 32 : keys_validated: register_int_counter!(
1972 32 : "pageserver_deletion_queue_validated_total",
1973 32 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
1974 32 : )
1975 32 : .expect("failed to define a metric"),
1976 32 :
1977 32 : dropped_lsn_updates: register_int_counter!(
1978 32 : "pageserver_deletion_queue_dropped_lsn_updates_total",
1979 32 : "Updates to remote_consistent_lsn dropped due to stale generation number."
1980 32 : )
1981 32 : .expect("failed to define a metric"),
1982 32 : unexpected_errors: register_int_counter!(
1983 32 : "pageserver_deletion_queue_unexpected_errors_total",
1984 32 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
1985 32 : )
1986 32 : .expect("failed to define a metric"),
1987 32 : remote_errors: register_int_counter_vec!(
1988 32 : "pageserver_deletion_queue_remote_errors_total",
1989 32 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
1990 32 : &["op_kind"],
1991 32 : )
1992 32 : .expect("failed to define a metric")
1993 32 : }
1994 32 : });
1995 :
/// Metrics for heatmap uploads and secondary-mode downloads; see
/// [`SECONDARY_MODE`] for the metric names and help texts.
pub(crate) struct SecondaryModeMetrics {
    /// Heatmaps written to remote storage by attached tenants.
    pub(crate) upload_heatmap: IntCounter,
    /// Failures writing a heatmap to remote storage.
    pub(crate) upload_heatmap_errors: IntCounter,
    /// Time to build and upload a heatmap.
    pub(crate) upload_heatmap_duration: Histogram,
    /// Heatmap downloads by secondary mode locations.
    pub(crate) download_heatmap: IntCounter,
    /// Layer downloads by secondary mode locations.
    pub(crate) download_layer: IntCounter,
}
2003 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2004 0 : SecondaryModeMetrics {
2005 0 : upload_heatmap: register_int_counter!(
2006 0 : "pageserver_secondary_upload_heatmap",
2007 0 : "Number of heatmaps written to remote storage by attached tenants"
2008 0 : )
2009 0 : .expect("failed to define a metric"),
2010 0 : upload_heatmap_errors: register_int_counter!(
2011 0 : "pageserver_secondary_upload_heatmap_errors",
2012 0 : "Failures writing heatmap to remote storage"
2013 0 : )
2014 0 : .expect("failed to define a metric"),
2015 0 : upload_heatmap_duration: register_histogram!(
2016 0 : "pageserver_secondary_upload_heatmap_duration",
2017 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2018 0 : )
2019 0 : .expect("failed to define a metric"),
2020 0 : download_heatmap: register_int_counter!(
2021 0 : "pageserver_secondary_download_heatmap",
2022 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2023 0 : )
2024 0 : .expect("failed to define a metric"),
2025 0 : download_layer: register_int_counter!(
2026 0 : "pageserver_secondary_download_layer",
2027 0 : "Number of downloads of layers by secondary mode locations"
2028 0 : )
2029 0 : .expect("failed to define a metric"),
2030 0 : }
2031 0 : });
2032 :
2033 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2034 0 : register_uint_gauge_vec!(
2035 0 : "pageserver_secondary_resident_physical_size",
2036 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2037 0 : &["tenant_id", "shard_id"]
2038 0 : )
2039 0 : .expect("failed to define a metric")
2040 0 : });
2041 :
2042 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2043 0 : register_uint_gauge!(
2044 0 : "pageserver_utilization_score",
2045 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2046 0 : )
2047 0 : .expect("failed to define a metric")
2048 0 : });
2049 :
2050 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2051 0 : register_uint_gauge_vec!(
2052 0 : "pageserver_secondary_heatmap_total_size",
2053 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2054 0 : &["tenant_id", "shard_id"]
2055 0 : )
2056 0 : .expect("failed to define a metric")
2057 0 : });
2058 :
/// Kind of operation performed against remote storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
    Upload,
    Download,
    Delete,
}

impl RemoteOpKind {
    /// Returns the lowercase name of the operation kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpKind::Upload => "upload",
            RemoteOpKind::Download => "download",
            RemoteOpKind::Delete => "delete",
        }
    }
}
2074 :
/// Kind of file a remote storage operation works on.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum RemoteOpFileKind {
    Layer,
    Index,
}

impl RemoteOpFileKind {
    /// Returns the lowercase name of the file kind.
    pub fn as_str(&self) -> &'static str {
        match *self {
            RemoteOpFileKind::Layer => "layer",
            RemoteOpFileKind::Index => "index",
        }
    }
}
2088 :
2089 166 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
2090 166 : register_histogram_vec!(
2091 166 : "pageserver_remote_operation_seconds",
2092 166 : "Time spent on remote storage operations. \
2093 166 : Grouped by tenant, timeline, operation_kind and status. \
2094 166 : Does not account for time spent waiting in remote timeline client's queues.",
2095 166 : &["file_kind", "op_kind", "status"]
2096 166 : )
2097 166 : .expect("failed to define a metric")
2098 166 : });
2099 :
2100 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2101 0 : register_int_counter_vec!(
2102 0 : "pageserver_tenant_task_events",
2103 0 : "Number of task start/stop/fail events.",
2104 0 : &["event"],
2105 0 : )
2106 0 : .expect("Failed to register tenant_task_events metric")
2107 0 : });
2108 :
/// Metrics about waiting on the background loop concurrency-limiting
/// semaphore, kept separately for every `BackgroundLoopKind`.
pub struct BackgroundLoopSemaphoreMetrics {
    /// Started/finished counter pair of acquire calls, per task kind.
    counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
    /// Accumulated wait time in seconds, per task kind.
    durations: EnumMap<BackgroundLoopKind, Counter>,
}
2113 :
2114 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> = Lazy::new(
2115 20 : || {
2116 20 : let counters = register_int_counter_pair_vec!(
2117 20 : "pageserver_background_loop_semaphore_wait_start_count",
2118 20 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2119 20 : "pageserver_background_loop_semaphore_wait_finish_count",
2120 20 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2121 20 : &["task"],
2122 20 : )
2123 20 : .unwrap();
2124 20 :
2125 20 : let durations = register_counter_vec!(
2126 20 : "pageserver_background_loop_semaphore_wait_duration_seconds",
2127 20 : "Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls",
2128 20 : &["task"],
2129 20 : )
2130 20 : .unwrap();
2131 20 :
2132 20 : BackgroundLoopSemaphoreMetrics {
2133 180 : counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2134 180 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2135 180 : counters.with_label_values(&[kind.into()])
2136 180 : })),
2137 180 : durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2138 180 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2139 180 : durations.with_label_values(&[kind.into()])
2140 180 : })),
2141 20 : }
2142 20 : },
2143 : );
2144 :
2145 : impl BackgroundLoopSemaphoreMetrics {
2146 364 : pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ {
2147 : struct Record<'a> {
2148 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2149 : task: BackgroundLoopKind,
2150 : _counter_guard: metrics::IntCounterPairGuard,
2151 : start: Instant,
2152 : }
2153 : impl Drop for Record<'_> {
2154 364 : fn drop(&mut self) {
2155 364 : let elapsed = self.start.elapsed().as_secs_f64();
2156 364 : self.metrics.durations[self.task].inc_by(elapsed);
2157 364 : }
2158 : }
2159 364 : Record {
2160 364 : metrics: self,
2161 364 : task,
2162 364 : _counter_guard: self.counters[task].guard(),
2163 364 : start: Instant::now(),
2164 364 : }
2165 364 : }
2166 : }
2167 :
2168 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2169 0 : register_int_counter_vec!(
2170 0 : "pageserver_background_loop_period_overrun_count",
2171 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2172 0 : &["task", "period"],
2173 0 : )
2174 0 : .expect("failed to define a metric")
2175 0 : });
2176 :
2177 : // walreceiver metrics
2178 :
2179 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2180 0 : register_int_counter!(
2181 0 : "pageserver_walreceiver_started_connections_total",
2182 0 : "Number of started walreceiver connections"
2183 0 : )
2184 0 : .expect("failed to define a metric")
2185 0 : });
2186 :
2187 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2188 0 : register_int_gauge!(
2189 0 : "pageserver_walreceiver_active_managers",
2190 0 : "Number of active walreceiver managers"
2191 0 : )
2192 0 : .expect("failed to define a metric")
2193 0 : });
2194 :
2195 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2196 0 : register_int_counter_vec!(
2197 0 : "pageserver_walreceiver_switches_total",
2198 0 : "Number of walreceiver manager change_connection calls",
2199 0 : &["reason"]
2200 0 : )
2201 0 : .expect("failed to define a metric")
2202 0 : });
2203 :
2204 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2205 0 : register_int_counter!(
2206 0 : "pageserver_walreceiver_broker_updates_total",
2207 0 : "Number of received broker updates in walreceiver"
2208 0 : )
2209 0 : .expect("failed to define a metric")
2210 0 : });
2211 :
2212 2 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2213 2 : register_int_counter_vec!(
2214 2 : "pageserver_walreceiver_candidates_events_total",
2215 2 : "Number of walreceiver candidate events",
2216 2 : &["event"]
2217 2 : )
2218 2 : .expect("failed to define a metric")
2219 2 : });
2220 :
2221 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2222 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2223 :
2224 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2225 2 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2226 :
2227 : // Metrics collected on WAL redo operations
2228 : //
2229 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2230 : // for access to the postgres process ('wait') since there is only one for
2231 : // each tenant.
2232 :
/// Time buckets are small because we want to be able to measure the
/// smallest redo processing times. These buckets allow us to measure down
/// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
/// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
///
/// Values up to 1s are recorded because metrics show that we have redo
/// durations and lock times larger than 0.250s.
macro_rules! redo_histogram_time_buckets {
    () => {
        vec![
            // 5 us .. 50 us
            0.000_005, 0.000_010, 0.000_025, 0.000_050,
            // 100 us .. 500 us
            0.000_100, 0.000_250, 0.000_500,
            // 1 ms .. 5 ms
            0.001_000, 0.002_500, 0.005_000,
            // 10 ms .. 50 ms
            0.010_000, 0.025_000, 0.050_000,
            // 100 ms .. 500 ms
            0.100_000, 0.250_000, 0.500_000,
            // 1 s
            1.000_000,
        ]
    };
}
2249 :
/// While we're at it, also measure the amount of records replayed in each
/// operation. We have a global 'total replayed' counter, but that's not
/// as useful as 'what is the skew for how many records we replay in one
/// operation'.
macro_rules! redo_histogram_count_buckets {
    () => {
        vec![
            0.0, 1.0, 2.0, 5.0, // single digits
            10.0, 25.0, 50.0, // tens
            100.0, 250.0, 500.0, // hundreds
        ]
    };
}
2259 :
/// Bucket boundaries used by the WAL redo bytes histogram.
macro_rules! redo_bytes_histogram_count_buckets {
    () => {
        // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
        // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
        vec![
            24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0,
            368.0, 512.0, 728.0, 1024.0, 1456.0, 2048.0, 2904.0, 4096.0,
            5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
        ]
    };
}
2270 :
/// Counters describing WAL ingestion; see [`WAL_INGEST`] for the metric names
/// and help texts.
pub(crate) struct WalIngestMetrics {
    /// Bytes of WAL ingested from safekeepers.
    pub(crate) bytes_received: IntCounter,
    /// WAL records received from safekeepers.
    pub(crate) records_received: IntCounter,
    /// WAL records which resulted in writes to pageserver storage.
    pub(crate) records_committed: IntCounter,
    /// WAL records filtered out due to sharding.
    pub(crate) records_filtered: IntCounter,
    /// Zero gap blocks written on relation extends.
    pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
    /// Ignored ClearVmBits operations, labelled by `entity`.
    pub(crate) clear_vm_bits_unknown: IntCounterVec,
}
2279 :
2280 10 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
2281 10 : bytes_received: register_int_counter!(
2282 10 : "pageserver_wal_ingest_bytes_received",
2283 10 : "Bytes of WAL ingested from safekeepers",
2284 10 : )
2285 10 : .unwrap(),
2286 10 : records_received: register_int_counter!(
2287 10 : "pageserver_wal_ingest_records_received",
2288 10 : "Number of WAL records received from safekeepers"
2289 10 : )
2290 10 : .expect("failed to define a metric"),
2291 10 : records_committed: register_int_counter!(
2292 10 : "pageserver_wal_ingest_records_committed",
2293 10 : "Number of WAL records which resulted in writes to pageserver storage"
2294 10 : )
2295 10 : .expect("failed to define a metric"),
2296 10 : records_filtered: register_int_counter!(
2297 10 : "pageserver_wal_ingest_records_filtered",
2298 10 : "Number of WAL records filtered out due to sharding"
2299 10 : )
2300 10 : .expect("failed to define a metric"),
2301 10 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2302 10 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2303 10 : "Total number of zero gap blocks written on relation extends"
2304 10 : )
2305 10 : .expect("failed to define a metric"),
2306 10 : clear_vm_bits_unknown: register_int_counter_vec!(
2307 10 : "pageserver_wal_ingest_clear_vm_bits_unknown",
2308 10 : "Number of ignored ClearVmBits operations due to unknown pages/relations",
2309 10 : &["entity"],
2310 10 : )
2311 10 : .expect("failed to define a metric"),
2312 10 : });
2313 :
2314 172 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2315 172 : register_int_counter_vec!(
2316 172 : "pageserver_timeline_wal_records_received",
2317 172 : "Number of WAL records received per shard",
2318 172 : &["tenant_id", "shard_id", "timeline_id"]
2319 172 : )
2320 172 : .expect("failed to define a metric")
2321 172 : });
2322 :
2323 6 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2324 6 : register_histogram!(
2325 6 : "pageserver_wal_redo_seconds",
2326 6 : "Time spent on WAL redo",
2327 6 : redo_histogram_time_buckets!()
2328 6 : )
2329 6 : .expect("failed to define a metric")
2330 6 : });
2331 :
2332 6 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2333 6 : register_histogram!(
2334 6 : "pageserver_wal_redo_records_histogram",
2335 6 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2336 6 : redo_histogram_count_buckets!(),
2337 6 : )
2338 6 : .expect("failed to define a metric")
2339 6 : });
2340 :
2341 6 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2342 6 : register_histogram!(
2343 6 : "pageserver_wal_redo_bytes_histogram",
2344 6 : "Histogram of number of records replayed per redo sent to Postgres",
2345 6 : redo_bytes_histogram_count_buckets!(),
2346 6 : )
2347 6 : .expect("failed to define a metric")
2348 6 : });
2349 :
2350 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2351 6 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2352 6 : register_int_counter!(
2353 6 : "pageserver_replayed_wal_records_total",
2354 6 : "Number of WAL records replayed in WAL redo process"
2355 6 : )
2356 6 : .unwrap()
2357 6 : });
2358 :
2359 : #[rustfmt::skip]
2360 8 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2361 8 : register_histogram!(
2362 8 : "pageserver_wal_redo_process_launch_duration",
2363 8 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2364 8 : vec![
2365 8 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2366 8 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2367 8 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2368 8 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2369 8 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2370 8 : ],
2371 8 : )
2372 8 : .expect("failed to define a metric")
2373 8 : });
2374 :
/// Counters about WAL redo processes and their stderr logger tasks.
pub(crate) struct WalRedoProcessCounters {
    /// WAL redo processes started.
    pub(crate) started: IntCounter,
    /// WAL redo processes stopped, one counter per [`WalRedoKillCause`].
    pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
    /// Stderr logger tasks that have started.
    pub(crate) active_stderr_logger_tasks_started: IntCounter,
    /// Stderr logger tasks that have finished.
    pub(crate) active_stderr_logger_tasks_finished: IntCounter,
}
2381 :
/// Why a WAL redo process was stopped.
///
/// The variant name (via `IntoStaticStr`) is used as the `cause` label value
/// of `pageserver_wal_redo_process_stopped_total`.
#[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
pub(crate) enum WalRedoKillCause {
    WalRedoProcessDrop,
    NoLeakChildDrop,
    Startup,
}
2388 :
2389 : impl Default for WalRedoProcessCounters {
2390 8 : fn default() -> Self {
2391 8 : let started = register_int_counter!(
2392 8 : "pageserver_wal_redo_process_started_total",
2393 8 : "Number of WAL redo processes started",
2394 8 : )
2395 8 : .unwrap();
2396 8 :
2397 8 : let killed = register_int_counter_vec!(
2398 8 : "pageserver_wal_redo_process_stopped_total",
2399 8 : "Number of WAL redo processes stopped",
2400 8 : &["cause"],
2401 8 : )
2402 8 : .unwrap();
2403 8 :
2404 8 : let active_stderr_logger_tasks_started = register_int_counter!(
2405 8 : "pageserver_walredo_stderr_logger_tasks_started_total",
2406 8 : "Number of active walredo stderr logger tasks that have started",
2407 8 : )
2408 8 : .unwrap();
2409 8 :
2410 8 : let active_stderr_logger_tasks_finished = register_int_counter!(
2411 8 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2412 8 : "Number of active walredo stderr logger tasks that have finished",
2413 8 : )
2414 8 : .unwrap();
2415 8 :
2416 8 : Self {
2417 8 : started,
2418 24 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2419 24 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
2420 24 : let cause_str: &'static str = cause.into();
2421 24 : killed.with_label_values(&[cause_str])
2422 24 : })),
2423 8 : active_stderr_logger_tasks_started,
2424 8 : active_stderr_logger_tasks_finished,
2425 8 : }
2426 8 : }
2427 : }
2428 :
2429 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2430 : Lazy::new(WalRedoProcessCounters::default);
2431 :
/// Similar to `prometheus::HistogramTimer` but does not record on drop.
///
/// Call `stop_and_record` to record explicitly, or convert it with
/// `record_on_drop` to get a timer that records when dropped.
pub(crate) struct StorageTimeMetricsTimer {
    /// Metrics the measured duration is recorded into.
    metrics: StorageTimeMetrics,
    /// Instant at which the timer was created.
    start: Instant,
}
2437 :
2438 : impl StorageTimeMetricsTimer {
2439 3370 : fn new(metrics: StorageTimeMetrics) -> Self {
2440 3370 : Self {
2441 3370 : metrics,
2442 3370 : start: Instant::now(),
2443 3370 : }
2444 3370 : }
2445 :
2446 : /// Record the time from creation to now.
2447 2238 : pub fn stop_and_record(self) {
2448 2238 : let duration = self.start.elapsed().as_secs_f64();
2449 2238 : self.metrics.timeline_sum.inc_by(duration);
2450 2238 : self.metrics.timeline_count.inc();
2451 2238 : self.metrics.global_histogram.observe(duration);
2452 2238 : }
2453 :
2454 : /// Turns this timer into a timer, which will always record -- usually this means recording
2455 : /// regardless an early `?` path was taken in a function.
2456 4 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2457 4 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2458 4 : }
2459 : }
2460 :
2461 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2462 :
2463 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2464 4 : fn drop(&mut self) {
2465 4 : if let Some(inner) = self.0.take() {
2466 4 : inner.stop_and_record();
2467 4 : }
2468 4 : }
2469 : }
2470 :
/// Timing facilities for a globally histogrammed metric, which is supported by per tenant and
/// timeline total sum and count.
#[derive(Clone, Debug)]
pub(crate) struct StorageTimeMetrics {
    /// Sum of f64 seconds, per operation, tenant_id and timeline_id
    timeline_sum: Counter,
    /// Number of operations, per operation, tenant_id and timeline_id
    timeline_count: IntCounter,
    /// Global histogram having only the "operation" label.
    global_histogram: Histogram,
}
2482 :
2483 : impl StorageTimeMetrics {
2484 3344 : pub fn new(
2485 3344 : operation: StorageTimeOperation,
2486 3344 : tenant_id: &str,
2487 3344 : shard_id: &str,
2488 3344 : timeline_id: &str,
2489 3344 : ) -> Self {
2490 3344 : let operation: &'static str = operation.into();
2491 3344 :
2492 3344 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2493 3344 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2494 3344 : .unwrap();
2495 3344 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2496 3344 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2497 3344 : .unwrap();
2498 3344 : let global_histogram = STORAGE_TIME_GLOBAL
2499 3344 : .get_metric_with_label_values(&[operation])
2500 3344 : .unwrap();
2501 3344 :
2502 3344 : StorageTimeMetrics {
2503 3344 : timeline_sum,
2504 3344 : timeline_count,
2505 3344 : global_histogram,
2506 3344 : }
2507 3344 : }
2508 :
2509 : /// Starts timing a new operation.
2510 : ///
2511 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2512 3370 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2513 3370 : StorageTimeMetricsTimer::new(self.clone())
2514 3370 : }
2515 : }
2516 :
/// Metric handles belonging to a single timeline.
///
/// The `tenant_id`, `shard_id` and `timeline_id` strings are the label values
/// the handles are created with in `TimelineMetrics::new`.
#[derive(Debug)]
pub(crate) struct TimelineMetrics {
    tenant_id: String,
    shard_id: String,
    timeline_id: String,
    // Timing metrics, one `StorageTimeMetrics` per kind of operation.
    pub flush_time_histo: StorageTimeMetrics,
    pub flush_wait_upload_time_gauge: Gauge,
    pub compact_time_histo: StorageTimeMetrics,
    pub create_images_time_histo: StorageTimeMetrics,
    pub logical_size_histo: StorageTimeMetrics,
    pub imitate_logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
    pub find_gc_cutoffs_histo: StorageTimeMetrics,
    pub last_record_lsn_gauge: IntGauge,
    pub disk_consistent_lsn_gauge: IntGauge,
    pub pitr_history_size: UIntGauge,
    pub archival_size: UIntGauge,
    pub(crate) layer_size_image: UIntGauge,
    pub(crate) layer_count_image: UIntGauge,
    pub(crate) layer_size_delta: UIntGauge,
    pub(crate) layer_count_delta: UIntGauge,
    pub standby_horizon_gauge: IntGauge,
    pub resident_physical_size_gauge: UIntGauge,
    pub visible_physical_size_gauge: UIntGauge,
    /// copy of LayeredTimeline.current_logical_size
    pub current_logical_size_gauge: UIntGauge,
    pub aux_file_size_gauge: IntGauge,
    // Wrapped in `Lazy`, so the gauge is only produced by the boxed closure on first use.
    pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
    pub evictions: IntCounter,
    pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
    /// Number of valid LSN leases.
    pub valid_lsn_lease_count_gauge: UIntGauge,
    pub wal_records_received: IntCounter,
    // NOTE(review): presumably set once the metrics have been shut down/removed — confirm in the impl.
    shutdown: std::sync::atomic::AtomicBool,
}
2553 :
2554 : impl TimelineMetrics {
2555 418 : pub fn new(
2556 418 : tenant_shard_id: &TenantShardId,
2557 418 : timeline_id_raw: &TimelineId,
2558 418 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2559 418 : ) -> Self {
2560 418 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2561 418 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2562 418 : let timeline_id = timeline_id_raw.to_string();
2563 418 : let flush_time_histo = StorageTimeMetrics::new(
2564 418 : StorageTimeOperation::LayerFlush,
2565 418 : &tenant_id,
2566 418 : &shard_id,
2567 418 : &timeline_id,
2568 418 : );
2569 418 : let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
2570 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2571 418 : .unwrap();
2572 418 : let compact_time_histo = StorageTimeMetrics::new(
2573 418 : StorageTimeOperation::Compact,
2574 418 : &tenant_id,
2575 418 : &shard_id,
2576 418 : &timeline_id,
2577 418 : );
2578 418 : let create_images_time_histo = StorageTimeMetrics::new(
2579 418 : StorageTimeOperation::CreateImages,
2580 418 : &tenant_id,
2581 418 : &shard_id,
2582 418 : &timeline_id,
2583 418 : );
2584 418 : let logical_size_histo = StorageTimeMetrics::new(
2585 418 : StorageTimeOperation::LogicalSize,
2586 418 : &tenant_id,
2587 418 : &shard_id,
2588 418 : &timeline_id,
2589 418 : );
2590 418 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2591 418 : StorageTimeOperation::ImitateLogicalSize,
2592 418 : &tenant_id,
2593 418 : &shard_id,
2594 418 : &timeline_id,
2595 418 : );
2596 418 : let load_layer_map_histo = StorageTimeMetrics::new(
2597 418 : StorageTimeOperation::LoadLayerMap,
2598 418 : &tenant_id,
2599 418 : &shard_id,
2600 418 : &timeline_id,
2601 418 : );
2602 418 : let garbage_collect_histo = StorageTimeMetrics::new(
2603 418 : StorageTimeOperation::Gc,
2604 418 : &tenant_id,
2605 418 : &shard_id,
2606 418 : &timeline_id,
2607 418 : );
2608 418 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2609 418 : StorageTimeOperation::FindGcCutoffs,
2610 418 : &tenant_id,
2611 418 : &shard_id,
2612 418 : &timeline_id,
2613 418 : );
2614 418 : let last_record_lsn_gauge = LAST_RECORD_LSN
2615 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2616 418 : .unwrap();
2617 418 :
2618 418 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
2619 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2620 418 : .unwrap();
2621 418 :
2622 418 : let pitr_history_size = PITR_HISTORY_SIZE
2623 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2624 418 : .unwrap();
2625 418 :
2626 418 : let archival_size = TIMELINE_ARCHIVE_SIZE
2627 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2628 418 : .unwrap();
2629 418 :
2630 418 : let layer_size_image = TIMELINE_LAYER_SIZE
2631 418 : .get_metric_with_label_values(&[
2632 418 : &tenant_id,
2633 418 : &shard_id,
2634 418 : &timeline_id,
2635 418 : MetricLayerKind::Image.into(),
2636 418 : ])
2637 418 : .unwrap();
2638 418 :
2639 418 : let layer_count_image = TIMELINE_LAYER_COUNT
2640 418 : .get_metric_with_label_values(&[
2641 418 : &tenant_id,
2642 418 : &shard_id,
2643 418 : &timeline_id,
2644 418 : MetricLayerKind::Image.into(),
2645 418 : ])
2646 418 : .unwrap();
2647 418 :
2648 418 : let layer_size_delta = TIMELINE_LAYER_SIZE
2649 418 : .get_metric_with_label_values(&[
2650 418 : &tenant_id,
2651 418 : &shard_id,
2652 418 : &timeline_id,
2653 418 : MetricLayerKind::Delta.into(),
2654 418 : ])
2655 418 : .unwrap();
2656 418 :
2657 418 : let layer_count_delta = TIMELINE_LAYER_COUNT
2658 418 : .get_metric_with_label_values(&[
2659 418 : &tenant_id,
2660 418 : &shard_id,
2661 418 : &timeline_id,
2662 418 : MetricLayerKind::Delta.into(),
2663 418 : ])
2664 418 : .unwrap();
2665 418 :
2666 418 : let standby_horizon_gauge = STANDBY_HORIZON
2667 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2668 418 : .unwrap();
2669 418 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2670 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2671 418 : .unwrap();
2672 418 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
2673 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2674 418 : .unwrap();
2675 418 : // TODO: we shouldn't expose this metric
2676 418 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2677 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2678 418 : .unwrap();
2679 418 : let aux_file_size_gauge = AUX_FILE_SIZE
2680 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2681 418 : .unwrap();
2682 418 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
2683 418 : let directory_entries_count_gauge_closure = {
2684 418 : let tenant_shard_id = *tenant_shard_id;
2685 418 : let timeline_id_raw = *timeline_id_raw;
2686 0 : move || {
2687 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2688 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2689 0 : let timeline_id = timeline_id_raw.to_string();
2690 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2691 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2692 0 : .unwrap();
2693 0 : gauge
2694 0 : }
2695 : };
2696 418 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2697 418 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2698 418 : let evictions = EVICTIONS
2699 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2700 418 : .unwrap();
2701 418 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2702 418 : .build(&tenant_id, &shard_id, &timeline_id);
2703 418 :
2704 418 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2705 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2706 418 : .unwrap();
2707 418 :
2708 418 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
2709 418 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2710 418 : .unwrap();
2711 418 :
2712 418 : TimelineMetrics {
2713 418 : tenant_id,
2714 418 : shard_id,
2715 418 : timeline_id,
2716 418 : flush_time_histo,
2717 418 : flush_wait_upload_time_gauge,
2718 418 : compact_time_histo,
2719 418 : create_images_time_histo,
2720 418 : logical_size_histo,
2721 418 : imitate_logical_size_histo,
2722 418 : garbage_collect_histo,
2723 418 : find_gc_cutoffs_histo,
2724 418 : load_layer_map_histo,
2725 418 : last_record_lsn_gauge,
2726 418 : disk_consistent_lsn_gauge,
2727 418 : pitr_history_size,
2728 418 : archival_size,
2729 418 : layer_size_image,
2730 418 : layer_count_image,
2731 418 : layer_size_delta,
2732 418 : layer_count_delta,
2733 418 : standby_horizon_gauge,
2734 418 : resident_physical_size_gauge,
2735 418 : visible_physical_size_gauge,
2736 418 : current_logical_size_gauge,
2737 418 : aux_file_size_gauge,
2738 418 : directory_entries_count_gauge,
2739 418 : evictions,
2740 418 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2741 418 : evictions_with_low_residence_duration,
2742 418 : ),
2743 418 : valid_lsn_lease_count_gauge,
2744 418 : wal_records_received,
2745 418 : shutdown: std::sync::atomic::AtomicBool::default(),
2746 418 : }
2747 418 : }
2748 :
2749 1530 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2750 1530 : self.resident_physical_size_add(sz);
2751 1530 : }
2752 :
2753 492 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2754 492 : self.resident_physical_size_gauge.sub(sz);
2755 492 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2756 492 : }
2757 :
2758 1560 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2759 1560 : self.resident_physical_size_gauge.add(sz);
2760 1560 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2761 1560 : }
2762 :
2763 10 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2764 10 : self.resident_physical_size_gauge.get()
2765 10 : }
2766 :
2767 1144 : pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
2768 1144 : self.flush_wait_upload_time_gauge.add(duration);
2769 1144 : crate::metrics::FLUSH_WAIT_UPLOAD_TIME
2770 1144 : .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
2771 1144 : .unwrap()
2772 1144 : .add(duration);
2773 1144 : }
2774 :
2775 10 : pub(crate) fn shutdown(&self) {
2776 10 : let was_shutdown = self
2777 10 : .shutdown
2778 10 : .swap(true, std::sync::atomic::Ordering::Relaxed);
2779 10 :
2780 10 : if was_shutdown {
2781 : // this happens on tenant deletion because tenant first shuts down timelines, then
2782 : // invokes timeline deletion which first shuts down the timeline again.
2783 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
2784 0 : return;
2785 10 : }
2786 10 :
2787 10 : let tenant_id = &self.tenant_id;
2788 10 : let timeline_id = &self.timeline_id;
2789 10 : let shard_id = &self.shard_id;
2790 10 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2791 10 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2792 10 : let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2793 10 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2794 10 : {
2795 10 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2796 10 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2797 10 : }
2798 10 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2799 10 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2800 10 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2801 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2802 10 : }
2803 :
2804 10 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2805 10 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2806 10 :
2807 10 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2808 10 : tenant_id,
2809 10 : shard_id,
2810 10 : timeline_id,
2811 10 : MetricLayerKind::Image.into(),
2812 10 : ]);
2813 10 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2814 10 : tenant_id,
2815 10 : shard_id,
2816 10 : timeline_id,
2817 10 : MetricLayerKind::Image.into(),
2818 10 : ]);
2819 10 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2820 10 : tenant_id,
2821 10 : shard_id,
2822 10 : timeline_id,
2823 10 : MetricLayerKind::Delta.into(),
2824 10 : ]);
2825 10 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2826 10 : tenant_id,
2827 10 : shard_id,
2828 10 : timeline_id,
2829 10 : MetricLayerKind::Delta.into(),
2830 10 : ]);
2831 10 :
2832 10 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2833 10 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2834 10 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2835 10 :
2836 10 : self.evictions_with_low_residence_duration
2837 10 : .write()
2838 10 : .unwrap()
2839 10 : .remove(tenant_id, shard_id, timeline_id);
2840 :
2841 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
2842 : // removed at the end of it. The idea is to have the metrics outlive the
2843 : // entity during which they're observed, e.g., the smgr metrics shall
2844 : // outlive an individual smgr connection, but not the timeline.
2845 :
2846 90 : for op in StorageTimeOperation::VARIANTS {
2847 80 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2848 80 : op,
2849 80 : tenant_id,
2850 80 : shard_id,
2851 80 : timeline_id,
2852 80 : ]);
2853 80 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2854 80 : op,
2855 80 : tenant_id,
2856 80 : shard_id,
2857 80 : timeline_id,
2858 80 : ]);
2859 80 : }
2860 :
2861 30 : for op in STORAGE_IO_SIZE_OPERATIONS {
2862 20 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2863 20 : }
2864 :
2865 10 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
2866 10 : SmgrQueryType::GetPageAtLsn.into(),
2867 10 : tenant_id,
2868 10 : shard_id,
2869 10 : timeline_id,
2870 10 : ]);
2871 10 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2872 10 : SmgrQueryType::GetPageAtLsn.into(),
2873 10 : tenant_id,
2874 10 : shard_id,
2875 10 : timeline_id,
2876 10 : ]);
2877 10 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
2878 10 : tenant_id,
2879 10 : shard_id,
2880 10 : timeline_id,
2881 10 : ]);
2882 10 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
2883 10 : tenant_id,
2884 10 : shard_id,
2885 10 : timeline_id,
2886 10 : ]);
2887 10 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
2888 10 : tenant_id,
2889 10 : shard_id,
2890 10 : timeline_id,
2891 10 : ]);
2892 10 : }
2893 : }
2894 :
2895 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2896 6 : // Only shard zero deals in synthetic sizes
2897 6 : if tenant_shard_id.is_shard_zero() {
2898 6 : let tid = tenant_shard_id.tenant_id.to_string();
2899 6 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2900 6 : }
2901 :
2902 6 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
2903 6 :
2904 6 : // we leave the BROKEN_TENANTS_SET entry if any
2905 6 : }
2906 :
2907 : use futures::Future;
2908 : use pin_project_lite::pin_project;
2909 : use std::collections::HashMap;
2910 : use std::num::NonZeroUsize;
2911 : use std::pin::Pin;
2912 : use std::sync::atomic::AtomicU64;
2913 : use std::sync::{Arc, Mutex};
2914 : use std::task::{Context, Poll};
2915 : use std::time::{Duration, Instant};
2916 :
2917 : use crate::config::PageServerConf;
2918 : use crate::context::{PageContentKind, RequestContext};
2919 : use crate::task_mgr::TaskKind;
2920 : use crate::tenant::mgr::TenantSlot;
2921 : use crate::tenant::tasks::BackgroundLoopKind;
2922 : use crate::tenant::Timeline;
2923 :
2924 : /// Maintain a per timeline gauge in addition to the global gauge.
2925 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
2926 : last_set: AtomicU64,
2927 : gauge: UIntGauge,
2928 : }
2929 :
2930 : impl PerTimelineRemotePhysicalSizeGauge {
2931 428 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2932 428 : Self {
2933 428 : last_set: AtomicU64::new(0),
2934 428 : gauge: per_timeline_gauge,
2935 428 : }
2936 428 : }
2937 1827 : pub(crate) fn set(&self, sz: u64) {
2938 1827 : self.gauge.set(sz);
2939 1827 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
2940 1827 : if sz < prev {
2941 23 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
2942 1804 : } else {
2943 1804 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
2944 1804 : };
2945 1827 : }
2946 2 : pub(crate) fn get(&self) -> u64 {
2947 2 : self.gauge.get()
2948 2 : }
2949 : }
2950 :
2951 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
2952 20 : fn drop(&mut self) {
2953 20 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
2954 20 : }
2955 : }
2956 :
2957 : pub(crate) struct RemoteTimelineClientMetrics {
2958 : tenant_id: String,
2959 : shard_id: String,
2960 : timeline_id: String,
2961 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
2962 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
2963 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2964 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
2965 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
2966 : }
2967 :
2968 : impl RemoteTimelineClientMetrics {
2969 428 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
2970 428 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
2971 428 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
2972 428 : let timeline_id_str = timeline_id.to_string();
2973 428 :
2974 428 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
2975 428 : REMOTE_PHYSICAL_SIZE
2976 428 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
2977 428 : .unwrap(),
2978 428 : );
2979 428 :
2980 428 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
2981 428 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
2982 428 : .unwrap();
2983 428 :
2984 428 : RemoteTimelineClientMetrics {
2985 428 : tenant_id: tenant_id_str,
2986 428 : shard_id: shard_id_str,
2987 428 : timeline_id: timeline_id_str,
2988 428 : calls: Mutex::new(HashMap::default()),
2989 428 : bytes_started_counter: Mutex::new(HashMap::default()),
2990 428 : bytes_finished_counter: Mutex::new(HashMap::default()),
2991 428 : remote_physical_size_gauge,
2992 428 : projected_remote_consistent_lsn_gauge,
2993 428 : }
2994 428 : }
2995 :
2996 2758 : pub fn remote_operation_time(
2997 2758 : &self,
2998 2758 : file_kind: &RemoteOpFileKind,
2999 2758 : op_kind: &RemoteOpKind,
3000 2758 : status: &'static str,
3001 2758 : ) -> Histogram {
3002 2758 : let key = (file_kind.as_str(), op_kind.as_str(), status);
3003 2758 : REMOTE_OPERATION_TIME
3004 2758 : .get_metric_with_label_values(&[key.0, key.1, key.2])
3005 2758 : .unwrap()
3006 2758 : }
3007 :
3008 6491 : fn calls_counter_pair(
3009 6491 : &self,
3010 6491 : file_kind: &RemoteOpFileKind,
3011 6491 : op_kind: &RemoteOpKind,
3012 6491 : ) -> IntCounterPair {
3013 6491 : let mut guard = self.calls.lock().unwrap();
3014 6491 : let key = (file_kind.as_str(), op_kind.as_str());
3015 6491 : let metric = guard.entry(key).or_insert_with(move || {
3016 754 : REMOTE_TIMELINE_CLIENT_CALLS
3017 754 : .get_metric_with_label_values(&[
3018 754 : &self.tenant_id,
3019 754 : &self.shard_id,
3020 754 : &self.timeline_id,
3021 754 : key.0,
3022 754 : key.1,
3023 754 : ])
3024 754 : .unwrap()
3025 6491 : });
3026 6491 : metric.clone()
3027 6491 : }
3028 :
3029 1538 : fn bytes_started_counter(
3030 1538 : &self,
3031 1538 : file_kind: &RemoteOpFileKind,
3032 1538 : op_kind: &RemoteOpKind,
3033 1538 : ) -> IntCounter {
3034 1538 : let mut guard = self.bytes_started_counter.lock().unwrap();
3035 1538 : let key = (file_kind.as_str(), op_kind.as_str());
3036 1538 : let metric = guard.entry(key).or_insert_with(move || {
3037 290 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3038 290 : .get_metric_with_label_values(&[
3039 290 : &self.tenant_id,
3040 290 : &self.shard_id,
3041 290 : &self.timeline_id,
3042 290 : key.0,
3043 290 : key.1,
3044 290 : ])
3045 290 : .unwrap()
3046 1538 : });
3047 1538 : metric.clone()
3048 1538 : }
3049 :
3050 2861 : fn bytes_finished_counter(
3051 2861 : &self,
3052 2861 : file_kind: &RemoteOpFileKind,
3053 2861 : op_kind: &RemoteOpKind,
3054 2861 : ) -> IntCounter {
3055 2861 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3056 2861 : let key = (file_kind.as_str(), op_kind.as_str());
3057 2861 : let metric = guard.entry(key).or_insert_with(move || {
3058 290 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3059 290 : .get_metric_with_label_values(&[
3060 290 : &self.tenant_id,
3061 290 : &self.shard_id,
3062 290 : &self.timeline_id,
3063 290 : key.0,
3064 290 : key.1,
3065 290 : ])
3066 290 : .unwrap()
3067 2861 : });
3068 2861 : metric.clone()
3069 2861 : }
3070 : }
3071 :
#[cfg(test)]
impl RemoteTimelineClientMetrics {
    /// Returns the value of the cached bytes-started counter for `(file_kind, op_kind)`,
    /// or `None` if no such counter has been cached yet.
    pub fn get_bytes_started_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_started_counter.lock().unwrap();
        let lookup = (file_kind.as_str(), op_kind.as_str());
        let cached = cache.get(&lookup)?;
        Some(cached.get())
    }

    /// Returns the value of the cached bytes-finished counter for `(file_kind, op_kind)`,
    /// or `None` if no such counter has been cached yet.
    pub fn get_bytes_finished_counter_value(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> Option<u64> {
        let cache = self.bytes_finished_counter.lock().unwrap();
        let lookup = (file_kind.as_str(), op_kind.as_str());
        let cached = cache.get(&lookup)?;
        Some(cached.get())
    }
}
3094 :
3095 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3096 : #[must_use]
3097 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3098 : /// Decremented on drop.
3099 : calls_counter_pair: Option<IntCounterPair>,
3100 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
3101 : bytes_finished: Option<(IntCounter, u64)>,
3102 : }
3103 :
3104 : impl RemoteTimelineClientCallMetricGuard {
3105 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
3106 : /// The caller vouches to do the metric updates manually.
3107 3468 : pub fn will_decrement_manually(mut self) {
3108 3468 : let RemoteTimelineClientCallMetricGuard {
3109 3468 : calls_counter_pair,
3110 3468 : bytes_finished,
3111 3468 : } = &mut self;
3112 3468 : calls_counter_pair.take();
3113 3468 : bytes_finished.take();
3114 3468 : }
3115 : }
3116 :
3117 : impl Drop for RemoteTimelineClientCallMetricGuard {
3118 3494 : fn drop(&mut self) {
3119 3494 : let RemoteTimelineClientCallMetricGuard {
3120 3494 : calls_counter_pair,
3121 3494 : bytes_finished,
3122 3494 : } = self;
3123 3494 : if let Some(guard) = calls_counter_pair.take() {
3124 26 : guard.dec();
3125 3468 : }
3126 3494 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3127 0 : bytes_finished_metric.inc_by(*value);
3128 3494 : }
3129 3494 : }
3130 : }
3131 :
/// Tells [`RemoteTimelineClientMetrics`] whether the byte size of a call should be
/// tracked in the applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
    /// Do not account for this call's byte size in any metrics.
    /// The `reason` field is there to make the call sites self-documenting
    /// about why they don't need the metric.
    DontTrackSize { reason: &'static str },
    /// Track the byte size of the call in applicable metric(s).
    Bytes(u64),
}
3142 :
3143 : impl RemoteTimelineClientMetrics {
3144 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3145 : ///
3146 : /// Drop the returned guard object once the operation is finished to updates corresponding metrics that track completions.
3147 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
3148 : /// is more suitable.
3149 : /// Never do both.
3150 3494 : pub(crate) fn call_begin(
3151 3494 : &self,
3152 3494 : file_kind: &RemoteOpFileKind,
3153 3494 : op_kind: &RemoteOpKind,
3154 3494 : size: RemoteTimelineClientMetricsCallTrackSize,
3155 3494 : ) -> RemoteTimelineClientCallMetricGuard {
3156 3494 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3157 3494 : calls_counter_pair.inc();
3158 :
3159 3494 : let bytes_finished = match size {
3160 1956 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3161 1956 : // nothing to do
3162 1956 : None
3163 : }
3164 1538 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3165 1538 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3166 1538 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3167 1538 : Some((finished_counter, size))
3168 : }
3169 : };
3170 3494 : RemoteTimelineClientCallMetricGuard {
3171 3494 : calls_counter_pair: Some(calls_counter_pair),
3172 3494 : bytes_finished,
3173 3494 : }
3174 3494 : }
3175 :
3176 : /// Manually udpate the metrics that track completions, instead of using the guard object.
3177 : /// Using the guard object is generally preferable.
3178 : /// See [`call_begin`](Self::call_begin) for more context.
3179 2997 : pub(crate) fn call_end(
3180 2997 : &self,
3181 2997 : file_kind: &RemoteOpFileKind,
3182 2997 : op_kind: &RemoteOpKind,
3183 2997 : size: RemoteTimelineClientMetricsCallTrackSize,
3184 2997 : ) {
3185 2997 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3186 2997 : calls_counter_pair.dec();
3187 2997 : match size {
3188 1674 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3189 1323 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3190 1323 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3191 1323 : }
3192 : }
3193 2997 : }
3194 : }
3195 :
3196 : impl Drop for RemoteTimelineClientMetrics {
3197 20 : fn drop(&mut self) {
3198 20 : let RemoteTimelineClientMetrics {
3199 20 : tenant_id,
3200 20 : shard_id,
3201 20 : timeline_id,
3202 20 : remote_physical_size_gauge,
3203 20 : calls,
3204 20 : bytes_started_counter,
3205 20 : bytes_finished_counter,
3206 20 : projected_remote_consistent_lsn_gauge,
3207 20 : } = self;
3208 24 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3209 24 : let mut res = [Ok(()), Ok(())];
3210 24 : REMOTE_TIMELINE_CLIENT_CALLS
3211 24 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3212 24 : // don't care about results
3213 24 : }
3214 20 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3215 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3216 6 : tenant_id,
3217 6 : shard_id,
3218 6 : timeline_id,
3219 6 : a,
3220 6 : b,
3221 6 : ]);
3222 6 : }
3223 20 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3224 6 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3225 6 : tenant_id,
3226 6 : shard_id,
3227 6 : timeline_id,
3228 6 : a,
3229 6 : b,
3230 6 : ]);
3231 6 : }
3232 20 : {
3233 20 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
3234 20 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3235 20 : }
3236 20 : {
3237 20 : let _ = projected_remote_consistent_lsn_gauge;
3238 20 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3239 20 : tenant_id,
3240 20 : shard_id,
3241 20 : timeline_id,
3242 20 : ]);
3243 20 : }
3244 20 : }
3245 : }
3246 :
3247 : /// Wrapper future that measures the time spent by a remote storage operation,
3248 : /// and records the time and success/failure as a prometheus metric.
3249 : pub(crate) trait MeasureRemoteOp: Sized {
3250 2876 : fn measure_remote_op(
3251 2876 : self,
3252 2876 : file_kind: RemoteOpFileKind,
3253 2876 : op: RemoteOpKind,
3254 2876 : metrics: Arc<RemoteTimelineClientMetrics>,
3255 2876 : ) -> MeasuredRemoteOp<Self> {
3256 2876 : let start = Instant::now();
3257 2876 : MeasuredRemoteOp {
3258 2876 : inner: self,
3259 2876 : file_kind,
3260 2876 : op,
3261 2876 : start,
3262 2876 : metrics,
3263 2876 : }
3264 2876 : }
3265 : }
3266 :
3267 : impl<T: Sized> MeasureRemoteOp for T {}
3268 :
3269 : pin_project! {
3270 : pub(crate) struct MeasuredRemoteOp<F>
3271 : {
3272 : #[pin]
3273 : inner: F,
3274 : file_kind: RemoteOpFileKind,
3275 : op: RemoteOpKind,
3276 : start: Instant,
3277 : metrics: Arc<RemoteTimelineClientMetrics>,
3278 : }
3279 : }
3280 :
3281 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
3282 : type Output = Result<O, E>;
3283 :
3284 46334 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
3285 46334 : let this = self.project();
3286 46334 : let poll_result = this.inner.poll(cx);
3287 46334 : if let Poll::Ready(ref res) = poll_result {
3288 2758 : let duration = this.start.elapsed();
3289 2758 : let status = if res.is_ok() { &"success" } else { &"failure" };
3290 2758 : this.metrics
3291 2758 : .remote_operation_time(this.file_kind, this.op, status)
3292 2758 : .observe(duration.as_secs_f64());
3293 43576 : }
3294 46334 : poll_result
3295 46334 : }
3296 : }
3297 :
3298 : pub mod tokio_epoll_uring {
3299 : use std::{
3300 : collections::HashMap,
3301 : sync::{Arc, Mutex},
3302 : };
3303 :
3304 : use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
3305 : use once_cell::sync::Lazy;
3306 :
3307 : /// Shared storage for tokio-epoll-uring thread local metrics.
3308 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3309 102 : Lazy::new(|| {
3310 102 : let slots_submission_queue_depth = register_histogram!(
3311 102 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3312 102 : "The slots waiters queue depth of each tokio_epoll_uring system",
3313 102 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
3314 102 : )
3315 102 : .expect("failed to define a metric");
3316 102 : ThreadLocalMetricsStorage {
3317 102 : observers: Mutex::new(HashMap::new()),
3318 102 : slots_submission_queue_depth,
3319 102 : }
3320 102 : });
3321 :
3322 : pub struct ThreadLocalMetricsStorage {
3323 : /// List of thread local metrics observers.
3324 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3325 : /// A histogram shared between all thread local systems
3326 : /// for collecting slots submission queue depth.
3327 : slots_submission_queue_depth: Histogram,
3328 : }
3329 :
3330 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3331 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3332 : ///
3333 : /// The System makes observations into [`Self`] and periodically, the collector
3334 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3335 : ///
3336 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3337 : /// But except for the periodic flush, the lock is uncontended so there's no waiting
3338 : /// for cache coherence protocol to get an exclusive cache line.
3339 : pub struct ThreadLocalMetrics {
3340 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3341 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3342 : }
3343 :
3344 : impl ThreadLocalMetricsStorage {
3345 : /// Registers a new thread local system. Returns a thread local metrics observer.
3346 442 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3347 442 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3348 442 : self.slots_submission_queue_depth.local(),
3349 442 : ));
3350 442 : let mut g = self.observers.lock().unwrap();
3351 442 : g.insert(id, Arc::clone(&per_system_metrics));
3352 442 : per_system_metrics
3353 442 : }
3354 :
3355 : /// Removes metrics observer for a thread local system.
3356 : /// This should be called before dropping a thread local system.
3357 102 : pub fn remove_system(&self, id: u64) {
3358 102 : let mut g = self.observers.lock().unwrap();
3359 102 : g.remove(&id);
3360 102 : }
3361 :
3362 : /// Flush all thread local metrics to the shared storage.
3363 0 : pub fn flush_thread_local_metrics(&self) {
3364 0 : let g = self.observers.lock().unwrap();
3365 0 : g.values().for_each(|local| {
3366 0 : local.flush();
3367 0 : });
3368 0 : }
3369 : }
3370 :
3371 : impl ThreadLocalMetrics {
3372 442 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3373 442 : ThreadLocalMetrics {
3374 442 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3375 442 : }
3376 442 : }
3377 :
3378 : /// Flushes the thread local metrics to shared aggregator.
3379 0 : pub fn flush(&self) {
3380 0 : let Self {
3381 0 : slots_submission_queue_depth,
3382 0 : } = self;
3383 0 : slots_submission_queue_depth.lock().unwrap().flush();
3384 0 : }
3385 : }
3386 :
3387 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3388 902290 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3389 902290 : let Self {
3390 902290 : slots_submission_queue_depth,
3391 902290 : } = self;
3392 902290 : slots_submission_queue_depth
3393 902290 : .lock()
3394 902290 : .unwrap()
3395 902290 : .observe(queue_depth as f64);
3396 902290 : }
3397 : }
3398 :
/// Custom metrics collector for tokio-epoll-uring: exposes the created/destroyed
/// system gauges and the shared slots submission queue depth histogram.
pub struct Collector {
    /// Descriptors handed out by `desc()`.
    descs: Vec<metrics::core::Desc>,
    /// Gauge set from `tokio_epoll_uring::metrics::global()` on every collection.
    systems_created: UIntGauge,
    /// Gauge set from `tokio_epoll_uring::metrics::global()` on every collection.
    systems_destroyed: UIntGauge,
    /// Storage whose thread-local observers are flushed on every collection.
    thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
}
3405 :
3406 : impl metrics::core::Collector for Collector {
3407 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3408 0 : self.descs.iter().collect()
3409 0 : }
3410 :
3411 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3412 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3413 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3414 0 : systems_created,
3415 0 : systems_destroyed,
3416 0 : } = tokio_epoll_uring::metrics::global();
3417 0 : self.systems_created.set(systems_created);
3418 0 : mfs.extend(self.systems_created.collect());
3419 0 : self.systems_destroyed.set(systems_destroyed);
3420 0 : mfs.extend(self.systems_destroyed.collect());
3421 0 :
3422 0 : self.thread_local_metrics_storage
3423 0 : .flush_thread_local_metrics();
3424 0 :
3425 0 : mfs.extend(
3426 0 : self.thread_local_metrics_storage
3427 0 : .slots_submission_queue_depth
3428 0 : .collect(),
3429 0 : );
3430 0 : mfs
3431 0 : }
3432 : }
3433 :
3434 : impl Collector {
3435 : const NMETRICS: usize = 3;
3436 :
3437 : #[allow(clippy::new_without_default)]
3438 0 : pub fn new() -> Self {
3439 0 : let mut descs = Vec::new();
3440 0 :
3441 0 : let systems_created = UIntGauge::new(
3442 0 : "pageserver_tokio_epoll_uring_systems_created",
3443 0 : "counter of tokio-epoll-uring systems that were created",
3444 0 : )
3445 0 : .unwrap();
3446 0 : descs.extend(
3447 0 : metrics::core::Collector::desc(&systems_created)
3448 0 : .into_iter()
3449 0 : .cloned(),
3450 0 : );
3451 0 :
3452 0 : let systems_destroyed = UIntGauge::new(
3453 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
3454 0 : "counter of tokio-epoll-uring systems that were destroyed",
3455 0 : )
3456 0 : .unwrap();
3457 0 : descs.extend(
3458 0 : metrics::core::Collector::desc(&systems_destroyed)
3459 0 : .into_iter()
3460 0 : .cloned(),
3461 0 : );
3462 0 :
3463 0 : Self {
3464 0 : descs,
3465 0 : systems_created,
3466 0 : systems_destroyed,
3467 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
3468 0 : }
3469 0 : }
3470 : }
3471 :
/// Lazily registered counter
/// `pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count`.
///
/// NOTE(review): the help text talks about creation that "spanned multiple
/// executor threads", which does not obviously describe a launch *success*
/// count -- confirm the intended wording against the call sites.
pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
        "Number of times where thread_local_system creation spanned multiple executor threads",
    )
    .unwrap()
});
3479 :
/// Lazily registered counter
/// `pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count`:
/// per its help text, the number of failed thread-local system creations that
/// were retried after back-off.
pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
        "Number of times thread_local_system creation failed and was retried after back-off.",
    )
    .unwrap()
});
3487 : }
3488 :
3489 : pub(crate) mod tenant_throttling {
3490 : use metrics::{register_int_counter_vec, IntCounter};
3491 : use once_cell::sync::Lazy;
3492 : use utils::shard::TenantShardId;
3493 :
3494 : use crate::tenant::{self};
3495 :
3496 : struct GlobalAndPerTenantIntCounter {
3497 : global: IntCounter,
3498 : per_tenant: IntCounter,
3499 : }
3500 :
3501 : impl GlobalAndPerTenantIntCounter {
3502 : #[inline(always)]
3503 0 : pub(crate) fn inc(&self) {
3504 0 : self.inc_by(1)
3505 0 : }
3506 : #[inline(always)]
3507 0 : pub(crate) fn inc_by(&self, n: u64) {
3508 0 : self.global.inc_by(n);
3509 0 : self.per_tenant.inc_by(n);
3510 0 : }
3511 : }
3512 :
/// Throttling counters for one tenant shard. `KIND` is an index into `KINDS`
/// and selects the value of the `kind` label.
pub(crate) struct Metrics<const KIND: usize> {
    /// Incremented by `accounting_start`.
    count_accounted_start: GlobalAndPerTenantIntCounter,
    /// Incremented by `accounting_finish`.
    count_accounted_finish: GlobalAndPerTenantIntCounter,
    /// Accumulated wait time in microseconds, added to by `observe_throttling`.
    wait_time: GlobalAndPerTenantIntCounter,
    /// Incremented once per `observe_throttling` call.
    count_throttled: GlobalAndPerTenantIntCounter,
}
3519 :
/// Throttling accounting starts, aggregated over all tenants (label: `kind`).
static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count_accounted_start_global",
        "Count of tenant throttling starts, by kind of throttle.",
        &["kind"]
    )
    .unwrap()
});
/// Throttling accounting starts per tenant shard
/// (labels: `kind`, `tenant_id`, `shard_id`).
static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count_accounted_start",
        "Count of tenant throttling starts, by kind of throttle.",
        &["kind", "tenant_id", "shard_id"]
    )
    .unwrap()
});
/// Throttling accounting finishes, aggregated over all tenants (label: `kind`).
static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count_accounted_finish_global",
        "Count of tenant throttling finishes, by kind of throttle.",
        &["kind"]
    )
    .unwrap()
});
/// Throttling accounting finishes per tenant shard
/// (labels: `kind`, `tenant_id`, `shard_id`).
static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count_accounted_finish",
        "Count of tenant throttling finishes, by kind of throttle.",
        &["kind", "tenant_id", "shard_id"]
    )
    .unwrap()
});
/// Total microseconds spent waiting on the throttle, aggregated over all
/// tenants (label: `kind`).
static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_wait_usecs_sum_global",
        "Sum of microseconds that spent waiting throttle by kind of throttle.",
        &["kind"]
    )
    .unwrap()
});
/// Total microseconds spent waiting on the throttle per tenant shard
/// (labels: `kind`, `tenant_id`, `shard_id`).
static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_wait_usecs_sum",
        "Sum of microseconds that spent waiting throttle by kind of throttle.",
        &["kind", "tenant_id", "shard_id"]
    )
    .unwrap()
});
3568 :
/// Number of throttlings, aggregated over all tenants (label: `kind`).
static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count_global",
        "Count of tenant throttlings, by kind of throttle.",
        &["kind"]
    )
    .unwrap()
});
/// Number of throttlings per tenant shard
/// (labels: `kind`, `tenant_id`, `shard_id`).
static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_tenant_throttling_count",
        "Count of tenant throttlings, by kind of throttle.",
        &["kind", "tenant_id", "shard_id"]
    )
    .unwrap()
});
3585 :
/// Values of the `kind` label; the const parameter of `Metrics` indexes into this slice.
const KINDS: &[&str] = &["pagestream"];
/// Throttling metrics for the `"pagestream"` kind (index 0 of `KINDS`).
pub type Pagestream = Metrics<0>;
3588 :
3589 : impl<const KIND: usize> Metrics<KIND> {
3590 192 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
3591 192 : let per_tenant_label_values = &[
3592 192 : KINDS[KIND],
3593 192 : &tenant_shard_id.tenant_id.to_string(),
3594 192 : &tenant_shard_id.shard_slug().to_string(),
3595 192 : ];
3596 192 : Metrics {
3597 192 : count_accounted_start: {
3598 192 : GlobalAndPerTenantIntCounter {
3599 192 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
3600 192 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
3601 192 : .with_label_values(per_tenant_label_values),
3602 192 : }
3603 192 : },
3604 192 : count_accounted_finish: {
3605 192 : GlobalAndPerTenantIntCounter {
3606 192 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
3607 192 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
3608 192 : .with_label_values(per_tenant_label_values),
3609 192 : }
3610 192 : },
3611 192 : wait_time: {
3612 192 : GlobalAndPerTenantIntCounter {
3613 192 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
3614 192 : per_tenant: WAIT_USECS_PER_TENANT
3615 192 : .with_label_values(per_tenant_label_values),
3616 192 : }
3617 192 : },
3618 192 : count_throttled: {
3619 192 : GlobalAndPerTenantIntCounter {
3620 192 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
3621 192 : per_tenant: WAIT_COUNT_PER_TENANT
3622 192 : .with_label_values(per_tenant_label_values),
3623 192 : }
3624 192 : },
3625 192 : }
3626 192 : }
3627 : }
3628 :
3629 0 : pub(crate) fn preinitialize_global_metrics() {
3630 0 : Lazy::force(&COUNT_ACCOUNTED_START);
3631 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
3632 0 : Lazy::force(&WAIT_USECS);
3633 0 : Lazy::force(&WAIT_COUNT);
3634 0 : }
3635 :
3636 6 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3637 24 : for m in &[
3638 6 : &COUNT_ACCOUNTED_START_PER_TENANT,
3639 6 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
3640 6 : &WAIT_USECS_PER_TENANT,
3641 6 : &WAIT_COUNT_PER_TENANT,
3642 6 : ] {
3643 48 : for kind in KINDS {
3644 24 : let _ = m.remove_label_values(&[
3645 24 : kind,
3646 24 : &tenant_shard_id.tenant_id.to_string(),
3647 24 : &tenant_shard_id.shard_slug().to_string(),
3648 24 : ]);
3649 24 : }
3650 : }
3651 6 : }
3652 :
3653 : impl<const KIND: usize> tenant::throttle::Metric for Metrics<KIND> {
3654 : #[inline(always)]
3655 0 : fn accounting_start(&self) {
3656 0 : self.count_accounted_start.inc();
3657 0 : }
3658 : #[inline(always)]
3659 0 : fn accounting_finish(&self) {
3660 0 : self.count_accounted_finish.inc();
3661 0 : }
3662 : #[inline(always)]
3663 0 : fn observe_throttling(
3664 0 : &self,
3665 0 : tenant::throttle::Observation { wait_time }: &tenant::throttle::Observation,
3666 0 : ) {
3667 0 : let val = u64::try_from(wait_time.as_micros()).unwrap();
3668 0 : self.wait_time.inc_by(val);
3669 0 : self.count_throttled.inc();
3670 0 : }
3671 : }
3672 : }
3673 :
3674 : pub(crate) mod disk_usage_based_eviction {
3675 : use super::*;
3676 :
3677 : pub(crate) struct Metrics {
3678 : pub(crate) tenant_collection_time: Histogram,
3679 : pub(crate) tenant_layer_count: Histogram,
3680 : pub(crate) layers_collected: IntCounter,
3681 : pub(crate) layers_selected: IntCounter,
3682 : pub(crate) layers_evicted: IntCounter,
3683 : }
3684 :
3685 : impl Default for Metrics {
3686 0 : fn default() -> Self {
3687 0 : let tenant_collection_time = register_histogram!(
3688 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
3689 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
3690 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
3691 0 : )
3692 0 : .unwrap();
3693 0 :
3694 0 : let tenant_layer_count = register_histogram!(
3695 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
3696 0 : "Amount of layers gathered from a tenant",
3697 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
3698 0 : )
3699 0 : .unwrap();
3700 0 :
3701 0 : let layers_collected = register_int_counter!(
3702 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
3703 0 : "Amount of layers collected"
3704 0 : )
3705 0 : .unwrap();
3706 0 :
3707 0 : let layers_selected = register_int_counter!(
3708 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
3709 0 : "Amount of layers selected"
3710 0 : )
3711 0 : .unwrap();
3712 0 :
3713 0 : let layers_evicted = register_int_counter!(
3714 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
3715 0 : "Amount of layers successfully evicted"
3716 0 : )
3717 0 : .unwrap();
3718 0 :
3719 0 : Self {
3720 0 : tenant_collection_time,
3721 0 : tenant_layer_count,
3722 0 : layers_collected,
3723 0 : layers_selected,
3724 0 : layers_evicted,
3725 0 : }
3726 0 : }
3727 : }
3728 :
3729 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
3730 : }
3731 :
3732 168 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
3733 168 : register_uint_gauge_vec!(
3734 168 : "pageserver_tokio_executor_thread_configured_count",
3735 168 : "Total number of configued tokio executor threads in the process.
3736 168 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
3737 168 : &["setup"],
3738 168 : )
3739 168 : .unwrap()
3740 168 : });
3741 :
3742 168 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
3743 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
3744 168 : let _guard = SERIALIZE.lock().unwrap();
3745 168 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
3746 168 : TOKIO_EXECUTOR_THREAD_COUNT
3747 168 : .get_metric_with_label_values(&[setup])
3748 168 : .unwrap()
3749 168 : .set(u64::try_from(num_threads.get()).unwrap());
3750 168 : }
3751 :
3752 0 : pub fn preinitialize_metrics(conf: &'static PageServerConf) {
3753 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
3754 0 :
3755 0 : // Python tests need these and on some we do alerting.
3756 0 : //
3757 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
3758 0 : // order:
3759 0 : // - global metrics reside in a Lazy<PageserverMetrics>
3760 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
3761 0 : // - could move the statics into TimelineMetrics::new()?
3762 0 :
3763 0 : // counters
3764 0 : [
3765 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
3766 0 : &WALRECEIVER_STARTED_CONNECTIONS,
3767 0 : &WALRECEIVER_BROKER_UPDATES,
3768 0 : &WALRECEIVER_CANDIDATES_ADDED,
3769 0 : &WALRECEIVER_CANDIDATES_REMOVED,
3770 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
3771 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
3772 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
3773 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
3774 0 : &CIRCUIT_BREAKERS_BROKEN,
3775 0 : &CIRCUIT_BREAKERS_UNBROKEN,
3776 0 : ]
3777 0 : .into_iter()
3778 0 : .for_each(|c| {
3779 0 : Lazy::force(c);
3780 0 : });
3781 0 :
3782 0 : // Deletion queue stats
3783 0 : Lazy::force(&DELETION_QUEUE);
3784 0 :
3785 0 : // Tenant stats
3786 0 : Lazy::force(&TENANT);
3787 0 :
3788 0 : // Tenant manager stats
3789 0 : Lazy::force(&TENANT_MANAGER);
3790 0 :
3791 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
3792 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
3793 :
3794 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
3795 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
3796 0 : // values from last restart.
3797 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
3798 0 : }
3799 :
3800 : // countervecs
3801 0 : [
3802 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
3803 0 : &SMGR_QUERY_STARTED_GLOBAL,
3804 0 : ]
3805 0 : .into_iter()
3806 0 : .for_each(|c| {
3807 0 : Lazy::force(c);
3808 0 : });
3809 0 :
3810 0 : // gauges
3811 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
3812 0 :
3813 0 : // histograms
3814 0 : [
3815 0 : &READ_NUM_LAYERS_VISITED,
3816 0 : &VEC_READ_NUM_LAYERS_VISITED,
3817 0 : &WAIT_LSN_TIME,
3818 0 : &WAL_REDO_TIME,
3819 0 : &WAL_REDO_RECORDS_HISTOGRAM,
3820 0 : &WAL_REDO_BYTES_HISTOGRAM,
3821 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
3822 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
3823 0 : ]
3824 0 : .into_iter()
3825 0 : .for_each(|h| {
3826 0 : Lazy::force(h);
3827 0 : });
3828 0 :
3829 0 : // Custom
3830 0 : Lazy::force(&RECONSTRUCT_TIME);
3831 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
3832 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
3833 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
3834 0 :
3835 0 : tenant_throttling::preinitialize_global_metrics();
3836 0 : }
|