Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::config::{
11 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
12 : PageServiceProtocolPipelinedExecutionStrategy,
13 : };
14 : use pageserver_api::shard::TenantShardId;
15 : use postgres_backend::{is_expected_io_error, QueryError};
16 : use pq_proto::framed::ConnectionError;
17 : use strum::{EnumCount, VariantNames};
18 : use strum_macros::{IntoStaticStr, VariantNames};
19 : use utils::id::TimelineId;
20 :
21 : /// Prometheus histogram buckets (in seconds) for operations in the critical
22 : /// path. In other words, operations that directly affect the latency of user
23 : /// queries.
24 : ///
25 : /// The buckets capture the majority of latencies in the microsecond and
26 : /// millisecond range but also extend far enough up to distinguish "bad" from
27 : /// "really bad".
28 : const CRITICAL_OP_BUCKETS: &[f64] = &[
29 : 0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
30 : 0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
31 : 1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
32 : ];
33 :
34 : // Metrics collected on operations on the storage repository.
35 : #[derive(Debug, VariantNames, IntoStaticStr)]
36 : #[strum(serialize_all = "kebab_case")]
37 : pub(crate) enum StorageTimeOperation {
38 : #[strum(serialize = "layer flush")]
39 : LayerFlush,
40 :
41 : #[strum(serialize = "layer flush delay")]
42 : LayerFlushDelay,
43 :
44 : #[strum(serialize = "compact")]
45 : Compact,
46 :
47 : #[strum(serialize = "create images")]
48 : CreateImages,
49 :
50 : #[strum(serialize = "logical size")]
51 : LogicalSize,
52 :
53 : #[strum(serialize = "imitate logical size")]
54 : ImitateLogicalSize,
55 :
56 : #[strum(serialize = "load layer map")]
57 : LoadLayerMap,
58 :
59 : #[strum(serialize = "gc")]
60 : Gc,
61 :
62 : #[strum(serialize = "find gc cutoffs")]
63 : FindGcCutoffs,
64 : }
65 :
66 400 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
67 400 : register_counter_vec!(
68 400 : "pageserver_storage_operations_seconds_sum",
69 400 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
70 400 : &["operation", "tenant_id", "shard_id", "timeline_id"],
71 400 : )
72 400 : .expect("failed to define a metric")
73 400 : });
74 :
75 400 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
76 400 : register_int_counter_vec!(
77 400 : "pageserver_storage_operations_seconds_count",
78 400 : "Count of storage operations with operation, tenant and timeline dimensions",
79 400 : &["operation", "tenant_id", "shard_id", "timeline_id"],
80 400 : )
81 400 : .expect("failed to define a metric")
82 400 : });
83 :
84 : // Buckets for background operations like compaction, GC, size calculation
85 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
86 :
87 400 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
88 400 : register_histogram_vec!(
89 400 : "pageserver_storage_operations_seconds_global",
90 400 : "Time spent on storage operations",
91 400 : &["operation"],
92 400 : STORAGE_OP_BUCKETS.into(),
93 400 : )
94 400 : .expect("failed to define a metric")
95 400 : });
96 :
97 392 : pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
98 392 : register_histogram!(
99 392 : "pageserver_layers_visited_per_vectored_read_global",
100 392 : "Average number of layers visited to reconstruct one key",
101 392 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
102 392 : )
103 392 : .expect("failed to define a metric")
104 392 : });
105 :
106 0 : pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
107 0 : register_uint_gauge!(
108 0 : "pageserver_concurrent_initdb",
109 0 : "Number of initdb processes running"
110 0 : )
111 0 : .expect("failed to define a metric")
112 0 : });
113 :
114 0 : pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
115 0 : register_histogram!(
116 0 : "pageserver_initdb_semaphore_seconds_global",
117 0 : "Time spent getting a permit from the global initdb semaphore",
118 0 : STORAGE_OP_BUCKETS.into()
119 0 : )
120 0 : .expect("failed to define metric")
121 0 : });
122 :
123 0 : pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
124 0 : register_histogram!(
125 0 : "pageserver_initdb_seconds_global",
126 0 : "Time spent performing initdb",
127 0 : STORAGE_OP_BUCKETS.into()
128 0 : )
129 0 : .expect("failed to define metric")
130 0 : });
131 :
132 : pub(crate) struct GetVectoredLatency {
133 : map: EnumMap<TaskKind, Option<Histogram>>,
134 : }
135 :
136 : #[allow(dead_code)]
137 : pub(crate) struct ScanLatency {
138 : map: EnumMap<TaskKind, Option<Histogram>>,
139 : }
140 :
141 : impl GetVectoredLatency {
142 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
143 : // cardinality of the metric.
144 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
145 :
146 39356 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
147 39356 : self.map[task_kind].as_ref()
148 39356 : }
149 : }
150 :
151 : impl ScanLatency {
152 : // Only these task types perform scans. Filter all other tasks out to reduce total
153 : // cardinality of the metric.
154 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
155 :
156 24 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
157 24 : self.map[task_kind].as_ref()
158 24 : }
159 : }
160 :
161 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
162 : parent: &'a Histogram,
163 : start: std::time::Instant,
164 : }
165 :
166 : impl<'a> ScanLatencyOngoingRecording<'a> {
167 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
168 0 : let start = Instant::now();
169 0 : ScanLatencyOngoingRecording { parent, start }
170 0 : }
171 :
172 0 : pub(crate) fn observe(self) {
173 0 : let elapsed = self.start.elapsed();
174 0 : self.parent.observe(elapsed.as_secs_f64());
175 0 : }
176 : }
177 :
178 384 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
179 384 : let inner = register_histogram_vec!(
180 384 : "pageserver_get_vectored_seconds",
181 384 : "Time spent in get_vectored.",
182 384 : &["task_kind"],
183 384 : CRITICAL_OP_BUCKETS.into(),
184 384 : )
185 384 : .expect("failed to define a metric");
186 384 :
187 384 : GetVectoredLatency {
188 11904 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
189 11904 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
190 11904 :
191 11904 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
192 768 : let task_kind = task_kind.into();
193 768 : Some(inner.with_label_values(&[task_kind]))
194 : } else {
195 11136 : None
196 : }
197 11904 : })),
198 384 : }
199 384 : });
200 :
201 8 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
202 8 : let inner = register_histogram_vec!(
203 8 : "pageserver_scan_seconds",
204 8 : "Time spent in scan.",
205 8 : &["task_kind"],
206 8 : CRITICAL_OP_BUCKETS.into(),
207 8 : )
208 8 : .expect("failed to define a metric");
209 8 :
210 8 : ScanLatency {
211 248 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
212 248 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
213 248 :
214 248 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
215 8 : let task_kind = task_kind.into();
216 8 : Some(inner.with_label_values(&[task_kind]))
217 : } else {
218 240 : None
219 : }
220 248 : })),
221 8 : }
222 8 : });
223 :
224 : pub(crate) struct PageCacheMetricsForTaskKind {
225 : pub read_accesses_immutable: IntCounter,
226 : pub read_hits_immutable: IntCounter,
227 : }
228 :
229 : pub(crate) struct PageCacheMetrics {
230 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
231 : }
232 :
233 184 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
234 184 : register_int_counter_vec!(
235 184 : "pageserver_page_cache_read_hits_total",
236 184 : "Number of read accesses to the page cache that hit",
237 184 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
238 184 : )
239 184 : .expect("failed to define a metric")
240 184 : });
241 :
242 184 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
243 184 : register_int_counter_vec!(
244 184 : "pageserver_page_cache_read_accesses_total",
245 184 : "Number of read accesses to the page cache",
246 184 : &["task_kind", "key_kind", "content_kind"]
247 184 : )
248 184 : .expect("failed to define a metric")
249 184 : });
250 :
251 184 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
252 5704 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
253 5704 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
254 5704 : let task_kind: &'static str = task_kind.into();
255 45632 : EnumMap::from_array(std::array::from_fn(|content_kind| {
256 45632 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
257 45632 : let content_kind: &'static str = content_kind.into();
258 45632 : PageCacheMetricsForTaskKind {
259 45632 : read_accesses_immutable: {
260 45632 : PAGE_CACHE_READ_ACCESSES
261 45632 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
262 45632 : .unwrap()
263 45632 : },
264 45632 :
265 45632 : read_hits_immutable: {
266 45632 : PAGE_CACHE_READ_HITS
267 45632 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
268 45632 : .unwrap()
269 45632 : },
270 45632 : }
271 45632 : }))
272 5704 : })),
273 184 : });
274 :
275 : impl PageCacheMetrics {
276 1945038 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
277 1945038 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
278 1945038 : }
279 : }
280 :
281 : pub(crate) struct PageCacheSizeMetrics {
282 : pub max_bytes: UIntGauge,
283 :
284 : pub current_bytes_immutable: UIntGauge,
285 : }
286 :
287 184 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
288 184 : register_uint_gauge_vec!(
289 184 : "pageserver_page_cache_size_current_bytes",
290 184 : "Current size of the page cache in bytes, by key kind",
291 184 : &["key_kind"]
292 184 : )
293 184 : .expect("failed to define a metric")
294 184 : });
295 :
296 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
297 184 : Lazy::new(|| PageCacheSizeMetrics {
298 184 : max_bytes: {
299 184 : register_uint_gauge!(
300 184 : "pageserver_page_cache_size_max_bytes",
301 184 : "Maximum size of the page cache in bytes"
302 184 : )
303 184 : .expect("failed to define a metric")
304 184 : },
305 184 : current_bytes_immutable: {
306 184 : PAGE_CACHE_SIZE_CURRENT_BYTES
307 184 : .get_metric_with_label_values(&["immutable"])
308 184 : .unwrap()
309 184 : },
310 184 : });
311 :
312 : pub(crate) mod page_cache_eviction_metrics {
313 : use std::num::NonZeroUsize;
314 :
315 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
316 : use once_cell::sync::Lazy;
317 :
318 : #[derive(Clone, Copy)]
319 : pub(crate) enum Outcome {
320 : FoundSlotUnused { iters: NonZeroUsize },
321 : FoundSlotEvicted { iters: NonZeroUsize },
322 : ItersExceeded { iters: NonZeroUsize },
323 : }
324 :
325 184 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
326 184 : register_int_counter_vec!(
327 184 : "pageserver_page_cache_find_victim_iters_total",
328 184 : "Counter for the number of iterations in the find_victim loop",
329 184 : &["outcome"],
330 184 : )
331 184 : .expect("failed to define a metric")
332 184 : });
333 :
334 184 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
335 184 : register_int_counter_vec!(
336 184 : "pageserver_page_cache_find_victim_calls",
337 184 : "Incremented at the end of each find_victim() call. \
338 184 : Filter by outcome to get e.g. the eviction rate.",
339 184 : &["outcome"]
340 184 : )
341 184 : .unwrap()
342 184 : });
343 :
344 64376 : pub(crate) fn observe(outcome: Outcome) {
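// `dry!` memoizes the label lookup: `with_label_values` on ITERS_TOTAL_VEC and
// CALLS_VEC runs once per outcome label, and the resulting counters are cached
// in the block-local `Lazy` statics defined inside the macro expansion below.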
345 : macro_rules! dry {
346 : ($label:literal, $iters:expr) => {{
347 : static LABEL: &'static str = $label;
348 : static ITERS_TOTAL: Lazy<IntCounter> =
349 224 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
350 : static CALLS: Lazy<IntCounter> =
351 224 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
352 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
353 : CALLS.inc();
354 : }};
355 : }
356 64376 : match outcome {
357 3272 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
358 61104 : Outcome::FoundSlotEvicted { iters } => {
359 61104 : dry!("found_evicted", iters)
360 : }
361 0 : Outcome::ItersExceeded { iters } => {
362 0 : dry!("err_iters_exceeded", iters);
363 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
364 0 : }
365 : }
366 64376 : }
367 : }
368 :
369 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
370 0 : register_int_counter_vec!(
371 0 : "page_cache_errors_total",
372 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
373 0 : "Number of page cache errors, by error kind (e.g. timeouts while acquiring a pinned slot)",
374 0 : )
375 0 : .expect("failed to define a metric")
376 0 : });
377 :
378 : #[derive(IntoStaticStr)]
379 : #[strum(serialize_all = "kebab_case")]
380 : pub(crate) enum PageCacheErrorKind {
381 : AcquirePinnedSlotTimeout,
382 : EvictIterLimit,
383 : }
384 :
385 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
386 0 : PAGE_CACHE_ERRORS
387 0 : .get_metric_with_label_values(&[error_kind.into()])
388 0 : .unwrap()
389 0 : .inc();
390 0 : }
391 :
392 40 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
393 40 : register_histogram!(
394 40 : "pageserver_wait_lsn_seconds",
395 40 : "Time spent waiting for WAL to arrive",
396 40 : CRITICAL_OP_BUCKETS.into(),
397 40 : )
398 40 : .expect("failed to define a metric")
399 40 : });
400 :
401 400 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
402 400 : register_int_gauge_vec!(
403 400 : "pageserver_last_record_lsn",
404 400 : "Last record LSN grouped by timeline",
405 400 : &["tenant_id", "shard_id", "timeline_id"]
406 400 : )
407 400 : .expect("failed to define a metric")
408 400 : });
409 :
410 400 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
411 400 : register_int_gauge_vec!(
412 400 : "pageserver_disk_consistent_lsn",
413 400 : "Disk consistent LSN grouped by timeline",
414 400 : &["tenant_id", "shard_id", "timeline_id"]
415 400 : )
416 400 : .expect("failed to define a metric")
417 400 : });
418 :
419 400 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
420 400 : register_uint_gauge_vec!(
421 400 : "pageserver_projected_remote_consistent_lsn",
422 400 : "Projected remote consistent LSN grouped by timeline",
423 400 : &["tenant_id", "shard_id", "timeline_id"]
424 400 : )
425 400 : .expect("failed to define a metric")
426 400 : });
427 :
428 400 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
429 400 : register_uint_gauge_vec!(
430 400 : "pageserver_pitr_history_size",
431 400 : "Data written since PITR cutoff on this timeline",
432 400 : &["tenant_id", "shard_id", "timeline_id"]
433 400 : )
434 400 : .expect("failed to define a metric")
435 400 : });
436 :
437 0 : #[derive(strum_macros::EnumString, strum_macros::Display, strum_macros::IntoStaticStr)]
438 : #[strum(serialize_all = "kebab_case")]
439 : pub(crate) enum MetricLayerKind {
440 : Delta,
441 : Image,
442 : }
443 :
444 400 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
445 400 : register_uint_gauge_vec!(
446 400 : "pageserver_layer_bytes",
447 400 : "Sum of layer physical sizes in bytes",
448 400 : &["tenant_id", "shard_id", "timeline_id", "kind"]
449 400 : )
450 400 : .expect("failed to define a metric")
451 400 : });
452 :
453 400 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
454 400 : register_uint_gauge_vec!(
455 400 : "pageserver_layer_count",
456 400 : "Number of layers that exist",
457 400 : &["tenant_id", "shard_id", "timeline_id", "kind"]
458 400 : )
459 400 : .expect("failed to define a metric")
460 400 : });
461 :
462 400 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
463 400 : register_uint_gauge_vec!(
464 400 : "pageserver_archive_size",
465 400 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
466 400 : &["tenant_id", "shard_id", "timeline_id"]
467 400 : )
468 400 : .expect("failed to define a metric")
469 400 : });
470 :
471 400 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
472 400 : register_int_gauge_vec!(
473 400 : "pageserver_standby_horizon",
474 400 : "Standby apply LSN for which GC is held off, by timeline.",
475 400 : &["tenant_id", "shard_id", "timeline_id"]
476 400 : )
477 400 : .expect("failed to define a metric")
478 400 : });
479 :
480 400 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
481 400 : register_uint_gauge_vec!(
482 400 : "pageserver_resident_physical_size",
483 400 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
484 400 : &["tenant_id", "shard_id", "timeline_id"]
485 400 : )
486 400 : .expect("failed to define a metric")
487 400 : });
488 :
489 400 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
490 400 : register_uint_gauge_vec!(
491 400 : "pageserver_visible_physical_size",
492 400 : "The size of the layer files present in the pageserver's filesystem.",
493 400 : &["tenant_id", "shard_id", "timeline_id"]
494 400 : )
495 400 : .expect("failed to define a metric")
496 400 : });
497 :
498 392 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
499 392 : register_uint_gauge!(
500 392 : "pageserver_resident_physical_size_global",
501 392 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
502 392 : )
503 392 : .expect("failed to define a metric")
504 392 : });
505 :
506 400 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
507 400 : register_uint_gauge_vec!(
508 400 : "pageserver_remote_physical_size",
509 400 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
510 400 : // Corollary: If any files are missing from the index part, they won't be included here.
511 400 : &["tenant_id", "shard_id", "timeline_id"]
512 400 : )
513 400 : .expect("failed to define a metric")
514 400 : });
515 :
516 400 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
517 400 : register_uint_gauge!(
518 400 : "pageserver_remote_physical_size_global",
519 400 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
520 400 : )
521 400 : .expect("failed to define a metric")
522 400 : });
523 :
524 8 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
525 8 : register_int_counter!(
526 8 : "pageserver_remote_ondemand_downloaded_layers_total",
527 8 : "Total on-demand downloaded layers"
528 8 : )
529 8 : .unwrap()
530 8 : });
531 :
532 8 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
533 8 : register_int_counter!(
534 8 : "pageserver_remote_ondemand_downloaded_bytes_total",
535 8 : "Total bytes of layers on-demand downloaded",
536 8 : )
537 8 : .unwrap()
538 8 : });
539 :
540 400 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
541 400 : register_uint_gauge_vec!(
542 400 : "pageserver_current_logical_size",
543 400 : "Current logical size grouped by timeline",
544 400 : &["tenant_id", "shard_id", "timeline_id"]
545 400 : )
546 400 : .expect("failed to define current logical size metric")
547 400 : });
548 :
549 400 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
550 400 : register_int_gauge_vec!(
551 400 : "pageserver_aux_file_estimated_size",
552 400 : "The size of all aux files for a timeline in aux file v2 store.",
553 400 : &["tenant_id", "shard_id", "timeline_id"]
554 400 : )
555 400 : .expect("failed to define a metric")
556 400 : });
557 :
558 400 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
559 400 : register_uint_gauge_vec!(
560 400 : "pageserver_valid_lsn_lease_count",
561 400 : "The number of valid leases after refreshing gc info.",
562 400 : &["tenant_id", "shard_id", "timeline_id"],
563 400 : )
564 400 : .expect("failed to define a metric")
565 400 : });
566 :
567 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
568 0 : register_int_counter!(
569 0 : "pageserver_circuit_breaker_broken",
570 0 : "How many times a circuit breaker has broken"
571 0 : )
572 0 : .expect("failed to define a metric")
573 0 : });
574 :
575 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
576 0 : register_int_counter!(
577 0 : "pageserver_circuit_breaker_unbroken",
578 0 : "How many times a circuit breaker has been un-broken (recovered)"
579 0 : )
580 0 : .expect("failed to define a metric")
581 0 : });
582 :
583 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
584 384 : register_int_counter!(
585 384 : "pageserver_compression_image_in_bytes_total",
586 384 : "Size of data written into image layers before compression"
587 384 : )
588 384 : .expect("failed to define a metric")
589 384 : });
590 :
591 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
592 384 : register_int_counter!(
593 384 : "pageserver_compression_image_in_bytes_considered",
594 384 : "Size of potentially compressible data written into image layers before compression"
595 384 : )
596 384 : .expect("failed to define a metric")
597 384 : });
598 :
599 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
600 384 : register_int_counter!(
601 384 : "pageserver_compression_image_in_bytes_chosen",
602 384 : "Size of data whose compressed form was written into image layers"
603 384 : )
604 384 : .expect("failed to define a metric")
605 384 : });
606 :
607 384 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
608 384 : register_int_counter!(
609 384 : "pageserver_compression_image_out_bytes_total",
610 384 : "Size of compressed image layer written"
611 384 : )
612 384 : .expect("failed to define a metric")
613 384 : });
614 :
615 20 : pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
616 20 : register_uint_gauge!(
617 20 : "pageserver_relsize_cache_entries",
618 20 : "Number of entries in the relation size cache",
619 20 : )
620 20 : .expect("failed to define a metric")
621 20 : });
622 :
623 20 : pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
624 20 : register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
625 20 : .expect("failed to define a metric")
626 20 : });
627 :
628 20 : pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
629 20 : register_int_counter!(
630 20 : "pageserver_relsize_cache_misses",
631 20 : "Relation size cache misses",
632 20 : )
633 20 : .expect("failed to define a metric")
634 20 : });
635 :
636 8 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
637 8 : register_int_counter!(
638 8 : "pageserver_relsize_cache_misses_old",
639 8 : "Relation size cache misses where the lookup LSN is older than the last relation update"
640 8 : )
641 8 : .expect("failed to define a metric")
642 8 : });
643 :
644 : pub(crate) mod initial_logical_size {
645 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
646 : use once_cell::sync::Lazy;
647 :
648 : pub(crate) struct StartCalculation(IntCounterVec);
649 400 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
650 400 : StartCalculation(
651 400 : register_int_counter_vec!(
652 400 : "pageserver_initial_logical_size_start_calculation",
653 400 : "Incremented each time we start an initial logical size calculation attempt. \
654 400 : The `circumstances` label provides some additional details.",
655 400 : &["attempt", "circumstances"]
656 400 : )
657 400 : .unwrap(),
658 400 : )
659 400 : });
660 :
661 : struct DropCalculation {
662 : first: IntCounter,
663 : retry: IntCounter,
664 : }
665 :
666 400 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
667 400 : let vec = register_int_counter_vec!(
668 400 : "pageserver_initial_logical_size_drop_calculation",
669 400 : "Incremented each time we abort a started size calculation attempt.",
670 400 : &["attempt"]
671 400 : )
672 400 : .unwrap();
673 400 : DropCalculation {
674 400 : first: vec.with_label_values(&["first"]),
675 400 : retry: vec.with_label_values(&["retry"]),
676 400 : }
677 400 : });
678 :
679 : pub(crate) struct Calculated {
680 : pub(crate) births: IntCounter,
681 : pub(crate) deaths: IntCounter,
682 : }
683 :
684 400 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
685 400 : births: register_int_counter!(
686 400 : "pageserver_initial_logical_size_finish_calculation",
687 400 : "Incremented every time we finish calculation of initial logical size. \
688 400 : If everything is working well, this should happen at most once per Timeline object."
689 400 : )
690 400 : .unwrap(),
691 400 : deaths: register_int_counter!(
692 400 : "pageserver_initial_logical_size_drop_finished_calculation",
693 400 : "Incremented when we drop a finished initial logical size calculation result. \
694 400 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
695 400 : )
696 400 : .unwrap(),
697 400 : });
698 :
699 : pub(crate) struct OngoingCalculationGuard {
700 : inc_drop_calculation: Option<IntCounter>,
701 : }
702 :
703 : #[derive(strum_macros::IntoStaticStr)]
704 : pub(crate) enum StartCircumstances {
705 : EmptyInitial,
706 : SkippedConcurrencyLimiter,
707 : AfterBackgroundTasksRateLimit,
708 : }
709 :
710 : impl StartCalculation {
711 424 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
712 424 : let circumstances_label: &'static str = circumstances.into();
713 424 : self.0
714 424 : .with_label_values(&["first", circumstances_label])
715 424 : .inc();
716 424 : OngoingCalculationGuard {
717 424 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
718 424 : }
719 424 : }
720 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
721 0 : let circumstances_label: &'static str = circumstances.into();
722 0 : self.0
723 0 : .with_label_values(&["retry", circumstances_label])
724 0 : .inc();
725 0 : OngoingCalculationGuard {
726 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
727 0 : }
728 0 : }
729 : }
730 :
731 : impl Drop for OngoingCalculationGuard {
732 424 : fn drop(&mut self) {
733 424 : if let Some(counter) = self.inc_drop_calculation.take() {
734 0 : counter.inc();
735 424 : }
736 424 : }
737 : }
738 :
739 : impl OngoingCalculationGuard {
740 424 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
741 424 : drop(self.inc_drop_calculation.take());
742 424 : CALCULATED.births.inc();
743 424 : FinishedCalculationGuard {
744 424 : inc_on_drop: CALCULATED.deaths.clone(),
745 424 : }
746 424 : }
747 : }
748 :
749 : pub(crate) struct FinishedCalculationGuard {
750 : inc_on_drop: IntCounter,
751 : }
752 :
753 : impl Drop for FinishedCalculationGuard {
754 12 : fn drop(&mut self) {
755 12 : self.inc_on_drop.inc();
756 12 : }
757 : }
758 :
759 : // context: https://github.com/neondatabase/neon/issues/5963
760 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
761 0 : Lazy::new(|| {
762 0 : register_int_counter!(
763 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
764 0 : "Counter for the following event: walreceiver calls \
765 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
766 0 : )
767 0 : .unwrap()
768 0 : });
769 : }
770 :
771 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
772 0 : register_uint_gauge_vec!(
773 0 : "pageserver_directory_entries_count",
774 0 : "Sum of the entries in pageserver-stored directory listings",
775 0 : &["tenant_id", "shard_id", "timeline_id"]
776 0 : )
777 0 : .expect("failed to define a metric")
778 0 : });
779 :
780 404 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
781 404 : register_uint_gauge_vec!(
782 404 : "pageserver_tenant_states_count",
783 404 : "Count of tenants per state",
784 404 : &["state"]
785 404 : )
786 404 : .expect("Failed to register pageserver_tenant_states_count metric")
787 404 : });
788 :
789 : /// A set of broken tenants.
790 : ///
791 : /// These are expected to be so rare that a set is fine. Set as in a new timeseries for each broken
792 : /// tenant.
793 20 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
794 20 : register_uint_gauge_vec!(
795 20 : "pageserver_broken_tenants_count",
796 20 : "Set of broken tenants",
797 20 : &["tenant_id", "shard_id"]
798 20 : )
799 20 : .expect("Failed to register pageserver_broken_tenants_count metric")
800 20 : });
801 :
802 12 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
803 12 : register_uint_gauge_vec!(
804 12 : "pageserver_tenant_synthetic_cached_size_bytes",
805 12 : "Synthetic size of each tenant in bytes",
806 12 : &["tenant_id"]
807 12 : )
808 12 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
809 12 : });
810 :
811 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
812 0 : register_histogram_vec!(
813 0 : "pageserver_eviction_iteration_duration_seconds_global",
814 0 : "Time spent on a single eviction iteration",
815 0 : &["period_secs", "threshold_secs"],
816 0 : STORAGE_OP_BUCKETS.into(),
817 0 : )
818 0 : .expect("failed to define a metric")
819 0 : });
820 :
821 400 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
822 400 : register_int_counter_vec!(
823 400 : "pageserver_evictions",
824 400 : "Number of layers evicted from the pageserver",
825 400 : &["tenant_id", "shard_id", "timeline_id"]
826 400 : )
827 400 : .expect("failed to define a metric")
828 400 : });
829 :
830 400 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
831 400 : register_int_counter_vec!(
832 400 : "pageserver_evictions_with_low_residence_duration",
833 400 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
834 400 : Residence duration is determined using the `residence_duration_data_source`.",
835 400 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
836 400 : )
837 400 : .expect("failed to define a metric")
838 400 : });
839 :
840 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
841 0 : register_int_counter!(
842 0 : "pageserver_unexpected_ondemand_downloads_count",
843 0 : "Number of unexpected on-demand downloads. \
844 0 : We log more context for each increment, so we forgo any labels in this metric.",
845 0 : )
846 0 : .expect("failed to define a metric")
847 0 : });
848 :
849 : /// How long did we take to start up? Broken down by labels to describe
850 : /// different phases of startup.
851 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
852 0 : register_gauge_vec!(
853 0 : "pageserver_startup_duration_seconds",
854 0 : "Time taken by phases of pageserver startup, in seconds",
855 0 : &["phase"]
856 0 : )
857 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
858 0 : });
859 :
860 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
861 0 : register_uint_gauge!(
862 0 : "pageserver_startup_is_loading",
863 0 : "1 while in initial startup load of tenants, 0 at other times"
864 0 : )
865 0 : .expect("Failed to register pageserver_startup_is_loading")
866 0 : });
867 :
868 392 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
869 392 : register_uint_gauge!(
870 392 : "pageserver_timeline_ephemeral_bytes",
871 392 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
872 392 : )
873 392 : .expect("Failed to register metric")
874 392 : });
875 :
876 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
877 : /// like how long it took to load.
878 : ///
879 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
880 : /// metrics are rather expensive, and usually fine grained stuff makes more sense
881 : /// at a timeline level than tenant level.
882 : pub(crate) struct TenantMetrics {
883 : /// How long did tenants take to go from construction to active state?
884 : pub(crate) activation: Histogram,
885 : pub(crate) preload: Histogram,
886 : pub(crate) attach: Histogram,
887 :
888 : /// How many tenants are included in the initial startup of the pageserver?
889 : pub(crate) startup_scheduled: IntCounter,
890 : pub(crate) startup_complete: IntCounter,
891 : }
892 :
893 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
894 0 : TenantMetrics {
895 0 : activation: register_histogram!(
896 0 : "pageserver_tenant_activation_seconds",
897 0 : "Time taken by tenants to activate, in seconds",
898 0 : CRITICAL_OP_BUCKETS.into()
899 0 : )
900 0 : .expect("Failed to register metric"),
901 0 : preload: register_histogram!(
902 0 : "pageserver_tenant_preload_seconds",
903 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
904 0 : CRITICAL_OP_BUCKETS.into()
905 0 : )
906 0 : .expect("Failed to register metric"),
907 0 : attach: register_histogram!(
908 0 : "pageserver_tenant_attach_seconds",
909 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
910 0 : "Time taken by tenants to initialize, after remote metadata is already loaded",
911 0 : )
912 0 : .expect("Failed to register metric"),
913 0 : startup_scheduled: register_int_counter!(
914 0 : "pageserver_tenant_startup_scheduled",
915 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
916 0 : ).expect("Failed to register metric"),
917 0 : startup_complete: register_int_counter!(
918 0 : "pageserver_tenant_startup_complete",
919 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
920 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
921 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
922 0 : ).expect("Failed to register metric"),
923 0 : }
924 0 : });
925 :
926 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
927 : #[derive(Debug)]
928 : pub(crate) struct EvictionsWithLowResidenceDuration {
929 : data_source: &'static str,
930 : threshold: Duration,
931 : counter: Option<IntCounter>,
932 : }
933 :
934 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
935 : data_source: &'static str,
936 : threshold: Duration,
937 : }
938 :
939 : impl EvictionsWithLowResidenceDurationBuilder {
940 892 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
941 892 : Self {
942 892 : data_source,
943 892 : threshold,
944 892 : }
945 892 : }
946 :
947 892 : fn build(
948 892 : &self,
949 892 : tenant_id: &str,
950 892 : shard_id: &str,
951 892 : timeline_id: &str,
952 892 : ) -> EvictionsWithLowResidenceDuration {
953 892 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
954 892 : .get_metric_with_label_values(&[
955 892 : tenant_id,
956 892 : shard_id,
957 892 : timeline_id,
958 892 : self.data_source,
959 892 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
960 892 : ])
961 892 : .unwrap();
962 892 : EvictionsWithLowResidenceDuration {
963 892 : data_source: self.data_source,
964 892 : threshold: self.threshold,
965 892 : counter: Some(counter),
966 892 : }
967 892 : }
968 : }
969 :
970 : impl EvictionsWithLowResidenceDuration {
971 912 : fn threshold_label_value(threshold: Duration) -> String {
972 912 : format!("{}", threshold.as_secs())
973 912 : }
974 :
975 8 : pub fn observe(&self, observed_value: Duration) {
976 8 : if observed_value < self.threshold {
977 8 : self.counter
978 8 : .as_ref()
979 8 : .expect("nobody calls this function after `remove_from_vec`")
980 8 : .inc();
981 8 : }
982 8 : }
983 :
984 0 : pub fn change_threshold(
985 0 : &mut self,
986 0 : tenant_id: &str,
987 0 : shard_id: &str,
988 0 : timeline_id: &str,
989 0 : new_threshold: Duration,
990 0 : ) {
991 0 : if new_threshold == self.threshold {
992 0 : return;
993 0 : }
994 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
995 0 : self.data_source,
996 0 : new_threshold,
997 0 : )
998 0 : .build(tenant_id, shard_id, timeline_id);
999 0 : std::mem::swap(self, &mut with_new);
1000 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1001 0 : }
1002 :
1003 : // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
1004 20 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1005 20 : let Some(_counter) = self.counter.take() else {
1006 0 : return;
1007 : };
1008 :
1009 20 : let threshold = Self::threshold_label_value(self.threshold);
1010 20 :
1011 20 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1012 20 : tenant_id,
1013 20 : shard_id,
1014 20 : timeline_id,
1015 20 : self.data_source,
1016 20 : &threshold,
1017 20 : ]);
1018 20 :
1019 20 : match removed {
1020 0 : Err(e) => {
1021 0 : // this has been hit in staging as
1022 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1023 0 : // because we can be in the drop path already, don't risk:
1024 0 : // - "double-panic => illegal instruction" or
1025 0 : // - future "drop panic => abort"
1026 0 : //
1027 0 : // so just nag: (the error has the labels)
1028 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
1029 : }
1030 : Ok(()) => {
1031 : // to help identify cases where we double-remove the same values, let's log all
1032 : // deletions?
1033 20 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
1034 : }
1035 : }
1036 20 : }
1037 : }
1038 :
1039 : // Metrics collected on disk IO operations
1040 : //
1041 : // Roughly logarithmic scale.
1042 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1043 : 0.000030, // 30 usec
1044 : 0.001000, // 1000 usec
1045 : 0.030, // 30 ms
1046 : 1.000, // 1000 ms
1047 : 30.000, // 30000 ms
1048 : ];
1049 :
1050 : /// VirtualFile fs operation variants.
1051 : ///
1052 : /// Operations:
1053 : /// - open ([`std::fs::OpenOptions::open`])
1054 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1055 : /// - close-by-replace (close by replacement algorithm)
1056 : /// - read (`read_at`)
1057 : /// - write (`write_at`)
1058 : /// - seek (modify internal position or file length query)
1059 : /// - fsync ([`std::fs::File::sync_all`])
1060 : /// - metadata ([`std::fs::File::metadata`])
1061 : #[derive(
1062 0 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1063 : )]
1064 : pub(crate) enum StorageIoOperation {
1065 : Open,
1066 : OpenAfterReplace,
1067 : Close,
1068 : CloseByReplace,
1069 : Read,
1070 : Write,
1071 : Seek,
1072 : Fsync,
1073 : Metadata,
1074 : }
1075 :
1076 : impl StorageIoOperation {
1077 4140 : pub fn as_str(&self) -> &'static str {
1078 4140 : match self {
1079 460 : StorageIoOperation::Open => "open",
1080 460 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1081 460 : StorageIoOperation::Close => "close",
1082 460 : StorageIoOperation::CloseByReplace => "close-by-replace",
1083 460 : StorageIoOperation::Read => "read",
1084 460 : StorageIoOperation::Write => "write",
1085 460 : StorageIoOperation::Seek => "seek",
1086 460 : StorageIoOperation::Fsync => "fsync",
1087 460 : StorageIoOperation::Metadata => "metadata",
1088 : }
1089 4140 : }
1090 : }
1091 :
1092 : /// Tracks time taken by fs operations near VirtualFile.
1093 : #[derive(Debug)]
1094 : pub(crate) struct StorageIoTime {
1095 : metrics: [Histogram; StorageIoOperation::COUNT],
1096 : }
1097 :
1098 : impl StorageIoTime {
1099 460 : fn new() -> Self {
1100 460 : let storage_io_histogram_vec = register_histogram_vec!(
1101 460 : "pageserver_io_operations_seconds",
1102 460 : "Time spent in IO operations",
1103 460 : &["operation"],
1104 460 : STORAGE_IO_TIME_BUCKETS.into()
1105 460 : )
1106 460 : .expect("failed to define a metric");
1107 4140 : let metrics = std::array::from_fn(|i| {
1108 4140 : let op = StorageIoOperation::from_repr(i).unwrap();
1109 4140 : storage_io_histogram_vec
1110 4140 : .get_metric_with_label_values(&[op.as_str()])
1111 4140 : .unwrap()
1112 4140 : });
1113 460 : Self { metrics }
1114 460 : }
1115 :
1116 4026108 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1117 4026108 : &self.metrics[op as usize]
1118 4026108 : }
1119 : }
1120 :
1121 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
1122 :
1123 : const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
1124 :
1125 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1126 452 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1127 452 : register_int_gauge_vec!(
1128 452 : "pageserver_io_operations_bytes_total",
1129 452 : "Total amount of bytes read/written in IO operations",
1130 452 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1131 452 : )
1132 452 : .expect("failed to define a metric")
1133 452 : });
1134 :
1135 : #[cfg(not(test))]
1136 : pub(crate) mod virtual_file_descriptor_cache {
1137 : use super::*;
1138 :
1139 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1140 0 : register_uint_gauge!(
1141 0 : "pageserver_virtual_file_descriptor_cache_size_max",
1142 0 : "Maximum number of open file descriptors in the cache."
1143 0 : )
1144 0 : .unwrap()
1145 0 : });
1146 :
1147 : // SIZE_CURRENT: derive it like so:
1148 : // ```
1149 : // sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
1150 : // -ignoring(operation)
1151 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
1152 : // ```
1153 : }
1154 :
1155 : #[cfg(not(test))]
1156 : pub(crate) mod virtual_file_io_engine {
1157 : use super::*;
1158 :
1159 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1160 0 : register_uint_gauge_vec!(
1161 0 : "pageserver_virtual_file_io_engine_kind",
1162 0 : "The configured io engine for VirtualFile",
1163 0 : &["kind"],
1164 0 : )
1165 0 : .unwrap()
1166 0 : });
1167 : }
1168 :
1169 : pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
1170 : pub(crate) struct SmgrOpTimerInner {
1171 : global_execution_latency_histo: Histogram,
1172 : per_timeline_execution_latency_histo: Option<Histogram>,
1173 :
1174 : global_batch_wait_time: Histogram,
1175 : per_timeline_batch_wait_time: Histogram,
1176 :
1177 : global_flush_in_progress_micros: IntCounter,
1178 : per_timeline_flush_in_progress_micros: IntCounter,
1179 :
1180 : throttling: Arc<tenant_throttling::Pagestream>,
1181 :
1182 : timings: SmgrOpTimerState,
1183 : }
1184 :
1185 : /// The stages of request processing are represented by the enum variants.
1186 : /// Used as part of [`SmgrOpTimerInner::timings`].
1187 : ///
1188 : /// Request processing calls into the `SmgrOpTimer::observe_*` methods at the
1189 : /// transition points.
1190 : /// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]
1191 : /// to the next state.
1192 : ///
1193 : /// Each request goes through every stage, in all configurations.
1194 : ///
1195 : #[derive(Debug)]
1196 : enum SmgrOpTimerState {
1197 : Received {
1198 : // In the future, we may want to track the full time the request spent
1199 : // inside pageserver process (time spent in kernel buffers can't be tracked).
1200 : // `received_at` would be used for that.
1201 : #[allow(dead_code)]
1202 : received_at: Instant,
1203 : },
1204 : Throttling {
1205 : throttle_started_at: Instant,
1206 : },
1207 : Batching {
1208 : throttle_done_at: Instant,
1209 : },
1210 : Executing {
1211 : execution_started_at: Instant,
1212 : },
1213 : Flushing,
1214 : // NB: when adding observation points, remember to update the Drop impl.
1215 : }
1216 :
1217 : // NB: when adding observation points, remember to update the Drop impl.
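// A minimal sketch of the intended call order (assumed caller; the real call
// sites live in the page_service request handling code). The `Drop` impl further
// below replays the same sequence so that counter pairs balance out on early drop:
//
//     let mut timer: SmgrOpTimer = /* one per request */;
//     timer.observe_throttle_start(Instant::now());
//     timer.observe_throttle_done(ThrottleResult::NotThrottled { end: Instant::now() });
//     timer.observe_execution_start(Instant::now());
//     let flush_guard = timer.observe_execution_end_flush_start(Instant::now());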
1218 : impl SmgrOpTimer {
1219 : /// See [`SmgrOpTimerState`] for more context.
1220 0 : pub(crate) fn observe_throttle_start(&mut self, at: Instant) {
1221 0 : let Some(inner) = self.0.as_mut() else {
1222 0 : return;
1223 : };
1224 0 : let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {
1225 0 : return;
1226 : };
1227 0 : inner.throttling.count_accounted_start.inc();
1228 0 : inner.timings = SmgrOpTimerState::Throttling {
1229 0 : throttle_started_at: at,
1230 0 : };
1231 0 : }
1232 :
1233 : /// See [`SmgrOpTimerState`] for more context.
1234 0 : pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {
1235 0 : let Some(inner) = self.0.as_mut() else {
1236 0 : return;
1237 : };
1238 : let SmgrOpTimerState::Throttling {
1239 0 : throttle_started_at,
1240 0 : } = &inner.timings
1241 : else {
1242 0 : return;
1243 : };
1244 0 : inner.throttling.count_accounted_finish.inc();
1245 0 : match throttle {
1246 0 : ThrottleResult::NotThrottled { end } => {
1247 0 : inner.timings = SmgrOpTimerState::Batching {
1248 0 : throttle_done_at: end,
1249 0 : };
1250 0 : }
1251 0 : ThrottleResult::Throttled { end } => {
1252 0 : // update metrics
1253 0 : inner.throttling.count_throttled.inc();
1254 0 : inner
1255 0 : .throttling
1256 0 : .wait_time
1257 0 : .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());
1258 0 : // state transition
1259 0 : inner.timings = SmgrOpTimerState::Batching {
1260 0 : throttle_done_at: end,
1261 0 : };
1262 0 : }
1263 : }
1264 0 : }
1265 :
1266 : /// See [`SmgrOpTimerState`] for more context.
1267 0 : pub(crate) fn observe_execution_start(&mut self, at: Instant) {
1268 0 : let Some(inner) = self.0.as_mut() else {
1269 0 : return;
1270 : };
1271 0 : let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {
1272 0 : return;
1273 : };
1274 : // update metrics
1275 0 : let batch = at - *throttle_done_at;
1276 0 : inner.global_batch_wait_time.observe(batch.as_secs_f64());
1277 0 : inner
1278 0 : .per_timeline_batch_wait_time
1279 0 : .observe(batch.as_secs_f64());
1280 0 : // state transition
1281 0 : inner.timings = SmgrOpTimerState::Executing {
1282 0 : execution_started_at: at,
1283 0 : }
1284 0 : }
1285 :
1286 : /// For all but the first caller, this is a no-op.
1287 : /// The first callers receives Some, subsequent ones None.
1288 : ///
1289 : /// See [`SmgrOpTimerState`] for more context.
1290 0 : pub(crate) fn observe_execution_end_flush_start(
1291 0 : &mut self,
1292 0 : at: Instant,
1293 0 : ) -> Option<SmgrOpFlushInProgress> {
1294 : // NB: unlike the other observe_* methods, this one take()s.
1295 : #[allow(clippy::question_mark)] // maintain similar code pattern.
1296 0 : let Some(mut inner) = self.0.take() else {
1297 0 : return None;
1298 : };
1299 : let SmgrOpTimerState::Executing {
1300 0 : execution_started_at,
1301 0 : } = &inner.timings
1302 : else {
1303 0 : return None;
1304 : };
1305 : // update metrics
1306 0 : let execution = at - *execution_started_at;
1307 0 : inner
1308 0 : .global_execution_latency_histo
1309 0 : .observe(execution.as_secs_f64());
1310 0 : if let Some(per_timeline_execution_latency_histo) =
1311 0 : &inner.per_timeline_execution_latency_histo
1312 0 : {
1313 0 : per_timeline_execution_latency_histo.observe(execution.as_secs_f64());
1314 0 : }
1315 :
1316 : // state transition
1317 0 : inner.timings = SmgrOpTimerState::Flushing;
1318 0 :
1319 0 : // return the flush in progress object which
1320 0 : // will do the remaining metrics updates
1321 0 : let SmgrOpTimerInner {
1322 0 : global_flush_in_progress_micros,
1323 0 : per_timeline_flush_in_progress_micros,
1324 0 : ..
1325 0 : } = inner;
1326 0 : Some(SmgrOpFlushInProgress {
1327 0 : flush_started_at: at,
1328 0 : global_micros: global_flush_in_progress_micros,
1329 0 : per_timeline_micros: per_timeline_flush_in_progress_micros,
1330 0 : })
1331 0 : }
1332 : }
1333 :
1334 : /// The last stage of request processing is serializing and flushing the request
1335 : /// into the TCP connection. We want to make slow flushes observable
1336 : /// _while they are occurring_, so this struct provides a wrapper method [`Self::measure`]
1337 : /// to periodically bump the metric.
1338 : ///
1339 : /// If in the future we decide that we're not interested in live updates, we can
1340 : /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
1341 : /// and remove this struct from the code base.
1342 : pub(crate) struct SmgrOpFlushInProgress {
1343 : flush_started_at: Instant,
1344 : global_micros: IntCounter,
1345 : per_timeline_micros: IntCounter,
1346 : }
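// Minimal usage sketch (assumed call site; the actual wiring lives in page_service):
// the guard returned by `SmgrOpTimer::observe_execution_end_flush_start` wraps the
// flush future so the in-progress counters keep advancing (roughly every 10 seconds)
// while a slow flush is still running.
//
//     if let Some(flush_guard) = timer.observe_execution_end_flush_start(Instant::now()) {
//         // `flush_fut` is a hypothetical future that flushes the response to the socket.
//         flush_guard.measure(flush_fut).await;
//     }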
1347 :
1348 : impl Drop for SmgrOpTimer {
1349 0 : fn drop(&mut self) {
1350 0 : // In case of early drop, update any of the remaining metrics with
1351 0 : // observations so that (started,finished) counter pairs balance out
1352 0 : // and all counters on the latency path have the same number of
1353 0 : // observations.
1354 0 : // It's technically lying and it would be better if each metric had
1355 0 : // a separate label or similar for cancelled requests.
1356 0 : // But we don't have that right now and counter pairs balancing
1357 0 : // out is useful when using the metrics in panels and whatnot.
1358 0 : let now = Instant::now();
1359 0 : self.observe_throttle_start(now);
1360 0 : self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
1361 0 : self.observe_execution_start(now);
1362 0 : self.observe_execution_end_flush_start(now);
1363 0 : }
1364 : }
1365 :
1366 : impl SmgrOpFlushInProgress {
1367 0 : pub(crate) async fn measure<Fut, O>(mut self, mut fut: Fut) -> O
1368 0 : where
1369 0 : Fut: std::future::Future<Output = O>,
1370 0 : {
1371 0 : let mut fut = std::pin::pin!(fut);
1372 0 :
1373 0 : // Whenever observe_guard gets called, or dropped,
1374 0 : // it adds the time elapsed since its last call to metrics.
1375 0 : // Last call is tracked in `now`.
1376 0 : let mut observe_guard = scopeguard::guard(
1377 0 : || {
1378 0 : let now = Instant::now();
1379 0 : let elapsed = now - self.flush_started_at;
1380 0 : self.global_micros
1381 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1382 0 : self.per_timeline_micros
1383 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1384 0 : self.flush_started_at = now;
1385 0 : },
1386 0 : |mut observe| {
1387 0 : observe();
1388 0 : },
1389 0 : );
1390 :
1391 : loop {
1392 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1393 0 : Ok(v) => return v,
1394 0 : Err(_timeout) => {
1395 0 : (*observe_guard)();
1396 0 : }
1397 : }
1398 : }
1399 0 : }
1400 : }
1401 :
1402 : #[derive(
1403 : Debug,
1404 : Clone,
1405 : Copy,
1406 : IntoStaticStr,
1407 : strum_macros::EnumCount,
1408 0 : strum_macros::EnumIter,
1409 : strum_macros::FromRepr,
1410 : enum_map::Enum,
1411 : )]
1412 : #[strum(serialize_all = "snake_case")]
1413 : pub enum SmgrQueryType {
1414 : GetRelExists,
1415 : GetRelSize,
1416 : GetPageAtLsn,
1417 : GetDbSize,
1418 : GetSlruSegment,
1419 : #[cfg(feature = "testing")]
1420 : Test,
1421 : }
1422 :
1423 : pub(crate) struct SmgrQueryTimePerTimeline {
1424 : global_started: [IntCounter; SmgrQueryType::COUNT],
1425 : global_latency: [Histogram; SmgrQueryType::COUNT],
1426 : per_timeline_getpage_started: IntCounter,
1427 : per_timeline_getpage_latency: Histogram,
1428 : global_batch_size: Histogram,
1429 : per_timeline_batch_size: Histogram,
1430 : global_flush_in_progress_micros: IntCounter,
1431 : per_timeline_flush_in_progress_micros: IntCounter,
1432 : global_batch_wait_time: Histogram,
1433 : per_timeline_batch_wait_time: Histogram,
1434 : throttling: Arc<tenant_throttling::Pagestream>,
1435 : }
1436 :
1437 400 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1438 400 : register_int_counter_vec!(
1439 400 : // it's a counter, but the name is prepared to extend it to a histogram of queue depth
1440 400 : "pageserver_smgr_query_started_global_count",
1441 400 : "Number of smgr queries started, aggregated by query type.",
1442 400 : &["smgr_query_type"],
1443 400 : )
1444 400 : .expect("failed to define a metric")
1445 400 : });
1446 :
1447 400 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1448 400 : register_int_counter_vec!(
1449 400 : // it's a counter, but the name is prepared to extend it to a histogram of queue depth
1450 400 : "pageserver_smgr_query_started_count",
1451 400 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1452 400 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1453 400 : )
1454 400 : .expect("failed to define a metric")
1455 400 : });
1456 :
1457 : // Alias so all histograms recording per-timeline smgr timings use the same buckets.
1458 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
1459 :
1460 400 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1461 400 : register_histogram_vec!(
1462 400 : "pageserver_smgr_query_seconds",
1463 400 : "Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
1464 400 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1465 400 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1466 400 : )
1467 400 : .expect("failed to define a metric")
1468 400 : });
1469 :
1470 400 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1471 400 : [
1472 400 : 1,
1473 400 : 10,
1474 400 : 20,
1475 400 : 40,
1476 400 : 60,
1477 400 : 80,
1478 400 : 100,
1479 400 : 200,
1480 400 : 300,
1481 400 : 400,
1482 400 : 500,
1483 400 : 600,
1484 400 : 700,
1485 400 : 800,
1486 400 : 900,
1487 400 : 1_000, // 1ms
1488 400 : 2_000,
1489 400 : 4_000,
1490 400 : 6_000,
1491 400 : 8_000,
1492 400 : 10_000, // 10ms
1493 400 : 20_000,
1494 400 : 40_000,
1495 400 : 60_000,
1496 400 : 80_000,
1497 400 : 100_000,
1498 400 : 200_000,
1499 400 : 400_000,
1500 400 : 600_000,
1501 400 : 800_000,
1502 400 : 1_000_000, // 1s
1503 400 : 2_000_000,
1504 400 : 4_000_000,
1505 400 : 6_000_000,
1506 400 : 8_000_000,
1507 400 : 10_000_000, // 10s
1508 400 : 20_000_000,
1509 400 : 50_000_000,
1510 400 : 100_000_000,
1511 400 : 200_000_000,
1512 400 : 1_000_000_000, // 1000s
1513 400 : ]
1514 400 : .into_iter()
1515 400 : .map(Duration::from_micros)
1516 16400 : .map(|d| d.as_secs_f64())
1517 400 : .collect()
1518 400 : });
1519 :
1520 400 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1521 400 : register_histogram_vec!(
1522 400 : "pageserver_smgr_query_seconds_global",
1523 400 : "Like pageserver_smgr_query_seconds, but aggregated to instance level.",
1524 400 : &["smgr_query_type"],
1525 400 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1526 400 : )
1527 400 : .expect("failed to define a metric")
1528 400 : });
1529 :
1530 400 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1531 400 : (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
1532 12800 : .map(|v| v.into())
1533 400 : .collect()
1534 400 : });
1535 :
1536 400 : static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1537 400 : register_histogram!(
1538 400 : "pageserver_page_service_batch_size_global",
1539 400 : "Batch size of pageserver page service requests",
1540 400 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
1541 400 : )
1542 400 : .expect("failed to define a metric")
1543 400 : });
1544 :
1545 400 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1546 400 : let mut buckets = Vec::new();
1547 2800 : for i in 0.. {
1548 2800 : let bucket = 1 << i;
1549 2800 : if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
1550 400 : break;
1551 2400 : }
1552 2400 : buckets.push(bucket.into());
1553 : }
1554 400 : buckets
1555 400 : });
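// A hedged, self-contained sketch of what the loop above produces, assuming (hypothetically)
// that Timeline::MAX_GET_VECTORED_KEYS were 32 -- the real constant lives elsewhere in the
// crate. The per-timeline buckets are the powers of two up to and including that maximum.
#[cfg(test)]
mod batch_size_bucket_sketch {
    #[test]
    fn power_of_two_buckets_up_to_assumed_max() {
        let assumed_max = 32u32; // stand-in for Timeline::MAX_GET_VECTORED_KEYS
        let mut buckets = Vec::new();
        for i in 0.. {
            let bucket = 1u32 << i;
            if bucket > assumed_max {
                break;
            }
            buckets.push(f64::from(bucket));
        }
        assert_eq!(buckets, vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0]);
    }
}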
1556 :
1557 400 : static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1558 400 : register_histogram_vec!(
1559 400 : "pageserver_page_service_batch_size",
1560 400 : "Batch size of pageserver page service requests",
1561 400 : &["tenant_id", "shard_id", "timeline_id"],
1562 400 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
1563 400 : )
1564 400 : .expect("failed to define a metric")
1565 400 : });
1566 :
1567 0 : pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1568 0 : register_int_gauge_vec!(
1569 0 : "pageserver_page_service_config_max_batch_size",
1570 0 : "Configured maximum batch size for the server-side batching functionality of page_service. \
1571 0 : Labels expose more of the configuration parameters.",
1572 0 : &["mode", "execution"]
1573 0 : )
1574 0 : .expect("failed to define a metric")
1575 0 : });
1576 :
1577 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
1578 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
1579 0 : let (label_values, value) = match conf {
1580 0 : PageServicePipeliningConfig::Serial => (["serial", "-"], 1),
1581 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
1582 0 : max_batch_size,
1583 0 : execution,
1584 0 : }) => {
1585 0 : let mode = "pipelined";
1586 0 : let execution = match execution {
1587 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
1588 0 : "concurrent-futures"
1589 : }
1590 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
1591 : };
1592 0 : ([mode, execution], max_batch_size.get())
1593 : }
1594 : };
1595 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
1596 0 : .with_label_values(&label_values)
1597 0 : .set(value.try_into().unwrap());
1598 0 : }
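// Worked illustration of the mapping above, under the assumption of a pipelined config with
// max_batch_size = 32 and the concurrent-futures execution strategy: after the reset, the
// gauge carries a single sample
//   pageserver_page_service_config_max_batch_size{mode="pipelined",execution="concurrent-futures"} = 32
// whereas the serial config would instead yield {mode="serial",execution="-"} = 1.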
1599 :
1600 400 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
1601 400 : register_int_counter_vec!(
1602 400 : "pageserver_page_service_pagestream_flush_in_progress_micros",
1603 400 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
1604 400 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
1605 400 : easily discoverable in monitoring. \
1606 400 : Hence, this is NOT a completion latency histogram.",
1607 400 : &["tenant_id", "shard_id", "timeline_id"],
1608 400 : )
1609 400 : .expect("failed to define a metric")
1610 400 : });
1611 :
1612 400 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
1613 400 : register_int_counter!(
1614 400 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
1615 400 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
1616 400 : )
1617 400 : .expect("failed to define a metric")
1618 400 : });
1619 :
1620 400 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1621 400 : register_histogram_vec!(
1622 400 : "pageserver_page_service_pagestream_batch_wait_time_seconds",
1623 400 : "Time a request spent waiting in its batch until the batch moved to throttle&execution.",
1624 400 : &["tenant_id", "shard_id", "timeline_id"],
1625 400 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1626 400 : )
1627 400 : .expect("failed to define a metric")
1628 400 : });
1629 :
1630 400 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1631 400 : register_histogram!(
1632 400 : "pageserver_page_service_pagestream_batch_wait_time_seconds_global",
1633 400 : "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
1634 400 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
1635 400 : )
1636 400 : .expect("failed to define a metric")
1637 400 : });
1638 :
1639 : impl SmgrQueryTimePerTimeline {
1640 892 : pub(crate) fn new(
1641 892 : tenant_shard_id: &TenantShardId,
1642 892 : timeline_id: &TimelineId,
1643 892 : pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,
1644 892 : ) -> Self {
1645 892 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1646 892 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1647 892 : let timeline_id = timeline_id.to_string();
1648 5352 : let global_started = std::array::from_fn(|i| {
1649 5352 : let op = SmgrQueryType::from_repr(i).unwrap();
1650 5352 : SMGR_QUERY_STARTED_GLOBAL
1651 5352 : .get_metric_with_label_values(&[op.into()])
1652 5352 : .unwrap()
1653 5352 : });
1654 5352 : let global_latency = std::array::from_fn(|i| {
1655 5352 : let op = SmgrQueryType::from_repr(i).unwrap();
1656 5352 : SMGR_QUERY_TIME_GLOBAL
1657 5352 : .get_metric_with_label_values(&[op.into()])
1658 5352 : .unwrap()
1659 5352 : });
1660 892 :
1661 892 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
1662 892 : .get_metric_with_label_values(&[
1663 892 : SmgrQueryType::GetPageAtLsn.into(),
1664 892 : &tenant_id,
1665 892 : &shard_slug,
1666 892 : &timeline_id,
1667 892 : ])
1668 892 : .unwrap();
1669 892 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1670 892 : .get_metric_with_label_values(&[
1671 892 : SmgrQueryType::GetPageAtLsn.into(),
1672 892 : &tenant_id,
1673 892 : &shard_slug,
1674 892 : &timeline_id,
1675 892 : ])
1676 892 : .unwrap();
1677 892 :
1678 892 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
1679 892 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
1680 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1681 892 : .unwrap();
1682 892 :
1683 892 : let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
1684 892 : let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
1685 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1686 892 : .unwrap();
1687 892 :
1688 892 : let global_flush_in_progress_micros =
1689 892 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
1690 892 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
1691 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1692 892 : .unwrap();
1693 892 :
1694 892 : Self {
1695 892 : global_started,
1696 892 : global_latency,
1697 892 : per_timeline_getpage_latency,
1698 892 : per_timeline_getpage_started,
1699 892 : global_batch_size,
1700 892 : per_timeline_batch_size,
1701 892 : global_flush_in_progress_micros,
1702 892 : per_timeline_flush_in_progress_micros,
1703 892 : global_batch_wait_time,
1704 892 : per_timeline_batch_wait_time,
1705 892 : throttling: pagestream_throttle_metrics,
1706 892 : }
1707 892 : }
1708 0 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
1709 0 : self.global_started[op as usize].inc();
1710 :
1711 0 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1712 0 : self.per_timeline_getpage_started.inc();
1713 0 : Some(self.per_timeline_getpage_latency.clone())
1714 : } else {
1715 0 : None
1716 : };
1717 :
1718 0 : SmgrOpTimer(Some(SmgrOpTimerInner {
1719 0 : global_execution_latency_histo: self.global_latency[op as usize].clone(),
1720 0 : per_timeline_execution_latency_histo: per_timeline_latency_histo,
1721 0 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
1722 0 : per_timeline_flush_in_progress_micros: self
1723 0 : .per_timeline_flush_in_progress_micros
1724 0 : .clone(),
1725 0 : global_batch_wait_time: self.global_batch_wait_time.clone(),
1726 0 : per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
1727 0 : throttling: self.throttling.clone(),
1728 0 : timings: SmgrOpTimerState::Received { received_at },
1729 0 : }))
1730 0 : }
1731 :
1732 : /// TODO: do something about this? seems odd, we have a similar call on SmgrOpTimer
1733 0 : pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
1734 0 : self.global_batch_size.observe(batch_size as f64);
1735 0 : self.per_timeline_batch_size.observe(batch_size as f64);
1736 0 : }
1737 : }
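// A hypothetical call-site sketch (the real wiring lives in page_service and is not shown in
// this excerpt): start a timer for a getpage request and record the size of the batch it was
// grouped into. The SmgrOpTimer's own phase transitions (batch wait, throttle, execution,
// flush) are driven elsewhere by the request handler.
fn getpage_timing_sketch(metrics: &SmgrQueryTimePerTimeline) -> SmgrOpTimer {
    let timer = metrics.start_smgr_op(SmgrQueryType::GetPageAtLsn, Instant::now());
    metrics.observe_getpage_batch_start(8); // e.g. 8 getpage requests in this batch
    timer
}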
1738 :
1739 : // keep in sync with control plane Go code so that we can validate
1740 : // compute's basebackup_ms metric against our own measurements in the context of SLI/SLO.
1741 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1742 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1743 0 : [
1744 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1745 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1746 0 : ]
1747 0 : .map(|ms| (ms as f64) / 1000.0)
1748 0 : });
1749 :
1750 : pub(crate) struct BasebackupQueryTime {
1751 : ok: Histogram,
1752 : error: Histogram,
1753 : client_error: Histogram,
1754 : }
1755 :
1756 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1757 0 : let vec = register_histogram_vec!(
1758 0 : "pageserver_basebackup_query_seconds",
1759 0 : "Histogram of basebackup queries durations, by result type",
1760 0 : &["result"],
1761 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1762 0 : )
1763 0 : .expect("failed to define a metric");
1764 0 : BasebackupQueryTime {
1765 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1766 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1767 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
1768 0 : }
1769 0 : });
1770 :
1771 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
1772 : parent: &'a BasebackupQueryTime,
1773 : start: std::time::Instant,
1774 : }
1775 :
1776 : impl BasebackupQueryTime {
1777 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
1778 0 : let start = Instant::now();
1779 0 : BasebackupQueryTimeOngoingRecording {
1780 0 : parent: self,
1781 0 : start,
1782 0 : }
1783 0 : }
1784 : }
1785 :
1786 : impl BasebackupQueryTimeOngoingRecording<'_> {
1787 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
1788 0 : let elapsed = self.start.elapsed().as_secs_f64();
1789 : // If you want to change how a specific error is categorized, also change it in `log_query_error`.
1790 0 : let metric = match res {
1791 0 : Ok(_) => &self.parent.ok,
1792 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
1793 0 : if is_expected_io_error(io_error) =>
1794 0 : {
1795 0 : &self.parent.client_error
1796 : }
1797 0 : Err(_) => &self.parent.error,
1798 : };
1799 0 : metric.observe(elapsed);
1800 0 : }
1801 : }
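// Minimal usage sketch, not taken from the real call site: time a basebackup query and
// classify the outcome (ok / client_error / error) from its Result.
fn basebackup_timing_sketch(run_query: impl FnOnce() -> Result<(), QueryError>) {
    let recording = BASEBACKUP_QUERY_TIME.start_recording();
    let res = run_query();
    recording.observe(&res);
}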
1802 :
1803 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1804 0 : register_int_counter_pair_vec!(
1805 0 : "pageserver_live_connections_started",
1806 0 : "Number of network connections that we started handling",
1807 0 : "pageserver_live_connections_finished",
1808 0 : "Number of network connections that we finished handling",
1809 0 : &["pageserver_connection_kind"]
1810 0 : )
1811 0 : .expect("failed to define a metric")
1812 0 : });
1813 :
1814 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
1815 : pub(crate) enum ComputeCommandKind {
1816 : PageStreamV3,
1817 : PageStreamV2,
1818 : Basebackup,
1819 : Fullbackup,
1820 : LeaseLsn,
1821 : }
1822 :
1823 : pub(crate) struct ComputeCommandCounters {
1824 : map: EnumMap<ComputeCommandKind, IntCounter>,
1825 : }
1826 :
1827 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
1828 0 : let inner = register_int_counter_vec!(
1829 0 : "pageserver_compute_commands",
1830 0 : "Number of compute -> pageserver commands processed",
1831 0 : &["command"]
1832 0 : )
1833 0 : .expect("failed to define a metric");
1834 0 :
1835 0 : ComputeCommandCounters {
1836 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
1837 0 : let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
1838 0 : let command_str: &'static str = command.into();
1839 0 : inner.with_label_values(&[command_str])
1840 0 : })),
1841 0 : }
1842 0 : });
1843 :
1844 : impl ComputeCommandCounters {
1845 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
1846 0 : &self.map[command]
1847 0 : }
1848 : }
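// Hypothetical call-site sketch: the real increments happen where compute commands are
// parsed; shown here only to illustrate the EnumMap-backed lookup.
fn count_compute_command_sketch() {
    COMPUTE_COMMANDS_COUNTERS
        .for_command(ComputeCommandKind::Basebackup)
        .inc();
}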
1849 :
1850 : // remote storage metrics
1851 :
1852 392 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1853 392 : register_int_counter_pair_vec!(
1854 392 : "pageserver_remote_timeline_client_calls_started",
1855 392 : "Number of started calls to remote timeline client.",
1856 392 : "pageserver_remote_timeline_client_calls_finished",
1857 392 : "Number of finshed calls to remote timeline client.",
1858 392 : &[
1859 392 : "tenant_id",
1860 392 : "shard_id",
1861 392 : "timeline_id",
1862 392 : "file_kind",
1863 392 : "op_kind"
1864 392 : ],
1865 392 : )
1866 392 : .unwrap()
1867 392 : });
1868 :
1869 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1870 388 : Lazy::new(|| {
1871 388 : register_int_counter_vec!(
1872 388 : "pageserver_remote_timeline_client_bytes_started",
1873 388 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1874 388 : The increment happens when the operation is scheduled.",
1875 388 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1876 388 : )
1877 388 : .expect("failed to define a metric")
1878 388 : });
1879 :
1880 388 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1881 388 : register_int_counter_vec!(
1882 388 : "pageserver_remote_timeline_client_bytes_finished",
1883 388 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1884 388 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1885 388 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1886 388 : )
1887 388 : .expect("failed to define a metric")
1888 388 : });
1889 :
1890 : pub(crate) struct TenantManagerMetrics {
1891 : tenant_slots_attached: UIntGauge,
1892 : tenant_slots_secondary: UIntGauge,
1893 : tenant_slots_inprogress: UIntGauge,
1894 : pub(crate) tenant_slot_writes: IntCounter,
1895 : pub(crate) unexpected_errors: IntCounter,
1896 : }
1897 :
1898 : impl TenantManagerMetrics {
1899 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
1900 : /// exactly: they track the lifetime of the slots _in the tenant map_.
1901 4 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
1902 4 : match slot {
1903 0 : TenantSlot::Attached(_) => {
1904 0 : self.tenant_slots_attached.inc();
1905 0 : }
1906 0 : TenantSlot::Secondary(_) => {
1907 0 : self.tenant_slots_secondary.inc();
1908 0 : }
1909 4 : TenantSlot::InProgress(_) => {
1910 4 : self.tenant_slots_inprogress.inc();
1911 4 : }
1912 : }
1913 4 : }
1914 :
1915 4 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
1916 4 : match slot {
1917 4 : TenantSlot::Attached(_) => {
1918 4 : self.tenant_slots_attached.dec();
1919 4 : }
1920 0 : TenantSlot::Secondary(_) => {
1921 0 : self.tenant_slots_secondary.dec();
1922 0 : }
1923 0 : TenantSlot::InProgress(_) => {
1924 0 : self.tenant_slots_inprogress.dec();
1925 0 : }
1926 : }
1927 4 : }
1928 :
1929 : #[cfg(all(debug_assertions, not(test)))]
1930 0 : pub(crate) fn slots_total(&self) -> u64 {
1931 0 : self.tenant_slots_attached.get()
1932 0 : + self.tenant_slots_secondary.get()
1933 0 : + self.tenant_slots_inprogress.get()
1934 0 : }
1935 : }
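// Sketch of the intended pairing (the real calls are in tenant::mgr, not shown here): every
// mutation of the tenant map removes the old slot's gauge contribution and adds the new one,
// so the three per-mode gauges always sum to the map size.
fn swap_slot_sketch(old: &TenantSlot, new: &TenantSlot) {
    TENANT_MANAGER.slot_removed(old);
    TENANT_MANAGER.slot_inserted(new);
    TENANT_MANAGER.tenant_slot_writes.inc();
}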
1936 :
1937 4 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1938 4 : let tenant_slots = register_uint_gauge_vec!(
1939 4 : "pageserver_tenant_manager_slots",
1940 4 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1941 4 : &["mode"]
1942 4 : )
1943 4 : .expect("failed to define a metric");
1944 4 : TenantManagerMetrics {
1945 4 : tenant_slots_attached: tenant_slots
1946 4 : .get_metric_with_label_values(&["attached"])
1947 4 : .unwrap(),
1948 4 : tenant_slots_secondary: tenant_slots
1949 4 : .get_metric_with_label_values(&["secondary"])
1950 4 : .unwrap(),
1951 4 : tenant_slots_inprogress: tenant_slots
1952 4 : .get_metric_with_label_values(&["inprogress"])
1953 4 : .unwrap(),
1954 4 : tenant_slot_writes: register_int_counter!(
1955 4 : "pageserver_tenant_manager_slot_writes",
1956 4 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1957 4 : )
1958 4 : .expect("failed to define a metric"),
1959 4 : unexpected_errors: register_int_counter!(
1960 4 : "pageserver_tenant_manager_unexpected_errors_total",
1961 4 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1962 4 : )
1963 4 : .expect("failed to define a metric"),
1964 4 : }
1965 4 : });
1966 :
1967 : pub(crate) struct DeletionQueueMetrics {
1968 : pub(crate) keys_submitted: IntCounter,
1969 : pub(crate) keys_dropped: IntCounter,
1970 : pub(crate) keys_executed: IntCounter,
1971 : pub(crate) keys_validated: IntCounter,
1972 : pub(crate) dropped_lsn_updates: IntCounter,
1973 : pub(crate) unexpected_errors: IntCounter,
1974 : pub(crate) remote_errors: IntCounterVec,
1975 : }
1976 67 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1977 67 : DeletionQueueMetrics{
1978 67 :
1979 67 : keys_submitted: register_int_counter!(
1980 67 : "pageserver_deletion_queue_submitted_total",
1981 67 : "Number of objects submitted for deletion"
1982 67 : )
1983 67 : .expect("failed to define a metric"),
1984 67 :
1985 67 : keys_dropped: register_int_counter!(
1986 67 : "pageserver_deletion_queue_dropped_total",
1987 67 : "Number of object deletions dropped due to stale generation."
1988 67 : )
1989 67 : .expect("failed to define a metric"),
1990 67 :
1991 67 : keys_executed: register_int_counter!(
1992 67 : "pageserver_deletion_queue_executed_total",
1993 67 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
1994 67 : )
1995 67 : .expect("failed to define a metric"),
1996 67 :
1997 67 : keys_validated: register_int_counter!(
1998 67 : "pageserver_deletion_queue_validated_total",
1999 67 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
2000 67 : )
2001 67 : .expect("failed to define a metric"),
2002 67 :
2003 67 : dropped_lsn_updates: register_int_counter!(
2004 67 : "pageserver_deletion_queue_dropped_lsn_updates_total",
2005 67 : "Updates to remote_consistent_lsn dropped due to stale generation number."
2006 67 : )
2007 67 : .expect("failed to define a metric"),
2008 67 : unexpected_errors: register_int_counter!(
2009 67 : "pageserver_deletion_queue_unexpected_errors_total",
2010 67 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
2011 67 : )
2012 67 : .expect("failed to define a metric"),
2013 67 : remote_errors: register_int_counter_vec!(
2014 67 : "pageserver_deletion_queue_remote_errors_total",
2015 67 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
2016 67 : &["op_kind"],
2017 67 : )
2018 67 : .expect("failed to define a metric")
2019 67 : }
2020 67 : });
2021 :
2022 : pub(crate) struct SecondaryModeMetrics {
2023 : pub(crate) upload_heatmap: IntCounter,
2024 : pub(crate) upload_heatmap_errors: IntCounter,
2025 : pub(crate) upload_heatmap_duration: Histogram,
2026 : pub(crate) download_heatmap: IntCounter,
2027 : pub(crate) download_layer: IntCounter,
2028 : }
2029 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2030 0 : SecondaryModeMetrics {
2031 0 : upload_heatmap: register_int_counter!(
2032 0 : "pageserver_secondary_upload_heatmap",
2033 0 : "Number of heatmaps written to remote storage by attached tenants"
2034 0 : )
2035 0 : .expect("failed to define a metric"),
2036 0 : upload_heatmap_errors: register_int_counter!(
2037 0 : "pageserver_secondary_upload_heatmap_errors",
2038 0 : "Failures writing heatmap to remote storage"
2039 0 : )
2040 0 : .expect("failed to define a metric"),
2041 0 : upload_heatmap_duration: register_histogram!(
2042 0 : "pageserver_secondary_upload_heatmap_duration",
2043 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2044 0 : )
2045 0 : .expect("failed to define a metric"),
2046 0 : download_heatmap: register_int_counter!(
2047 0 : "pageserver_secondary_download_heatmap",
2048 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2049 0 : )
2050 0 : .expect("failed to define a metric"),
2051 0 : download_layer: register_int_counter!(
2052 0 : "pageserver_secondary_download_layer",
2053 0 : "Number of downloads of layers by secondary mode locations"
2054 0 : )
2055 0 : .expect("failed to define a metric"),
2056 0 : }
2057 0 : });
2058 :
2059 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2060 0 : register_uint_gauge_vec!(
2061 0 : "pageserver_secondary_resident_physical_size",
2062 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2063 0 : &["tenant_id", "shard_id"]
2064 0 : )
2065 0 : .expect("failed to define a metric")
2066 0 : });
2067 :
2068 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2069 0 : register_uint_gauge!(
2070 0 : "pageserver_utilization_score",
2071 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2072 0 : )
2073 0 : .expect("failed to define a metric")
2074 0 : });
2075 :
2076 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2077 0 : register_uint_gauge_vec!(
2078 0 : "pageserver_secondary_heatmap_total_size",
2079 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2080 0 : &["tenant_id", "shard_id"]
2081 0 : )
2082 0 : .expect("failed to define a metric")
2083 0 : });
2084 :
2085 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2086 : pub enum RemoteOpKind {
2087 : Upload,
2088 : Download,
2089 : Delete,
2090 : }
2091 : impl RemoteOpKind {
2092 30295 : pub fn as_str(&self) -> &'static str {
2093 30295 : match self {
2094 28508 : Self::Upload => "upload",
2095 104 : Self::Download => "download",
2096 1683 : Self::Delete => "delete",
2097 : }
2098 30295 : }
2099 : }
2100 :
2101 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
2102 : pub enum RemoteOpFileKind {
2103 : Layer,
2104 : Index,
2105 : }
2106 : impl RemoteOpFileKind {
2107 30295 : pub fn as_str(&self) -> &'static str {
2108 30295 : match self {
2109 21240 : Self::Layer => "layer",
2110 9055 : Self::Index => "index",
2111 : }
2112 30295 : }
2113 : }
2114 :
2115 385 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
2116 385 : register_histogram_vec!(
2117 385 : "pageserver_remote_operation_seconds",
2118 385 : "Time spent on remote storage operations. \
2119 385 : Grouped by file_kind, op_kind and status. \
2120 385 : Does not account for time spent waiting in remote timeline client's queues.",
2121 385 : &["file_kind", "op_kind", "status"]
2122 385 : )
2123 385 : .expect("failed to define a metric")
2124 385 : });
2125 :
2126 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2127 0 : register_int_counter_vec!(
2128 0 : "pageserver_tenant_task_events",
2129 0 : "Number of task start/stop/fail events.",
2130 0 : &["event"],
2131 0 : )
2132 0 : .expect("Failed to register tenant_task_events metric")
2133 0 : });
2134 :
2135 : pub struct BackgroundLoopSemaphoreMetrics {
2136 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
2137 : durations: EnumMap<BackgroundLoopKind, Counter>,
2138 : }
2139 :
2140 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> = Lazy::new(
2141 40 : || {
2142 40 : let counters = register_int_counter_pair_vec!(
2143 40 : "pageserver_background_loop_semaphore_wait_start_count",
2144 40 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2145 40 : "pageserver_background_loop_semaphore_wait_finish_count",
2146 40 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2147 40 : &["task"],
2148 40 : )
2149 40 : .unwrap();
2150 40 :
2151 40 : let durations = register_counter_vec!(
2152 40 : "pageserver_background_loop_semaphore_wait_duration_seconds",
2153 40 : "Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls",
2154 40 : &["task"],
2155 40 : )
2156 40 : .unwrap();
2157 40 :
2158 40 : BackgroundLoopSemaphoreMetrics {
2159 360 : counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2160 360 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2161 360 : counters.with_label_values(&[kind.into()])
2162 360 : })),
2163 360 : durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2164 360 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2165 360 : durations.with_label_values(&[kind.into()])
2166 360 : })),
2167 40 : }
2168 40 : },
2169 : );
2170 :
2171 : impl BackgroundLoopSemaphoreMetrics {
2172 728 : pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ {
2173 : struct Record<'a> {
2174 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2175 : task: BackgroundLoopKind,
2176 : _counter_guard: metrics::IntCounterPairGuard,
2177 : start: Instant,
2178 : }
2179 : impl Drop for Record<'_> {
2180 728 : fn drop(&mut self) {
2181 728 : let elapsed = self.start.elapsed().as_secs_f64();
2182 728 : self.metrics.durations[self.task].inc_by(elapsed);
2183 728 : }
2184 : }
2185 728 : Record {
2186 728 : metrics: self,
2187 728 : task,
2188 728 : _counter_guard: self.counters[task].guard(),
2189 728 : start: Instant::now(),
2190 728 : }
2191 728 : }
2192 : }
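// Usage sketch, assuming (hypothetically) that a tokio::sync::Semaphore limits background
// loop concurrency; the actual acquisition code lives in tenant::tasks. The guard records
// the wait duration when dropped, so drop it as soon as the permit is acquired rather than
// holding it for the whole task.
async fn acquire_with_metrics_sketch(
    semaphore: &tokio::sync::Semaphore,
    task: BackgroundLoopKind,
) -> tokio::sync::SemaphorePermit<'_> {
    let wait_guard = BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(task);
    let permit = semaphore.acquire().await.expect("semaphore closed");
    drop(wait_guard); // records the wait duration and the finished counter
    permit
}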
2193 :
2194 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2195 0 : register_int_counter_vec!(
2196 0 : "pageserver_background_loop_period_overrun_count",
2197 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2198 0 : &["task", "period"],
2199 0 : )
2200 0 : .expect("failed to define a metric")
2201 0 : });
2202 :
2203 : // walreceiver metrics
2204 :
2205 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2206 0 : register_int_counter!(
2207 0 : "pageserver_walreceiver_started_connections_total",
2208 0 : "Number of started walreceiver connections"
2209 0 : )
2210 0 : .expect("failed to define a metric")
2211 0 : });
2212 :
2213 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2214 0 : register_int_gauge!(
2215 0 : "pageserver_walreceiver_active_managers",
2216 0 : "Number of active walreceiver managers"
2217 0 : )
2218 0 : .expect("failed to define a metric")
2219 0 : });
2220 :
2221 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2222 0 : register_int_counter_vec!(
2223 0 : "pageserver_walreceiver_switches_total",
2224 0 : "Number of walreceiver manager change_connection calls",
2225 0 : &["reason"]
2226 0 : )
2227 0 : .expect("failed to define a metric")
2228 0 : });
2229 :
2230 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2231 0 : register_int_counter!(
2232 0 : "pageserver_walreceiver_broker_updates_total",
2233 0 : "Number of received broker updates in walreceiver"
2234 0 : )
2235 0 : .expect("failed to define a metric")
2236 0 : });
2237 :
2238 4 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2239 4 : register_int_counter_vec!(
2240 4 : "pageserver_walreceiver_candidates_events_total",
2241 4 : "Number of walreceiver candidate events",
2242 4 : &["event"]
2243 4 : )
2244 4 : .expect("failed to define a metric")
2245 4 : });
2246 :
2247 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2248 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2249 :
2250 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2251 4 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2252 :
2253 : // Metrics collected on WAL redo operations
2254 : //
2255 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2256 : // for access to the postgres process ('wait') since there is only one for
2257 : // each tenant.
2258 :
2259 : /// Time buckets are small because we want to be able to measure the
2260 : /// smallest redo processing times. These buckets allow us to measure down
2261 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
2262 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
2263 : ///
2264 : /// Values up to 1s are recorded because metrics show that we have redo
2265 : /// durations and lock times larger than 0.250s.
2266 : macro_rules! redo_histogram_time_buckets {
2267 : () => {
2268 : vec![
2269 : 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
2270 : 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
2271 : 1.000_000,
2272 : ]
2273 : };
2274 : }
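// Worked example of the figures quoted above, assuming 8 KiB pages: a 5 us redo means
// 1 / 0.000_005 = 200_000 redos per second, i.e. 200_000 * 8 KiB ≈ 1.6 GB/s of pages.
// The old 5 ms floor gives 200 redos/s ≈ 1.6 MB/s by the same arithmetic, which is why
// the smallest bucket was lowered to 5 us.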
2275 :
2276 : /// While we're at it, also measure the amount of records replayed in each
2277 : /// operation. We have a global 'total replayed' counter, but that's not
2278 : /// as useful as 'what is the skew for how many records we replay in one
2279 : /// operation'.
2280 : macro_rules! redo_histogram_count_buckets {
2281 : () => {
2282 : vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
2283 : };
2284 : }
2285 :
2286 : macro_rules! redo_bytes_histogram_count_buckets {
2287 : () => {
2288 : // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
2289 : // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
2290 : vec![
2291 : 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2292 : 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
2293 : ]
2294 : };
2295 : }
2296 :
2297 : pub(crate) struct WalIngestMetrics {
2298 : pub(crate) bytes_received: IntCounter,
2299 : pub(crate) records_received: IntCounter,
2300 : pub(crate) records_observed: IntCounter,
2301 : pub(crate) records_committed: IntCounter,
2302 : pub(crate) records_filtered: IntCounter,
2303 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2304 : pub(crate) clear_vm_bits_unknown: IntCounterVec,
2305 : }
2306 :
2307 20 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
2308 20 : WalIngestMetrics {
2309 20 : bytes_received: register_int_counter!(
2310 20 : "pageserver_wal_ingest_bytes_received",
2311 20 : "Bytes of WAL ingested from safekeepers",
2312 20 : )
2313 20 : .unwrap(),
2314 20 : records_received: register_int_counter!(
2315 20 : "pageserver_wal_ingest_records_received",
2316 20 : "Number of WAL records received from safekeepers"
2317 20 : )
2318 20 : .expect("failed to define a metric"),
2319 20 : records_observed: register_int_counter!(
2320 20 : "pageserver_wal_ingest_records_observed",
2321 20 : "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
2322 20 : )
2323 20 : .expect("failed to define a metric"),
2324 20 : records_committed: register_int_counter!(
2325 20 : "pageserver_wal_ingest_records_committed",
2326 20 : "Number of WAL records which resulted in writes to pageserver storage"
2327 20 : )
2328 20 : .expect("failed to define a metric"),
2329 20 : records_filtered: register_int_counter!(
2330 20 : "pageserver_wal_ingest_records_filtered",
2331 20 : "Number of WAL records filtered out due to sharding"
2332 20 : )
2333 20 : .expect("failed to define a metric"),
2334 20 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2335 20 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2336 20 : "Total number of zero gap blocks written on relation extends"
2337 20 : )
2338 20 : .expect("failed to define a metric"),
2339 20 : clear_vm_bits_unknown: register_int_counter_vec!(
2340 20 : "pageserver_wal_ingest_clear_vm_bits_unknown",
2341 20 : "Number of ignored ClearVmBits operations due to unknown pages/relations",
2342 20 : &["entity"],
2343 20 : )
2344 20 : .expect("failed to define a metric"),
2345 20 : }
2346 20 : });
2347 :
2348 400 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2349 400 : register_int_counter_vec!(
2350 400 : "pageserver_timeline_wal_records_received",
2351 400 : "Number of WAL records received per shard",
2352 400 : &["tenant_id", "shard_id", "timeline_id"]
2353 400 : )
2354 400 : .expect("failed to define a metric")
2355 400 : });
2356 :
2357 12 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2358 12 : register_histogram!(
2359 12 : "pageserver_wal_redo_seconds",
2360 12 : "Time spent on WAL redo",
2361 12 : redo_histogram_time_buckets!()
2362 12 : )
2363 12 : .expect("failed to define a metric")
2364 12 : });
2365 :
2366 12 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2367 12 : register_histogram!(
2368 12 : "pageserver_wal_redo_records_histogram",
2369 12 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2370 12 : redo_histogram_count_buckets!(),
2371 12 : )
2372 12 : .expect("failed to define a metric")
2373 12 : });
2374 :
2375 12 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2376 12 : register_histogram!(
2377 12 : "pageserver_wal_redo_bytes_histogram",
2378 12 : "Histogram of number of records replayed per redo sent to Postgres",
2379 12 : redo_bytes_histogram_count_buckets!(),
2380 12 : )
2381 12 : .expect("failed to define a metric")
2382 12 : });
2383 :
2384 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2385 12 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2386 12 : register_int_counter!(
2387 12 : "pageserver_replayed_wal_records_total",
2388 12 : "Number of WAL records replayed in WAL redo process"
2389 12 : )
2390 12 : .unwrap()
2391 12 : });
2392 :
2393 : #[rustfmt::skip]
2394 16 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2395 16 : register_histogram!(
2396 16 : "pageserver_wal_redo_process_launch_duration",
2397 16 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2398 16 : vec![
2399 16 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2400 16 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2401 16 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2402 16 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2403 16 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2404 16 : ],
2405 16 : )
2406 16 : .expect("failed to define a metric")
2407 16 : });
2408 :
2409 : pub(crate) struct WalRedoProcessCounters {
2410 : pub(crate) started: IntCounter,
2411 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
2412 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2413 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2414 : }
2415 :
2416 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2417 : pub(crate) enum WalRedoKillCause {
2418 : WalRedoProcessDrop,
2419 : NoLeakChildDrop,
2420 : Startup,
2421 : }
2422 :
2423 : impl Default for WalRedoProcessCounters {
2424 16 : fn default() -> Self {
2425 16 : let started = register_int_counter!(
2426 16 : "pageserver_wal_redo_process_started_total",
2427 16 : "Number of WAL redo processes started",
2428 16 : )
2429 16 : .unwrap();
2430 16 :
2431 16 : let killed = register_int_counter_vec!(
2432 16 : "pageserver_wal_redo_process_stopped_total",
2433 16 : "Number of WAL redo processes stopped",
2434 16 : &["cause"],
2435 16 : )
2436 16 : .unwrap();
2437 16 :
2438 16 : let active_stderr_logger_tasks_started = register_int_counter!(
2439 16 : "pageserver_walredo_stderr_logger_tasks_started_total",
2440 16 : "Number of active walredo stderr logger tasks that have started",
2441 16 : )
2442 16 : .unwrap();
2443 16 :
2444 16 : let active_stderr_logger_tasks_finished = register_int_counter!(
2445 16 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2446 16 : "Number of active walredo stderr logger tasks that have finished",
2447 16 : )
2448 16 : .unwrap();
2449 16 :
2450 16 : Self {
2451 16 : started,
2452 48 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2453 48 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
2454 48 : let cause_str: &'static str = cause.into();
2455 48 : killed.with_label_values(&[cause_str])
2456 48 : })),
2457 16 : active_stderr_logger_tasks_started,
2458 16 : active_stderr_logger_tasks_finished,
2459 16 : }
2460 16 : }
2461 : }
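// Illustrative lifecycle sketch (the real increments live in the walredo module): record a
// process start and, later, the cause for which it was stopped.
fn walredo_process_lifecycle_sketch() {
    WAL_REDO_PROCESS_COUNTERS.started.inc();
    // ... the redo process runs and is eventually torn down ...
    WAL_REDO_PROCESS_COUNTERS.killed_by_cause[WalRedoKillCause::WalRedoProcessDrop].inc();
}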
2462 :
2463 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2464 : Lazy::new(WalRedoProcessCounters::default);
2465 :
2466 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2467 : pub(crate) struct StorageTimeMetricsTimer {
2468 : metrics: StorageTimeMetrics,
2469 : start: Instant,
2470 : }
2471 :
2472 : impl StorageTimeMetricsTimer {
2473 4236 : fn new(metrics: StorageTimeMetrics) -> Self {
2474 4236 : Self {
2475 4236 : metrics,
2476 4236 : start: Instant::now(),
2477 4236 : }
2478 4236 : }
2479 :
2480 : /// Returns the elapsed duration of the timer.
2481 4236 : pub fn elapsed(&self) -> Duration {
2482 4236 : self.start.elapsed()
2483 4236 : }
2484 :
2485 : /// Record the time from creation to now and return it.
2486 4236 : pub fn stop_and_record(self) -> Duration {
2487 4236 : let duration = self.elapsed();
2488 4236 : let seconds = duration.as_secs_f64();
2489 4236 : self.metrics.timeline_sum.inc_by(seconds);
2490 4236 : self.metrics.timeline_count.inc();
2491 4236 : self.metrics.global_histogram.observe(seconds);
2492 4236 : duration
2493 4236 : }
2494 :
2495 : /// Turns this timer into one that always records on drop -- usually this means recording
2496 : /// regardless of whether an early `?` return path was taken in a function.
2497 8 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2498 8 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2499 8 : }
2500 : }
2501 :
2502 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2503 :
2504 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2505 8 : fn drop(&mut self) {
2506 8 : if let Some(inner) = self.0.take() {
2507 8 : inner.stop_and_record();
2508 8 : }
2509 8 : }
2510 : }
2511 :
2512 : impl AlwaysRecordingStorageTimeMetricsTimer {
2513 : /// Returns the elapsed duration of the timer.
2514 0 : pub fn elapsed(&self) -> Duration {
2515 0 : self.0.as_ref().expect("not dropped yet").elapsed()
2516 0 : }
2517 : }
2518 :
2519 : /// Timing facilities for a globally histogrammed metric, backed by per-tenant and per-timeline
2520 : /// total sum and count counters.
2521 : #[derive(Clone, Debug)]
2522 : pub(crate) struct StorageTimeMetrics {
2523 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
2524 : timeline_sum: Counter,
2525 : /// Number of operations, per operation, tenant_id and timeline_id
2526 : timeline_count: IntCounter,
2527 : /// Global histogram having only the "operation" label.
2528 : global_histogram: Histogram,
2529 : }
2530 :
2531 : impl StorageTimeMetrics {
2532 8028 : pub fn new(
2533 8028 : operation: StorageTimeOperation,
2534 8028 : tenant_id: &str,
2535 8028 : shard_id: &str,
2536 8028 : timeline_id: &str,
2537 8028 : ) -> Self {
2538 8028 : let operation: &'static str = operation.into();
2539 8028 :
2540 8028 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2541 8028 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2542 8028 : .unwrap();
2543 8028 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2544 8028 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2545 8028 : .unwrap();
2546 8028 : let global_histogram = STORAGE_TIME_GLOBAL
2547 8028 : .get_metric_with_label_values(&[operation])
2548 8028 : .unwrap();
2549 8028 :
2550 8028 : StorageTimeMetrics {
2551 8028 : timeline_sum,
2552 8028 : timeline_count,
2553 8028 : global_histogram,
2554 8028 : }
2555 8028 : }
2556 :
2557 : /// Starts timing a new operation.
2558 : ///
2559 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2560 4236 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2561 4236 : StorageTimeMetricsTimer::new(self.clone())
2562 4236 : }
2563 : }
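// Minimal usage sketches. stop_and_record() must be called explicitly; record_on_drop()
// converts the timer into a guard that records even when an early `?` return is taken.
fn timed_operation_sketch(metrics: &StorageTimeMetrics) -> Duration {
    let timer = metrics.start_timer();
    // ... perform the storage operation ...
    timer.stop_and_record()
}

fn timed_with_early_return_sketch(metrics: &StorageTimeMetrics) -> Result<(), ()> {
    let _guard = metrics.start_timer().record_on_drop();
    // any `?` early return after this point still records, because the guard records on drop
    Ok(())
}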
2564 :
2565 : #[derive(Debug)]
2566 : pub(crate) struct TimelineMetrics {
2567 : tenant_id: String,
2568 : shard_id: String,
2569 : timeline_id: String,
2570 : pub flush_time_histo: StorageTimeMetrics,
2571 : pub flush_delay_histo: StorageTimeMetrics,
2572 : pub compact_time_histo: StorageTimeMetrics,
2573 : pub create_images_time_histo: StorageTimeMetrics,
2574 : pub logical_size_histo: StorageTimeMetrics,
2575 : pub imitate_logical_size_histo: StorageTimeMetrics,
2576 : pub load_layer_map_histo: StorageTimeMetrics,
2577 : pub garbage_collect_histo: StorageTimeMetrics,
2578 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
2579 : pub last_record_lsn_gauge: IntGauge,
2580 : pub disk_consistent_lsn_gauge: IntGauge,
2581 : pub pitr_history_size: UIntGauge,
2582 : pub archival_size: UIntGauge,
2583 : pub(crate) layer_size_image: UIntGauge,
2584 : pub(crate) layer_count_image: UIntGauge,
2585 : pub(crate) layer_size_delta: UIntGauge,
2586 : pub(crate) layer_count_delta: UIntGauge,
2587 : pub standby_horizon_gauge: IntGauge,
2588 : pub resident_physical_size_gauge: UIntGauge,
2589 : pub visible_physical_size_gauge: UIntGauge,
2590 : /// copy of LayeredTimeline.current_logical_size
2591 : pub current_logical_size_gauge: UIntGauge,
2592 : pub aux_file_size_gauge: IntGauge,
2593 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
2594 : pub evictions: IntCounter,
2595 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
2596 : /// Number of valid LSN leases.
2597 : pub valid_lsn_lease_count_gauge: UIntGauge,
2598 : pub wal_records_received: IntCounter,
2599 : shutdown: std::sync::atomic::AtomicBool,
2600 : }
2601 :
2602 : impl TimelineMetrics {
2603 892 : pub fn new(
2604 892 : tenant_shard_id: &TenantShardId,
2605 892 : timeline_id_raw: &TimelineId,
2606 892 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2607 892 : ) -> Self {
2608 892 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2609 892 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2610 892 : let timeline_id = timeline_id_raw.to_string();
2611 892 : let flush_time_histo = StorageTimeMetrics::new(
2612 892 : StorageTimeOperation::LayerFlush,
2613 892 : &tenant_id,
2614 892 : &shard_id,
2615 892 : &timeline_id,
2616 892 : );
2617 892 : let flush_delay_histo = StorageTimeMetrics::new(
2618 892 : StorageTimeOperation::LayerFlushDelay,
2619 892 : &tenant_id,
2620 892 : &shard_id,
2621 892 : &timeline_id,
2622 892 : );
2623 892 : let compact_time_histo = StorageTimeMetrics::new(
2624 892 : StorageTimeOperation::Compact,
2625 892 : &tenant_id,
2626 892 : &shard_id,
2627 892 : &timeline_id,
2628 892 : );
2629 892 : let create_images_time_histo = StorageTimeMetrics::new(
2630 892 : StorageTimeOperation::CreateImages,
2631 892 : &tenant_id,
2632 892 : &shard_id,
2633 892 : &timeline_id,
2634 892 : );
2635 892 : let logical_size_histo = StorageTimeMetrics::new(
2636 892 : StorageTimeOperation::LogicalSize,
2637 892 : &tenant_id,
2638 892 : &shard_id,
2639 892 : &timeline_id,
2640 892 : );
2641 892 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2642 892 : StorageTimeOperation::ImitateLogicalSize,
2643 892 : &tenant_id,
2644 892 : &shard_id,
2645 892 : &timeline_id,
2646 892 : );
2647 892 : let load_layer_map_histo = StorageTimeMetrics::new(
2648 892 : StorageTimeOperation::LoadLayerMap,
2649 892 : &tenant_id,
2650 892 : &shard_id,
2651 892 : &timeline_id,
2652 892 : );
2653 892 : let garbage_collect_histo = StorageTimeMetrics::new(
2654 892 : StorageTimeOperation::Gc,
2655 892 : &tenant_id,
2656 892 : &shard_id,
2657 892 : &timeline_id,
2658 892 : );
2659 892 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2660 892 : StorageTimeOperation::FindGcCutoffs,
2661 892 : &tenant_id,
2662 892 : &shard_id,
2663 892 : &timeline_id,
2664 892 : );
2665 892 : let last_record_lsn_gauge = LAST_RECORD_LSN
2666 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2667 892 : .unwrap();
2668 892 :
2669 892 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
2670 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2671 892 : .unwrap();
2672 892 :
2673 892 : let pitr_history_size = PITR_HISTORY_SIZE
2674 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2675 892 : .unwrap();
2676 892 :
2677 892 : let archival_size = TIMELINE_ARCHIVE_SIZE
2678 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2679 892 : .unwrap();
2680 892 :
2681 892 : let layer_size_image = TIMELINE_LAYER_SIZE
2682 892 : .get_metric_with_label_values(&[
2683 892 : &tenant_id,
2684 892 : &shard_id,
2685 892 : &timeline_id,
2686 892 : MetricLayerKind::Image.into(),
2687 892 : ])
2688 892 : .unwrap();
2689 892 :
2690 892 : let layer_count_image = TIMELINE_LAYER_COUNT
2691 892 : .get_metric_with_label_values(&[
2692 892 : &tenant_id,
2693 892 : &shard_id,
2694 892 : &timeline_id,
2695 892 : MetricLayerKind::Image.into(),
2696 892 : ])
2697 892 : .unwrap();
2698 892 :
2699 892 : let layer_size_delta = TIMELINE_LAYER_SIZE
2700 892 : .get_metric_with_label_values(&[
2701 892 : &tenant_id,
2702 892 : &shard_id,
2703 892 : &timeline_id,
2704 892 : MetricLayerKind::Delta.into(),
2705 892 : ])
2706 892 : .unwrap();
2707 892 :
2708 892 : let layer_count_delta = TIMELINE_LAYER_COUNT
2709 892 : .get_metric_with_label_values(&[
2710 892 : &tenant_id,
2711 892 : &shard_id,
2712 892 : &timeline_id,
2713 892 : MetricLayerKind::Delta.into(),
2714 892 : ])
2715 892 : .unwrap();
2716 892 :
2717 892 : let standby_horizon_gauge = STANDBY_HORIZON
2718 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2719 892 : .unwrap();
2720 892 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2721 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2722 892 : .unwrap();
2723 892 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
2724 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2725 892 : .unwrap();
2726 892 : // TODO: we shouldn't expose this metric
2727 892 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2728 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2729 892 : .unwrap();
2730 892 : let aux_file_size_gauge = AUX_FILE_SIZE
2731 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2732 892 : .unwrap();
2733 892 : // TODO use impl Trait syntax here once we have ability to use it: https://github.com/rust-lang/rust/issues/63065
2734 892 : let directory_entries_count_gauge_closure = {
2735 892 : let tenant_shard_id = *tenant_shard_id;
2736 892 : let timeline_id_raw = *timeline_id_raw;
2737 0 : move || {
2738 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2739 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2740 0 : let timeline_id = timeline_id_raw.to_string();
2741 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2742 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2743 0 : .unwrap();
2744 0 : gauge
2745 0 : }
2746 : };
2747 892 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2748 892 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2749 892 : let evictions = EVICTIONS
2750 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2751 892 : .unwrap();
2752 892 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2753 892 : .build(&tenant_id, &shard_id, &timeline_id);
2754 892 :
2755 892 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2756 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2757 892 : .unwrap();
2758 892 :
2759 892 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
2760 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2761 892 : .unwrap();
2762 892 :
2763 892 : TimelineMetrics {
2764 892 : tenant_id,
2765 892 : shard_id,
2766 892 : timeline_id,
2767 892 : flush_time_histo,
2768 892 : flush_delay_histo,
2769 892 : compact_time_histo,
2770 892 : create_images_time_histo,
2771 892 : logical_size_histo,
2772 892 : imitate_logical_size_histo,
2773 892 : garbage_collect_histo,
2774 892 : find_gc_cutoffs_histo,
2775 892 : load_layer_map_histo,
2776 892 : last_record_lsn_gauge,
2777 892 : disk_consistent_lsn_gauge,
2778 892 : pitr_history_size,
2779 892 : archival_size,
2780 892 : layer_size_image,
2781 892 : layer_count_image,
2782 892 : layer_size_delta,
2783 892 : layer_count_delta,
2784 892 : standby_horizon_gauge,
2785 892 : resident_physical_size_gauge,
2786 892 : visible_physical_size_gauge,
2787 892 : current_logical_size_gauge,
2788 892 : aux_file_size_gauge,
2789 892 : directory_entries_count_gauge,
2790 892 : evictions,
2791 892 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2792 892 : evictions_with_low_residence_duration,
2793 892 : ),
2794 892 : valid_lsn_lease_count_gauge,
2795 892 : wal_records_received,
2796 892 : shutdown: std::sync::atomic::AtomicBool::default(),
2797 892 : }
2798 892 : }
2799 :
2800 3144 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2801 3144 : self.resident_physical_size_add(sz);
2802 3144 : }
2803 :
2804 1056 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2805 1056 : self.resident_physical_size_gauge.sub(sz);
2806 1056 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2807 1056 : }
2808 :
2809 3400 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2810 3400 : self.resident_physical_size_gauge.add(sz);
2811 3400 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2812 3400 : }
2813 :
2814 20 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2815 20 : self.resident_physical_size_gauge.get()
2816 20 : }
2817 :
2818 20 : pub(crate) fn shutdown(&self) {
2819 20 : let was_shutdown = self
2820 20 : .shutdown
2821 20 : .swap(true, std::sync::atomic::Ordering::Relaxed);
2822 20 :
2823 20 : if was_shutdown {
2824 : // this happens on tenant deletion because tenant first shuts down timelines, then
2825 : // invokes timeline deletion which first shuts down the timeline again.
2826 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
2827 0 : return;
2828 20 : }
2829 20 :
2830 20 : let tenant_id = &self.tenant_id;
2831 20 : let timeline_id = &self.timeline_id;
2832 20 : let shard_id = &self.shard_id;
2833 20 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2834 20 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2835 20 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2836 20 : {
2837 20 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2838 20 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2839 20 : }
2840 20 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2841 20 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2842 20 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2843 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2844 20 : }
2845 :
2846 20 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2847 20 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2848 20 :
2849 20 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2850 20 : tenant_id,
2851 20 : shard_id,
2852 20 : timeline_id,
2853 20 : MetricLayerKind::Image.into(),
2854 20 : ]);
2855 20 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2856 20 : tenant_id,
2857 20 : shard_id,
2858 20 : timeline_id,
2859 20 : MetricLayerKind::Image.into(),
2860 20 : ]);
2861 20 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2862 20 : tenant_id,
2863 20 : shard_id,
2864 20 : timeline_id,
2865 20 : MetricLayerKind::Delta.into(),
2866 20 : ]);
2867 20 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2868 20 : tenant_id,
2869 20 : shard_id,
2870 20 : timeline_id,
2871 20 : MetricLayerKind::Delta.into(),
2872 20 : ]);
2873 20 :
2874 20 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2875 20 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2876 20 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2877 20 :
2878 20 : self.evictions_with_low_residence_duration
2879 20 : .write()
2880 20 : .unwrap()
2881 20 : .remove(tenant_id, shard_id, timeline_id);
2882 :
2883 : // The following metrics are born outside of the TimelineMetrics lifecycle but still
2884 : // removed at the end of it. The idea is to have the metrics outlive the
2885 : // entity they are observed for, e.g., the smgr metrics shall
2886 : // outlive an individual smgr connection, but not the timeline.
2887 :
2888 200 : for op in StorageTimeOperation::VARIANTS {
2889 180 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2890 180 : op,
2891 180 : tenant_id,
2892 180 : shard_id,
2893 180 : timeline_id,
2894 180 : ]);
2895 180 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2896 180 : op,
2897 180 : tenant_id,
2898 180 : shard_id,
2899 180 : timeline_id,
2900 180 : ]);
2901 180 : }
2902 :
2903 60 : for op in STORAGE_IO_SIZE_OPERATIONS {
2904 40 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2905 40 : }
2906 :
2907 20 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
2908 20 : SmgrQueryType::GetPageAtLsn.into(),
2909 20 : tenant_id,
2910 20 : shard_id,
2911 20 : timeline_id,
2912 20 : ]);
2913 20 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2914 20 : SmgrQueryType::GetPageAtLsn.into(),
2915 20 : tenant_id,
2916 20 : shard_id,
2917 20 : timeline_id,
2918 20 : ]);
2919 20 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
2920 20 : tenant_id,
2921 20 : shard_id,
2922 20 : timeline_id,
2923 20 : ]);
2924 20 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
2925 20 : tenant_id,
2926 20 : shard_id,
2927 20 : timeline_id,
2928 20 : ]);
2929 20 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
2930 20 : tenant_id,
2931 20 : shard_id,
2932 20 : timeline_id,
2933 20 : ]);
2934 20 : let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
2935 20 : tenant_id,
2936 20 : shard_id,
2937 20 : timeline_id,
2938 20 : ]);
2939 20 : }
2940 : }
2941 :
2942 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2943 12 : // Only shard zero deals in synthetic sizes
2944 12 : if tenant_shard_id.is_shard_zero() {
2945 12 : let tid = tenant_shard_id.tenant_id.to_string();
2946 12 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2947 12 : }
2948 :
2949 12 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
2950 12 :
2951 12 : // we leave the BROKEN_TENANTS_SET entry if any
2952 12 : }
2953 :
2954 : use futures::Future;
2955 : use pin_project_lite::pin_project;
2956 : use std::collections::HashMap;
2957 : use std::num::NonZeroUsize;
2958 : use std::pin::Pin;
2959 : use std::sync::atomic::AtomicU64;
2960 : use std::sync::{Arc, Mutex};
2961 : use std::task::{Context, Poll};
2962 : use std::time::{Duration, Instant};
2963 :
2964 : use crate::config::PageServerConf;
2965 : use crate::context::{PageContentKind, RequestContext};
2966 : use crate::task_mgr::TaskKind;
2967 : use crate::tenant::mgr::TenantSlot;
2968 : use crate::tenant::tasks::BackgroundLoopKind;
2969 : use crate::tenant::throttle::ThrottleResult;
2970 : use crate::tenant::Timeline;
2971 :
2972 : /// Maintain a per timeline gauge in addition to the global gauge.
2973 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
2974 : last_set: AtomicU64,
2975 : gauge: UIntGauge,
2976 : }
2977 :
2978 : impl PerTimelineRemotePhysicalSizeGauge {
2979 912 : fn new(per_timeline_gauge: UIntGauge) -> Self {
2980 912 : Self {
2981 912 : last_set: AtomicU64::new(0),
2982 912 : gauge: per_timeline_gauge,
2983 912 : }
2984 912 : }
2985 3835 : pub(crate) fn set(&self, sz: u64) {
2986 3835 : self.gauge.set(sz);
2987 3835 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
2988 3835 : if sz < prev {
2989 75 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
2990 3760 : } else {
2991 3760 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
2992 3760 : };
2993 3835 : }
2994 4 : pub(crate) fn get(&self) -> u64 {
2995 4 : self.gauge.get()
2996 4 : }
2997 : }
2998 :
2999 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
3000 40 : fn drop(&mut self) {
3001 40 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
3002 40 : }
3003 : }
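// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// The bookkeeping pattern used by `PerTimelineRemotePhysicalSizeGauge` above: each
// per-entity gauge remembers the last value it published and applies only the delta to
// a process-wide aggregate, so the global gauge stays equal to the sum of all
// per-entity gauges. Reduced here to plain atomics; the names are hypothetical.
#[allow(dead_code)]
mod sketch_aggregate_gauge {
    use std::sync::atomic::{AtomicI64, AtomicU64, Ordering};

    static SKETCH_GLOBAL: AtomicI64 = AtomicI64::new(0);

    pub struct SketchPerEntityGauge {
        last_set: AtomicU64,
    }

    impl SketchPerEntityGauge {
        pub fn set(&self, sz: u64) {
            // Swap in the new value and push only the difference to the aggregate.
            let prev = self.last_set.swap(sz, Ordering::Relaxed);
            SKETCH_GLOBAL.fetch_add(sz as i64 - prev as i64, Ordering::Relaxed);
        }
    }

    impl Drop for SketchPerEntityGauge {
        fn drop(&mut self) {
            // On removal, retract whatever this entity last contributed.
            SKETCH_GLOBAL.fetch_sub(self.last_set.load(Ordering::Relaxed) as i64, Ordering::Relaxed);
        }
    }
}
// ---------------------------------------------------------------------------------------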
3004 :
3005 : pub(crate) struct RemoteTimelineClientMetrics {
3006 : tenant_id: String,
3007 : shard_id: String,
3008 : timeline_id: String,
3009 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
3010 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
3011 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3012 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3013 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
3014 : }
3015 :
3016 : impl RemoteTimelineClientMetrics {
3017 912 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
3018 912 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
3019 912 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
3020 912 : let timeline_id_str = timeline_id.to_string();
3021 912 :
3022 912 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
3023 912 : REMOTE_PHYSICAL_SIZE
3024 912 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3025 912 : .unwrap(),
3026 912 : );
3027 912 :
3028 912 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
3029 912 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3030 912 : .unwrap();
3031 912 :
3032 912 : RemoteTimelineClientMetrics {
3033 912 : tenant_id: tenant_id_str,
3034 912 : shard_id: shard_id_str,
3035 912 : timeline_id: timeline_id_str,
3036 912 : calls: Mutex::new(HashMap::default()),
3037 912 : bytes_started_counter: Mutex::new(HashMap::default()),
3038 912 : bytes_finished_counter: Mutex::new(HashMap::default()),
3039 912 : remote_physical_size_gauge,
3040 912 : projected_remote_consistent_lsn_gauge,
3041 912 : }
3042 912 : }
3043 :
3044 6042 : pub fn remote_operation_time(
3045 6042 : &self,
3046 6042 : file_kind: &RemoteOpFileKind,
3047 6042 : op_kind: &RemoteOpKind,
3048 6042 : status: &'static str,
3049 6042 : ) -> Histogram {
3050 6042 : let key = (file_kind.as_str(), op_kind.as_str(), status);
3051 6042 : REMOTE_OPERATION_TIME
3052 6042 : .get_metric_with_label_values(&[key.0, key.1, key.2])
3053 6042 : .unwrap()
3054 6042 : }
3055 :
3056 14270 : fn calls_counter_pair(
3057 14270 : &self,
3058 14270 : file_kind: &RemoteOpFileKind,
3059 14270 : op_kind: &RemoteOpKind,
3060 14270 : ) -> IntCounterPair {
3061 14270 : let mut guard = self.calls.lock().unwrap();
3062 14270 : let key = (file_kind.as_str(), op_kind.as_str());
3063 14270 : let metric = guard.entry(key).or_insert_with(move || {
3064 1634 : REMOTE_TIMELINE_CLIENT_CALLS
3065 1634 : .get_metric_with_label_values(&[
3066 1634 : &self.tenant_id,
3067 1634 : &self.shard_id,
3068 1634 : &self.timeline_id,
3069 1634 : key.0,
3070 1634 : key.1,
3071 1634 : ])
3072 1634 : .unwrap()
3073 14270 : });
3074 14270 : metric.clone()
3075 14270 : }
3076 :
3077 3456 : fn bytes_started_counter(
3078 3456 : &self,
3079 3456 : file_kind: &RemoteOpFileKind,
3080 3456 : op_kind: &RemoteOpKind,
3081 3456 : ) -> IntCounter {
3082 3456 : let mut guard = self.bytes_started_counter.lock().unwrap();
3083 3456 : let key = (file_kind.as_str(), op_kind.as_str());
3084 3456 : let metric = guard.entry(key).or_insert_with(move || {
3085 644 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3086 644 : .get_metric_with_label_values(&[
3087 644 : &self.tenant_id,
3088 644 : &self.shard_id,
3089 644 : &self.timeline_id,
3090 644 : key.0,
3091 644 : key.1,
3092 644 : ])
3093 644 : .unwrap()
3094 3456 : });
3095 3456 : metric.clone()
3096 3456 : }
3097 :
3098 6503 : fn bytes_finished_counter(
3099 6503 : &self,
3100 6503 : file_kind: &RemoteOpFileKind,
3101 6503 : op_kind: &RemoteOpKind,
3102 6503 : ) -> IntCounter {
3103 6503 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3104 6503 : let key = (file_kind.as_str(), op_kind.as_str());
3105 6503 : let metric = guard.entry(key).or_insert_with(move || {
3106 644 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3107 644 : .get_metric_with_label_values(&[
3108 644 : &self.tenant_id,
3109 644 : &self.shard_id,
3110 644 : &self.timeline_id,
3111 644 : key.0,
3112 644 : key.1,
3113 644 : ])
3114 644 : .unwrap()
3115 6503 : });
3116 6503 : metric.clone()
3117 6503 : }
3118 : }
3119 :
3120 : #[cfg(test)]
3121 : impl RemoteTimelineClientMetrics {
3122 12 : pub fn get_bytes_started_counter_value(
3123 12 : &self,
3124 12 : file_kind: &RemoteOpFileKind,
3125 12 : op_kind: &RemoteOpKind,
3126 12 : ) -> Option<u64> {
3127 12 : let guard = self.bytes_started_counter.lock().unwrap();
3128 12 : let key = (file_kind.as_str(), op_kind.as_str());
3129 12 : guard.get(&key).map(|counter| counter.get())
3130 12 : }
3131 :
3132 12 : pub fn get_bytes_finished_counter_value(
3133 12 : &self,
3134 12 : file_kind: &RemoteOpFileKind,
3135 12 : op_kind: &RemoteOpKind,
3136 12 : ) -> Option<u64> {
3137 12 : let guard = self.bytes_finished_counter.lock().unwrap();
3138 12 : let key = (file_kind.as_str(), op_kind.as_str());
3139 12 : guard.get(&key).map(|counter| counter.get())
3140 12 : }
3141 : }
3142 :
3143 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3144 : #[must_use]
3145 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3146 : /// Decremented on drop.
3147 : calls_counter_pair: Option<IntCounterPair>,
3148 : /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
3149 : bytes_finished: Option<(IntCounter, u64)>,
3150 : }
3151 :
3152 : impl RemoteTimelineClientCallMetricGuard {
3153 : /// Consume this guard object without performing the metric updates it would do on `drop()`.
3154 : /// The caller vouches to do the metric updates manually.
3155 7560 : pub fn will_decrement_manually(mut self) {
3156 7560 : let RemoteTimelineClientCallMetricGuard {
3157 7560 : calls_counter_pair,
3158 7560 : bytes_finished,
3159 7560 : } = &mut self;
3160 7560 : calls_counter_pair.take();
3161 7560 : bytes_finished.take();
3162 7560 : }
3163 : }
3164 :
3165 : impl Drop for RemoteTimelineClientCallMetricGuard {
3166 7612 : fn drop(&mut self) {
3167 7612 : let RemoteTimelineClientCallMetricGuard {
3168 7612 : calls_counter_pair,
3169 7612 : bytes_finished,
3170 7612 : } = self;
3171 7612 : if let Some(guard) = calls_counter_pair.take() {
3172 52 : guard.dec();
3173 7560 : }
3174 7612 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3175 0 : bytes_finished_metric.inc_by(*value);
3176 7612 : }
3177 7612 : }
3178 : }
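// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// The guard pattern used by `RemoteTimelineClientCallMetricGuard`: an RAII object that
// performs the "finish" side of a metric update on drop, unless the caller disarms it
// and promises to do the update manually. Self-contained; all names are hypothetical.
#[allow(dead_code)]
mod sketch_metric_guard {
    use std::sync::atomic::{AtomicI64, Ordering};

    static SKETCH_INPROGRESS: AtomicI64 = AtomicI64::new(0);

    pub struct SketchCallGuard {
        armed: bool,
    }

    pub fn sketch_call_begin() -> SketchCallGuard {
        SKETCH_INPROGRESS.fetch_add(1, Ordering::Relaxed);
        SketchCallGuard { armed: true }
    }

    impl SketchCallGuard {
        /// Consume the guard without decrementing; the caller will do it manually later.
        pub fn will_decrement_manually(mut self) {
            self.armed = false;
        }
    }

    impl Drop for SketchCallGuard {
        fn drop(&mut self) {
            if self.armed {
                SKETCH_INPROGRESS.fetch_sub(1, Ordering::Relaxed);
            }
        }
    }
}
// ---------------------------------------------------------------------------------------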
3179 :
3180 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
3181 : /// track the byte size of this call in applicable metric(s).
3182 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
3183 : /// Do not account for this call's byte size in any metrics.
3184 : /// The `reason` field is there to make the call sites self-documenting
3185 : /// about why they don't need the metric.
3186 : DontTrackSize { reason: &'static str },
3187 : /// Track the byte size of the call in applicable metric(s).
3188 : Bytes(u64),
3189 : }
3190 :
3191 : impl RemoteTimelineClientMetrics {
3192 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3193 : ///
3194 : /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
3195 : /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
3196 : /// is more suitable.
3197 : /// Never do both.
3198 7612 : pub(crate) fn call_begin(
3199 7612 : &self,
3200 7612 : file_kind: &RemoteOpFileKind,
3201 7612 : op_kind: &RemoteOpKind,
3202 7612 : size: RemoteTimelineClientMetricsCallTrackSize,
3203 7612 : ) -> RemoteTimelineClientCallMetricGuard {
3204 7612 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3205 7612 : calls_counter_pair.inc();
3206 :
3207 7612 : let bytes_finished = match size {
3208 4156 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3209 4156 : // nothing to do
3210 4156 : None
3211 : }
3212 3456 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3213 3456 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3214 3456 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3215 3456 : Some((finished_counter, size))
3216 : }
3217 : };
3218 7612 : RemoteTimelineClientCallMetricGuard {
3219 7612 : calls_counter_pair: Some(calls_counter_pair),
3220 7612 : bytes_finished,
3221 7612 : }
3222 7612 : }
3223 :
3224 : /// Manually update the metrics that track completions, instead of using the guard object.
3225 : /// Using the guard object is generally preferable.
3226 : /// See [`call_begin`](Self::call_begin) for more context.
3227 6658 : pub(crate) fn call_end(
3228 6658 : &self,
3229 6658 : file_kind: &RemoteOpFileKind,
3230 6658 : op_kind: &RemoteOpKind,
3231 6658 : size: RemoteTimelineClientMetricsCallTrackSize,
3232 6658 : ) {
3233 6658 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3234 6658 : calls_counter_pair.dec();
3235 6658 : match size {
3236 3611 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3237 3047 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3238 3047 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3239 3047 : }
3240 : }
3241 6658 : }
3242 : }
3243 :
3244 : impl Drop for RemoteTimelineClientMetrics {
3245 40 : fn drop(&mut self) {
3246 40 : let RemoteTimelineClientMetrics {
3247 40 : tenant_id,
3248 40 : shard_id,
3249 40 : timeline_id,
3250 40 : remote_physical_size_gauge,
3251 40 : calls,
3252 40 : bytes_started_counter,
3253 40 : bytes_finished_counter,
3254 40 : projected_remote_consistent_lsn_gauge,
3255 40 : } = self;
3256 48 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3257 48 : let mut res = [Ok(()), Ok(())];
3258 48 : REMOTE_TIMELINE_CLIENT_CALLS
3259 48 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3260 48 : // don't care about results
3261 48 : }
3262 40 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3263 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3264 12 : tenant_id,
3265 12 : shard_id,
3266 12 : timeline_id,
3267 12 : a,
3268 12 : b,
3269 12 : ]);
3270 12 : }
3271 40 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3272 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3273 12 : tenant_id,
3274 12 : shard_id,
3275 12 : timeline_id,
3276 12 : a,
3277 12 : b,
3278 12 : ]);
3279 12 : }
3280 40 : {
3281 40 : let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above
3282 40 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3283 40 : }
3284 40 : {
3285 40 : let _ = projected_remote_consistent_lsn_gauge;
3286 40 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3287 40 : tenant_id,
3288 40 : shard_id,
3289 40 : timeline_id,
3290 40 : ]);
3291 40 : }
3292 40 : }
3293 : }
3294 :
3295 : /// Extension trait for wrapping a future in [`MeasuredRemoteOp`], which measures the time spent
3296 : /// by a remote storage operation and records the duration and success/failure as a prometheus metric.
3297 : pub(crate) trait MeasureRemoteOp: Sized {
3298 6335 : fn measure_remote_op(
3299 6335 : self,
3300 6335 : file_kind: RemoteOpFileKind,
3301 6335 : op: RemoteOpKind,
3302 6335 : metrics: Arc<RemoteTimelineClientMetrics>,
3303 6335 : ) -> MeasuredRemoteOp<Self> {
3304 6335 : let start = Instant::now();
3305 6335 : MeasuredRemoteOp {
3306 6335 : inner: self,
3307 6335 : file_kind,
3308 6335 : op,
3309 6335 : start,
3310 6335 : metrics,
3311 6335 : }
3312 6335 : }
3313 : }
3314 :
3315 : impl<T: Sized> MeasureRemoteOp for T {}
3316 :
3317 : pin_project! {
3318 : pub(crate) struct MeasuredRemoteOp<F>
3319 : {
3320 : #[pin]
3321 : inner: F,
3322 : file_kind: RemoteOpFileKind,
3323 : op: RemoteOpKind,
3324 : start: Instant,
3325 : metrics: Arc<RemoteTimelineClientMetrics>,
3326 : }
3327 : }
3328 :
3329 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
3330 : type Output = Result<O, E>;
3331 :
3332 91890 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
3333 91890 : let this = self.project();
3334 91890 : let poll_result = this.inner.poll(cx);
3335 91890 : if let Poll::Ready(ref res) = poll_result {
3336 6042 : let duration = this.start.elapsed();
3337 6042 : let status = if res.is_ok() { &"success" } else { &"failure" };
3338 6042 : this.metrics
3339 6042 : .remote_operation_time(this.file_kind, this.op, status)
3340 6042 : .observe(duration.as_secs_f64());
3341 85848 : }
3342 91890 : poll_result
3343 91890 : }
3344 : }
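// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// The wrapper-future pattern above, reduced to its essentials: remember an `Instant`
// when the wrapper is built, poll the inner future transparently, and report the
// elapsed time exactly once, when the inner future becomes ready. `SketchTimed` and
// the `on_ready` callback are hypothetical; the real code feeds the duration into
// REMOTE_OPERATION_TIME instead.
#[allow(dead_code)]
mod sketch_timed_future {
    use std::future::Future;
    use std::pin::Pin;
    use std::task::{Context, Poll};
    use std::time::{Duration, Instant};

    pin_project_lite::pin_project! {
        pub struct SketchTimed<F, C> {
            #[pin]
            inner: F,
            start: Instant,
            on_ready: C,
        }
    }

    impl<F: Future, C: FnMut(Duration)> Future for SketchTimed<F, C> {
        type Output = F::Output;

        fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
            let this = self.project();
            let poll_result = this.inner.poll(cx);
            if poll_result.is_ready() {
                // Only observe once, when the wrapped operation completes.
                (this.on_ready)(this.start.elapsed());
            }
            poll_result
        }
    }
}
// ---------------------------------------------------------------------------------------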
3345 :
3346 : pub mod tokio_epoll_uring {
3347 : use std::{
3348 : collections::HashMap,
3349 : sync::{Arc, Mutex},
3350 : };
3351 :
3352 : use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
3353 : use once_cell::sync::Lazy;
3354 :
3355 : /// Shared storage for tokio-epoll-uring thread local metrics.
3356 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3357 232 : Lazy::new(|| {
3358 232 : let slots_submission_queue_depth = register_histogram!(
3359 232 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3360 232 : "The slots waiters queue depth of each tokio_epoll_uring system",
3361 232 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
3362 232 : )
3363 232 : .expect("failed to define a metric");
3364 232 : ThreadLocalMetricsStorage {
3365 232 : observers: Mutex::new(HashMap::new()),
3366 232 : slots_submission_queue_depth,
3367 232 : }
3368 232 : });
3369 :
3370 : pub struct ThreadLocalMetricsStorage {
3371 : /// List of thread local metrics observers.
3372 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3373 : /// A histogram shared between all thread local systems
3374 : /// for collecting slots submission queue depth.
3375 : slots_submission_queue_depth: Histogram,
3376 : }
3377 :
3378 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3379 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3380 : ///
3381 : /// The System makes observations into [`Self`] and periodically, the collector
3382 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3383 : ///
3384 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3385 : /// But except for the periodic flush, the lock is uncontended, so there's no waiting
3386 : /// for the cache coherence protocol to get an exclusive cache line.
3387 : pub struct ThreadLocalMetrics {
3388 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3389 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3390 : }
3391 :
3392 : impl ThreadLocalMetricsStorage {
3393 : /// Registers a new thread local system. Returns a thread local metrics observer.
3394 1045 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3395 1045 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3396 1045 : self.slots_submission_queue_depth.local(),
3397 1045 : ));
3398 1045 : let mut g = self.observers.lock().unwrap();
3399 1045 : g.insert(id, Arc::clone(&per_system_metrics));
3400 1045 : per_system_metrics
3401 1045 : }
3402 :
3403 : /// Removes metrics observer for a thread local system.
3404 : /// This should be called before dropping a thread local system.
3405 232 : pub fn remove_system(&self, id: u64) {
3406 232 : let mut g = self.observers.lock().unwrap();
3407 232 : g.remove(&id);
3408 232 : }
3409 :
3410 : /// Flush all thread local metrics to the shared storage.
3411 0 : pub fn flush_thread_local_metrics(&self) {
3412 0 : let g = self.observers.lock().unwrap();
3413 0 : g.values().for_each(|local| {
3414 0 : local.flush();
3415 0 : });
3416 0 : }
3417 : }
3418 :
3419 : impl ThreadLocalMetrics {
3420 1045 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3421 1045 : ThreadLocalMetrics {
3422 1045 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3423 1045 : }
3424 1045 : }
3425 :
3426 : /// Flushes the thread local metrics to the shared aggregator.
3427 0 : pub fn flush(&self) {
3428 0 : let Self {
3429 0 : slots_submission_queue_depth,
3430 0 : } = self;
3431 0 : slots_submission_queue_depth.lock().unwrap().flush();
3432 0 : }
3433 : }
3434 :
3435 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3436 1819112 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3437 1819112 : let Self {
3438 1819112 : slots_submission_queue_depth,
3439 1819112 : } = self;
3440 1819112 : slots_submission_queue_depth
3441 1819112 : .lock()
3442 1819112 : .unwrap()
3443 1819112 : .observe(queue_depth as f64);
3444 1819112 : }
3445 : }
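// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// The LocalHistogram pattern used above, in isolation: observations go into a cheap
// local buffer and are only merged into the shared registered histogram when flush()
// is called (here explicitly; above, the Collector flushes on scrape). The metric
// name and the function itself are hypothetical.
#[allow(dead_code)]
fn sketch_local_histogram_flush() {
    let shared = register_histogram!(
        "sketch_queue_depth",
        "Sketch histogram demonstrating LocalHistogram flushing",
        vec![1.0, 2.0, 4.0, 8.0],
    )
    .expect("failed to define a metric");

    let local = shared.local();
    local.observe(3.0); // buffered locally, not yet visible in `shared`
    local.flush(); // now merged into the shared histogram
}
// ---------------------------------------------------------------------------------------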
3446 :
3447 : pub struct Collector {
3448 : descs: Vec<metrics::core::Desc>,
3449 : systems_created: UIntGauge,
3450 : systems_destroyed: UIntGauge,
3451 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
3452 : }
3453 :
3454 : impl metrics::core::Collector for Collector {
3455 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3456 0 : self.descs.iter().collect()
3457 0 : }
3458 :
3459 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3460 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3461 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3462 0 : systems_created,
3463 0 : systems_destroyed,
3464 0 : } = tokio_epoll_uring::metrics::global();
3465 0 : self.systems_created.set(systems_created);
3466 0 : mfs.extend(self.systems_created.collect());
3467 0 : self.systems_destroyed.set(systems_destroyed);
3468 0 : mfs.extend(self.systems_destroyed.collect());
3469 0 :
3470 0 : self.thread_local_metrics_storage
3471 0 : .flush_thread_local_metrics();
3472 0 :
3473 0 : mfs.extend(
3474 0 : self.thread_local_metrics_storage
3475 0 : .slots_submission_queue_depth
3476 0 : .collect(),
3477 0 : );
3478 0 : mfs
3479 0 : }
3480 : }
3481 :
3482 : impl Collector {
3483 : const NMETRICS: usize = 3;
3484 :
3485 : #[allow(clippy::new_without_default)]
3486 0 : pub fn new() -> Self {
3487 0 : let mut descs = Vec::new();
3488 0 :
3489 0 : let systems_created = UIntGauge::new(
3490 0 : "pageserver_tokio_epoll_uring_systems_created",
3491 0 : "counter of tokio-epoll-uring systems that were created",
3492 0 : )
3493 0 : .unwrap();
3494 0 : descs.extend(
3495 0 : metrics::core::Collector::desc(&systems_created)
3496 0 : .into_iter()
3497 0 : .cloned(),
3498 0 : );
3499 0 :
3500 0 : let systems_destroyed = UIntGauge::new(
3501 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
3502 0 : "counter of tokio-epoll-uring systems that were destroyed",
3503 0 : )
3504 0 : .unwrap();
3505 0 : descs.extend(
3506 0 : metrics::core::Collector::desc(&systems_destroyed)
3507 0 : .into_iter()
3508 0 : .cloned(),
3509 0 : );
3510 0 :
3511 0 : Self {
3512 0 : descs,
3513 0 : systems_created,
3514 0 : systems_destroyed,
3515 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
3516 0 : }
3517 0 : }
3518 : }
3519 :
3520 232 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3521 232 : register_int_counter!(
3522 232 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
3523 232 : "Number of times where thread_local_system creation spanned multiple executor threads",
3524 232 : )
3525 232 : .unwrap()
3526 232 : });
3527 :
3528 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3529 0 : register_int_counter!(
3530 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
3531 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
3532 0 : )
3533 0 : .unwrap()
3534 0 : });
3535 : }
3536 :
3537 : pub(crate) mod tenant_throttling {
3538 : use metrics::{register_int_counter_vec, IntCounter};
3539 : use once_cell::sync::Lazy;
3540 : use utils::shard::TenantShardId;
3541 :
3542 : pub(crate) struct GlobalAndPerTenantIntCounter {
3543 : global: IntCounter,
3544 : per_tenant: IntCounter,
3545 : }
3546 :
3547 : impl GlobalAndPerTenantIntCounter {
3548 : #[inline(always)]
3549 0 : pub(crate) fn inc(&self) {
3550 0 : self.inc_by(1)
3551 0 : }
3552 : #[inline(always)]
3553 0 : pub(crate) fn inc_by(&self, n: u64) {
3554 0 : self.global.inc_by(n);
3555 0 : self.per_tenant.inc_by(n);
3556 0 : }
3557 : }
3558 :
3559 : pub(crate) struct Metrics<const KIND: usize> {
3560 : pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
3561 : pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,
3562 : pub(super) wait_time: GlobalAndPerTenantIntCounter,
3563 : pub(super) count_throttled: GlobalAndPerTenantIntCounter,
3564 : }
3565 :
3566 404 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3567 404 : register_int_counter_vec!(
3568 404 : "pageserver_tenant_throttling_count_accounted_start_global",
3569 404 : "Count of tenant throttling starts, by kind of throttle.",
3570 404 : &["kind"]
3571 404 : )
3572 404 : .unwrap()
3573 404 : });
3574 404 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3575 404 : register_int_counter_vec!(
3576 404 : "pageserver_tenant_throttling_count_accounted_start",
3577 404 : "Count of tenant throttling starts, by kind of throttle.",
3578 404 : &["kind", "tenant_id", "shard_id"]
3579 404 : )
3580 404 : .unwrap()
3581 404 : });
3582 404 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3583 404 : register_int_counter_vec!(
3584 404 : "pageserver_tenant_throttling_count_accounted_finish_global",
3585 404 : "Count of tenant throttling finishes, by kind of throttle.",
3586 404 : &["kind"]
3587 404 : )
3588 404 : .unwrap()
3589 404 : });
3590 404 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3591 404 : register_int_counter_vec!(
3592 404 : "pageserver_tenant_throttling_count_accounted_finish",
3593 404 : "Count of tenant throttling finishes, by kind of throttle.",
3594 404 : &["kind", "tenant_id", "shard_id"]
3595 404 : )
3596 404 : .unwrap()
3597 404 : });
3598 404 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3599 404 : register_int_counter_vec!(
3600 404 : "pageserver_tenant_throttling_wait_usecs_sum_global",
3601 404 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3602 404 : &["kind"]
3603 404 : )
3604 404 : .unwrap()
3605 404 : });
3606 404 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3607 404 : register_int_counter_vec!(
3608 404 : "pageserver_tenant_throttling_wait_usecs_sum",
3609 404 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3610 404 : &["kind", "tenant_id", "shard_id"]
3611 404 : )
3612 404 : .unwrap()
3613 404 : });
3614 :
3615 404 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3616 404 : register_int_counter_vec!(
3617 404 : "pageserver_tenant_throttling_count_global",
3618 404 : "Count of tenant throttlings, by kind of throttle.",
3619 404 : &["kind"]
3620 404 : )
3621 404 : .unwrap()
3622 404 : });
3623 404 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3624 404 : register_int_counter_vec!(
3625 404 : "pageserver_tenant_throttling_count",
3626 404 : "Count of tenant throttlings, by kind of throttle.",
3627 404 : &["kind", "tenant_id", "shard_id"]
3628 404 : )
3629 404 : .unwrap()
3630 404 : });
3631 :
3632 : const KINDS: &[&str] = &["pagestream"];
3633 : pub type Pagestream = Metrics<0>;
3634 :
3635 : impl<const KIND: usize> Metrics<KIND> {
3636 440 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
3637 440 : let per_tenant_label_values = &[
3638 440 : KINDS[KIND],
3639 440 : &tenant_shard_id.tenant_id.to_string(),
3640 440 : &tenant_shard_id.shard_slug().to_string(),
3641 440 : ];
3642 440 : Metrics {
3643 440 : count_accounted_start: {
3644 440 : GlobalAndPerTenantIntCounter {
3645 440 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
3646 440 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
3647 440 : .with_label_values(per_tenant_label_values),
3648 440 : }
3649 440 : },
3650 440 : count_accounted_finish: {
3651 440 : GlobalAndPerTenantIntCounter {
3652 440 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
3653 440 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
3654 440 : .with_label_values(per_tenant_label_values),
3655 440 : }
3656 440 : },
3657 440 : wait_time: {
3658 440 : GlobalAndPerTenantIntCounter {
3659 440 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
3660 440 : per_tenant: WAIT_USECS_PER_TENANT
3661 440 : .with_label_values(per_tenant_label_values),
3662 440 : }
3663 440 : },
3664 440 : count_throttled: {
3665 440 : GlobalAndPerTenantIntCounter {
3666 440 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
3667 440 : per_tenant: WAIT_COUNT_PER_TENANT
3668 440 : .with_label_values(per_tenant_label_values),
3669 440 : }
3670 440 : },
3671 440 : }
3672 440 : }
3673 : }
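// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// The const-generic trick above: `Metrics<KIND>` indexes into the `KINDS` label table at
// construction time, so each throttle kind gets its own type alias (e.g.
// `Pagestream = Metrics<0>`) while sharing one implementation. A minimal standalone
// version of the same idea, with hypothetical names:
#[allow(dead_code)]
mod sketch_const_kind {
    const SKETCH_KINDS: &[&str] = &["pagestream"];

    pub struct SketchMetrics<const KIND: usize> {
        label: &'static str,
    }

    impl<const KIND: usize> SketchMetrics<KIND> {
        pub fn new() -> Self {
            // An out-of-range KIND panics when `new` runs, just like `KINDS[KIND]` above.
            Self {
                label: SKETCH_KINDS[KIND],
            }
        }
        pub fn label(&self) -> &'static str {
            self.label
        }
    }

    pub type SketchPagestream = SketchMetrics<0>;
}
// ---------------------------------------------------------------------------------------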
3674 :
3675 0 : pub(crate) fn preinitialize_global_metrics() {
3676 0 : Lazy::force(&COUNT_ACCOUNTED_START);
3677 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
3678 0 : Lazy::force(&WAIT_USECS);
3679 0 : Lazy::force(&WAIT_COUNT);
3680 0 : }
3681 :
3682 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3683 48 : for m in &[
3684 12 : &COUNT_ACCOUNTED_START_PER_TENANT,
3685 12 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
3686 12 : &WAIT_USECS_PER_TENANT,
3687 12 : &WAIT_COUNT_PER_TENANT,
3688 12 : ] {
3689 96 : for kind in KINDS {
3690 48 : let _ = m.remove_label_values(&[
3691 48 : kind,
3692 48 : &tenant_shard_id.tenant_id.to_string(),
3693 48 : &tenant_shard_id.shard_slug().to_string(),
3694 48 : ]);
3695 48 : }
3696 : }
3697 12 : }
3698 : }
3699 :
3700 : pub(crate) mod disk_usage_based_eviction {
3701 : use super::*;
3702 :
3703 : pub(crate) struct Metrics {
3704 : pub(crate) tenant_collection_time: Histogram,
3705 : pub(crate) tenant_layer_count: Histogram,
3706 : pub(crate) layers_collected: IntCounter,
3707 : pub(crate) layers_selected: IntCounter,
3708 : pub(crate) layers_evicted: IntCounter,
3709 : }
3710 :
3711 : impl Default for Metrics {
3712 0 : fn default() -> Self {
3713 0 : let tenant_collection_time = register_histogram!(
3714 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
3715 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
3716 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
3717 0 : )
3718 0 : .unwrap();
3719 0 :
3720 0 : let tenant_layer_count = register_histogram!(
3721 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
3722 0 : "Amount of layers gathered from a tenant",
3723 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
3724 0 : )
3725 0 : .unwrap();
3726 0 :
3727 0 : let layers_collected = register_int_counter!(
3728 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
3729 0 : "Amount of layers collected"
3730 0 : )
3731 0 : .unwrap();
3732 0 :
3733 0 : let layers_selected = register_int_counter!(
3734 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
3735 0 : "Amount of layers selected"
3736 0 : )
3737 0 : .unwrap();
3738 0 :
3739 0 : let layers_evicted = register_int_counter!(
3740 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
3741 0 : "Amount of layers successfully evicted"
3742 0 : )
3743 0 : .unwrap();
3744 0 :
3745 0 : Self {
3746 0 : tenant_collection_time,
3747 0 : tenant_layer_count,
3748 0 : layers_collected,
3749 0 : layers_selected,
3750 0 : layers_evicted,
3751 0 : }
3752 0 : }
3753 : }
3754 :
3755 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
3756 : }
3757 :
3758 392 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
3759 392 : register_uint_gauge_vec!(
3760 392 : "pageserver_tokio_executor_thread_configured_count",
3761 392 : "Total number of configued tokio executor threads in the process.
3762 392 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
3763 392 : &["setup"],
3764 392 : )
3765 392 : .unwrap()
3766 392 : });
3767 :
3768 392 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
3769 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
3770 392 : let _guard = SERIALIZE.lock().unwrap();
3771 392 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
3772 392 : TOKIO_EXECUTOR_THREAD_COUNT
3773 392 : .get_metric_with_label_values(&[setup])
3774 392 : .unwrap()
3775 392 : .set(u64::try_from(num_threads.get()).unwrap());
3776 392 : }
3777 :
3778 0 : pub fn preinitialize_metrics(conf: &'static PageServerConf) {
3779 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
3780 0 :
3781 0 : // Python tests need these, and we alert on some of them.
3782 0 : //
3783 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
3784 0 : // order:
3785 0 : // - global metrics reside in a Lazy<PageserverMetrics>
3786 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
3787 0 : // - could move the statics into TimelineMetrics::new()?
3788 0 :
3789 0 : // counters
3790 0 : [
3791 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
3792 0 : &WALRECEIVER_STARTED_CONNECTIONS,
3793 0 : &WALRECEIVER_BROKER_UPDATES,
3794 0 : &WALRECEIVER_CANDIDATES_ADDED,
3795 0 : &WALRECEIVER_CANDIDATES_REMOVED,
3796 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
3797 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
3798 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
3799 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
3800 0 : &CIRCUIT_BREAKERS_BROKEN,
3801 0 : &CIRCUIT_BREAKERS_UNBROKEN,
3802 0 : &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
3803 0 : ]
3804 0 : .into_iter()
3805 0 : .for_each(|c| {
3806 0 : Lazy::force(c);
3807 0 : });
3808 0 :
3809 0 : // Deletion queue stats
3810 0 : Lazy::force(&DELETION_QUEUE);
3811 0 :
3812 0 : // Tenant stats
3813 0 : Lazy::force(&TENANT);
3814 0 :
3815 0 : // Tenant manager stats
3816 0 : Lazy::force(&TENANT_MANAGER);
3817 0 :
3818 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
3819 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
3820 :
3821 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
3822 0 : // initialize the metric for all gauges, otherwise the time series might seemingly show
3823 0 : // values from last restart.
3824 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
3825 0 : }
3826 :
3827 : // countervecs
3828 0 : [
3829 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
3830 0 : &SMGR_QUERY_STARTED_GLOBAL,
3831 0 : ]
3832 0 : .into_iter()
3833 0 : .for_each(|c| {
3834 0 : Lazy::force(c);
3835 0 : });
3836 0 :
3837 0 : // gauges
3838 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
3839 0 :
3840 0 : // histograms
3841 0 : [
3842 0 : &VEC_READ_NUM_LAYERS_VISITED,
3843 0 : &WAIT_LSN_TIME,
3844 0 : &WAL_REDO_TIME,
3845 0 : &WAL_REDO_RECORDS_HISTOGRAM,
3846 0 : &WAL_REDO_BYTES_HISTOGRAM,
3847 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
3848 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
3849 0 : &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
3850 0 : ]
3851 0 : .into_iter()
3852 0 : .for_each(|h| {
3853 0 : Lazy::force(h);
3854 0 : });
3855 0 :
3856 0 : // Custom
3857 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
3858 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
3859 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
3860 0 :
3861 0 : tenant_throttling::preinitialize_global_metrics();
3862 0 : }
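// --- Editorial sketch (illustrative, not part of the pageserver source) ---------------
// Why `Lazy::force` in the function above: a `Lazy` metric is only registered on first
// use, so a counter that never fires would be absent from /metrics and would appear
// with a sudden non-zero jump after its first increment. Forcing registration at
// startup exports it as 0 immediately. A minimal standalone version of the pattern,
// with hypothetical names:
#[allow(dead_code)]
mod sketch_preinitialize {
    use metrics::{register_int_counter, IntCounter};
    use once_cell::sync::Lazy;

    static SKETCH_RARE_EVENTS: Lazy<IntCounter> = Lazy::new(|| {
        register_int_counter!(
            "sketch_rare_events_total",
            "Counter of a rare event; pre-registered so it scrapes as 0 from startup",
        )
        .unwrap()
    });

    pub fn sketch_preinitialize() {
        // Registers the metric (value 0) even though nothing has incremented it yet.
        Lazy::force(&SKETCH_RARE_EVENTS);
    }
}
// ---------------------------------------------------------------------------------------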
|