Line data Source code
1 : use enum_map::EnumMap;
2 : use metrics::{
3 : register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
4 : register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
5 : register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
6 : Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
7 : IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
8 : };
9 : use once_cell::sync::Lazy;
10 : use pageserver_api::config::{
11 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined,
12 : PageServiceProtocolPipelinedExecutionStrategy,
13 : };
14 : use pageserver_api::shard::TenantShardId;
15 : use postgres_backend::{is_expected_io_error, QueryError};
16 : use pq_proto::framed::ConnectionError;
17 : use strum::{EnumCount, VariantNames};
18 : use strum_macros::{IntoStaticStr, VariantNames};
19 : use utils::id::TimelineId;
20 :
21 : /// Prometheus histogram buckets (in seconds) for operations in the critical
22 : /// path. In other words, operations that directly affect the latency of user
23 : /// queries.
24 : ///
25 : /// The buckets capture the majority of latencies in the microsecond and
26 : /// millisecond range but also extend far enough up to distinguish "bad" from
27 : /// "really bad".
28 : const CRITICAL_OP_BUCKETS: &[f64] = &[
29 : 0.000_001, 0.000_010, 0.000_100, // 1 us, 10 us, 100 us
30 : 0.001_000, 0.010_000, 0.100_000, // 1 ms, 10 ms, 100 ms
31 : 1.0, 10.0, 100.0, // 1 s, 10 s, 100 s
32 : ];
33 :
34 : // Metrics collected on operations on the storage repository.
35 : #[derive(Debug, VariantNames, IntoStaticStr)]
36 : #[strum(serialize_all = "kebab_case")]
37 : pub(crate) enum StorageTimeOperation {
38 : #[strum(serialize = "layer flush")]
39 : LayerFlush,
40 :
41 : #[strum(serialize = "layer flush delay")]
42 : LayerFlushDelay,
43 :
44 : #[strum(serialize = "compact")]
45 : Compact,
46 :
47 : #[strum(serialize = "create images")]
48 : CreateImages,
49 :
50 : #[strum(serialize = "logical size")]
51 : LogicalSize,
52 :
53 : #[strum(serialize = "imitate logical size")]
54 : ImitateLogicalSize,
55 :
56 : #[strum(serialize = "load layer map")]
57 : LoadLayerMap,
58 :
59 : #[strum(serialize = "gc")]
60 : Gc,
61 :
62 : #[strum(serialize = "find gc cutoffs")]
63 : FindGcCutoffs,
64 : }
65 :
66 400 : pub(crate) static STORAGE_TIME_SUM_PER_TIMELINE: Lazy<CounterVec> = Lazy::new(|| {
67 400 : register_counter_vec!(
68 400 : "pageserver_storage_operations_seconds_sum",
69 400 : "Total time spent on storage operations with operation, tenant and timeline dimensions",
70 400 : &["operation", "tenant_id", "shard_id", "timeline_id"],
71 400 : )
72 400 : .expect("failed to define a metric")
73 400 : });
74 :
75 400 : pub(crate) static STORAGE_TIME_COUNT_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
76 400 : register_int_counter_vec!(
77 400 : "pageserver_storage_operations_seconds_count",
78 400 : "Count of storage operations with operation, tenant and timeline dimensions",
79 400 : &["operation", "tenant_id", "shard_id", "timeline_id"],
80 400 : )
81 400 : .expect("failed to define a metric")
82 400 : });
83 :
84 : // Buckets for background operations like compaction, GC, size calculation
85 : const STORAGE_OP_BUCKETS: &[f64] = &[0.010, 0.100, 1.0, 10.0, 100.0, 1000.0];
86 :
87 400 : pub(crate) static STORAGE_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
88 400 : register_histogram_vec!(
89 400 : "pageserver_storage_operations_seconds_global",
90 400 : "Time spent on storage operations",
91 400 : &["operation"],
92 400 : STORAGE_OP_BUCKETS.into(),
93 400 : )
94 400 : .expect("failed to define a metric")
95 400 : });
96 :
97 392 : pub(crate) static VEC_READ_NUM_LAYERS_VISITED: Lazy<Histogram> = Lazy::new(|| {
98 392 : register_histogram!(
99 392 : "pageserver_layers_visited_per_vectored_read_global",
100 392 : "Average number of layers visited to reconstruct one key",
101 392 : vec![1.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
102 392 : )
103 392 : .expect("failed to define a metric")
104 392 : });
105 :
106 0 : pub(crate) static CONCURRENT_INITDBS: Lazy<UIntGauge> = Lazy::new(|| {
107 0 : register_uint_gauge!(
108 0 : "pageserver_concurrent_initdb",
109 0 : "Number of initdb processes running"
110 0 : )
111 0 : .expect("failed to define a metric")
112 0 : });
113 :
114 0 : pub(crate) static INITDB_SEMAPHORE_ACQUISITION_TIME: Lazy<Histogram> = Lazy::new(|| {
115 0 : register_histogram!(
116 0 : "pageserver_initdb_semaphore_seconds_global",
117 0 : "Time spent getting a permit from the global initdb semaphore",
118 0 : STORAGE_OP_BUCKETS.into()
119 0 : )
120 0 : .expect("failed to define metric")
121 0 : });
122 :
123 0 : pub(crate) static INITDB_RUN_TIME: Lazy<Histogram> = Lazy::new(|| {
124 0 : register_histogram!(
125 0 : "pageserver_initdb_seconds_global",
126 0 : "Time spent performing initdb",
127 0 : STORAGE_OP_BUCKETS.into()
128 0 : )
129 0 : .expect("failed to define metric")
130 0 : });
131 :
132 : pub(crate) struct GetVectoredLatency {
133 : map: EnumMap<TaskKind, Option<Histogram>>,
134 : }
135 :
136 : #[allow(dead_code)]
137 : pub(crate) struct ScanLatency {
138 : map: EnumMap<TaskKind, Option<Histogram>>,
139 : }
140 :
141 : impl GetVectoredLatency {
142 : // Only these task types perform vectored gets. Filter all other tasks out to reduce total
143 : // cardinality of the metric.
144 : const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
145 :
146 39356 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
147 39356 : self.map[task_kind].as_ref()
148 39356 : }
149 : }
150 :
151 : impl ScanLatency {
152 : // Only these task types perform scans. Filter all other tasks out to reduce total
153 : // cardinality of the metric.
154 : const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
155 :
156 24 : pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
157 24 : self.map[task_kind].as_ref()
158 24 : }
159 : }
160 :
161 : pub(crate) struct ScanLatencyOngoingRecording<'a> {
162 : parent: &'a Histogram,
163 : start: std::time::Instant,
164 : }
165 :
166 : impl<'a> ScanLatencyOngoingRecording<'a> {
167 0 : pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
168 0 : let start = Instant::now();
169 0 : ScanLatencyOngoingRecording { parent, start }
170 0 : }
171 :
172 0 : pub(crate) fn observe(self) {
173 0 : let elapsed = self.start.elapsed();
174 0 : self.parent.observe(elapsed.as_secs_f64());
175 0 : }
176 : }
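
// A minimal usage sketch (hypothetical call site; `do_scan` is a placeholder):
// untracked task kinds get no histogram, which keeps metric cardinality low.
fn scan_with_latency_metric(task_kind: TaskKind, do_scan: impl FnOnce()) {
    match SCAN_LATENCY.for_task_kind(task_kind) {
        Some(histogram) => {
            // `observe` consumes the recording and records the elapsed time.
            let recording = ScanLatencyOngoingRecording::start_recording(histogram);
            do_scan();
            recording.observe();
        }
        None => do_scan(),
    }
}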
177 :
178 384 : pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
179 384 : let inner = register_histogram_vec!(
180 384 : "pageserver_get_vectored_seconds",
181 384 : "Time spent in get_vectored.",
182 384 : &["task_kind"],
183 384 : CRITICAL_OP_BUCKETS.into(),
184 384 : )
185 384 : .expect("failed to define a metric");
186 384 :
187 384 : GetVectoredLatency {
188 11904 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
189 11904 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
190 11904 :
191 11904 : if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
192 768 : let task_kind = task_kind.into();
193 768 : Some(inner.with_label_values(&[task_kind]))
194 : } else {
195 11136 : None
196 : }
197 11904 : })),
198 384 : }
199 384 : });
200 :
201 8 : pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
202 8 : let inner = register_histogram_vec!(
203 8 : "pageserver_scan_seconds",
204 8 : "Time spent in scan.",
205 8 : &["task_kind"],
206 8 : CRITICAL_OP_BUCKETS.into(),
207 8 : )
208 8 : .expect("failed to define a metric");
209 8 :
210 8 : ScanLatency {
211 248 : map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
212 248 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
213 248 :
214 248 : if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
215 8 : let task_kind = task_kind.into();
216 8 : Some(inner.with_label_values(&[task_kind]))
217 : } else {
218 240 : None
219 : }
220 248 : })),
221 8 : }
222 8 : });
223 :
224 : pub(crate) struct PageCacheMetricsForTaskKind {
225 : pub read_accesses_immutable: IntCounter,
226 : pub read_hits_immutable: IntCounter,
227 : }
228 :
229 : pub(crate) struct PageCacheMetrics {
230 : map: EnumMap<TaskKind, EnumMap<PageContentKind, PageCacheMetricsForTaskKind>>,
231 : }
232 :
233 184 : static PAGE_CACHE_READ_HITS: Lazy<IntCounterVec> = Lazy::new(|| {
234 184 : register_int_counter_vec!(
235 184 : "pageserver_page_cache_read_hits_total",
236 184 : "Number of read accesses to the page cache that hit",
237 184 : &["task_kind", "key_kind", "content_kind", "hit_kind"]
238 184 : )
239 184 : .expect("failed to define a metric")
240 184 : });
241 :
242 184 : static PAGE_CACHE_READ_ACCESSES: Lazy<IntCounterVec> = Lazy::new(|| {
243 184 : register_int_counter_vec!(
244 184 : "pageserver_page_cache_read_accesses_total",
245 184 : "Number of read accesses to the page cache",
246 184 : &["task_kind", "key_kind", "content_kind"]
247 184 : )
248 184 : .expect("failed to define a metric")
249 184 : });
250 :
251 184 : pub(crate) static PAGE_CACHE: Lazy<PageCacheMetrics> = Lazy::new(|| PageCacheMetrics {
252 5704 : map: EnumMap::from_array(std::array::from_fn(|task_kind| {
253 5704 : let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind);
254 5704 : let task_kind: &'static str = task_kind.into();
255 45632 : EnumMap::from_array(std::array::from_fn(|content_kind| {
256 45632 : let content_kind = <PageContentKind as enum_map::Enum>::from_usize(content_kind);
257 45632 : let content_kind: &'static str = content_kind.into();
258 45632 : PageCacheMetricsForTaskKind {
259 45632 : read_accesses_immutable: {
260 45632 : PAGE_CACHE_READ_ACCESSES
261 45632 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind])
262 45632 : .unwrap()
263 45632 : },
264 45632 :
265 45632 : read_hits_immutable: {
266 45632 : PAGE_CACHE_READ_HITS
267 45632 : .get_metric_with_label_values(&[task_kind, "immutable", content_kind, "-"])
268 45632 : .unwrap()
269 45632 : },
270 45632 : }
271 45632 : }))
272 5704 : })),
273 184 : });
274 :
275 : impl PageCacheMetrics {
276 1956046 : pub(crate) fn for_ctx(&self, ctx: &RequestContext) -> &PageCacheMetricsForTaskKind {
277 1956046 : &self.map[ctx.task_kind()][ctx.page_content_kind()]
278 1956046 : }
279 : }
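
// A minimal sketch of the intended hit/access protocol (hypothetical read
// path; `lookup_in_cache` is a placeholder): every lookup bumps the access
// counter, and only a hit additionally bumps the hit counter, so the hit rate
// is read_hits_immutable / read_accesses_immutable.
fn cached_read_sketch(
    ctx: &RequestContext,
    lookup_in_cache: impl FnOnce() -> Option<Vec<u8>>,
) -> Option<Vec<u8>> {
    let metrics = PAGE_CACHE.for_ctx(ctx);
    metrics.read_accesses_immutable.inc();
    let page = lookup_in_cache();
    if page.is_some() {
        metrics.read_hits_immutable.inc();
    }
    page
}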
280 :
281 : pub(crate) struct PageCacheSizeMetrics {
282 : pub max_bytes: UIntGauge,
283 :
284 : pub current_bytes_immutable: UIntGauge,
285 : }
286 :
287 184 : static PAGE_CACHE_SIZE_CURRENT_BYTES: Lazy<UIntGaugeVec> = Lazy::new(|| {
288 184 : register_uint_gauge_vec!(
289 184 : "pageserver_page_cache_size_current_bytes",
290 184 : "Current size of the page cache in bytes, by key kind",
291 184 : &["key_kind"]
292 184 : )
293 184 : .expect("failed to define a metric")
294 184 : });
295 :
296 : pub(crate) static PAGE_CACHE_SIZE: Lazy<PageCacheSizeMetrics> =
297 184 : Lazy::new(|| PageCacheSizeMetrics {
298 184 : max_bytes: {
299 184 : register_uint_gauge!(
300 184 : "pageserver_page_cache_size_max_bytes",
301 184 : "Maximum size of the page cache in bytes"
302 184 : )
303 184 : .expect("failed to define a metric")
304 184 : },
305 184 : current_bytes_immutable: {
306 184 : PAGE_CACHE_SIZE_CURRENT_BYTES
307 184 : .get_metric_with_label_values(&["immutable"])
308 184 : .unwrap()
309 184 : },
310 184 : });
311 :
312 : pub(crate) mod page_cache_eviction_metrics {
313 : use std::num::NonZeroUsize;
314 :
315 : use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
316 : use once_cell::sync::Lazy;
317 :
318 : #[derive(Clone, Copy)]
319 : pub(crate) enum Outcome {
320 : FoundSlotUnused { iters: NonZeroUsize },
321 : FoundSlotEvicted { iters: NonZeroUsize },
322 : ItersExceeded { iters: NonZeroUsize },
323 : }
324 :
325 184 : static ITERS_TOTAL_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
326 184 : register_int_counter_vec!(
327 184 : "pageserver_page_cache_find_victim_iters_total",
328 184 : "Counter for the number of iterations in the find_victim loop",
329 184 : &["outcome"],
330 184 : )
331 184 : .expect("failed to define a metric")
332 184 : });
333 :
334 184 : static CALLS_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
335 184 : register_int_counter_vec!(
336 184 : "pageserver_page_cache_find_victim_calls",
337 184 : "Incremented at the end of each find_victim() call.\
338 184 : Filter by outcome to get e.g., eviction rate.",
339 184 : &["outcome"]
340 184 : )
341 184 : .unwrap()
342 184 : });
343 :
344 64243 : pub(crate) fn observe(outcome: Outcome) {
345 : macro_rules! dry {
346 : ($label:literal, $iters:expr) => {{
347 : static LABEL: &'static str = $label;
348 : static ITERS_TOTAL: Lazy<IntCounter> =
349 224 : Lazy::new(|| ITERS_TOTAL_VEC.with_label_values(&[LABEL]));
350 : static CALLS: Lazy<IntCounter> =
351 224 : Lazy::new(|| CALLS_VEC.with_label_values(&[LABEL]));
352 : ITERS_TOTAL.inc_by(($iters.get()) as u64);
353 : CALLS.inc();
354 : }};
355 : }
356 64243 : match outcome {
357 3272 : Outcome::FoundSlotUnused { iters } => dry!("found_empty", iters),
358 60971 : Outcome::FoundSlotEvicted { iters } => {
359 60971 : dry!("found_evicted", iters)
360 : }
361 0 : Outcome::ItersExceeded { iters } => {
362 0 : dry!("err_iters_exceeded", iters);
363 0 : super::page_cache_errors_inc(super::PageCacheErrorKind::EvictIterLimit);
364 0 : }
365 : }
366 64243 : }
367 : }
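
// The `dry!` macro above shows a common pattern with prometheus-style counter
// vecs: `with_label_values` performs a label-set lookup on every call, so hot
// call sites resolve the child counter once into a per-call-site `static Lazy`
// and then just `inc()` it. A self-contained sketch of the same idea, with a
// hypothetical metric name:
mod label_caching_sketch {
    use metrics::{register_int_counter_vec, IntCounter, IntCounterVec};
    use once_cell::sync::Lazy;

    static OUTCOMES_VEC: Lazy<IntCounterVec> = Lazy::new(|| {
        register_int_counter_vec!("example_outcomes_total", "Example counter", &["outcome"])
            .unwrap()
    });
    // Resolved once on first use; later calls skip the label lookup entirely.
    static OUTCOME_OK: Lazy<IntCounter> =
        Lazy::new(|| OUTCOMES_VEC.with_label_values(&["ok"]));

    pub(crate) fn on_ok() {
        OUTCOME_OK.inc();
    }
}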
368 :
369 0 : static PAGE_CACHE_ERRORS: Lazy<IntCounterVec> = Lazy::new(|| {
370 0 : register_int_counter_vec!(
371 0 : "page_cache_errors_total",
372 0 : "Number of timeouts while acquiring a pinned slot in the page cache",
373 0 : &["error_kind"]
374 0 : )
375 0 : .expect("failed to define a metric")
376 0 : });
377 :
378 : #[derive(IntoStaticStr)]
379 : #[strum(serialize_all = "kebab_case")]
380 : pub(crate) enum PageCacheErrorKind {
381 : AcquirePinnedSlotTimeout,
382 : EvictIterLimit,
383 : }
384 :
385 0 : pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) {
386 0 : PAGE_CACHE_ERRORS
387 0 : .get_metric_with_label_values(&[error_kind.into()])
388 0 : .unwrap()
389 0 : .inc();
390 0 : }
391 :
392 40 : pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
393 40 : register_histogram!(
394 40 : "pageserver_wait_lsn_seconds",
395 40 : "Time spent waiting for WAL to arrive",
396 40 : CRITICAL_OP_BUCKETS.into(),
397 40 : )
398 40 : .expect("failed to define a metric")
399 40 : });
400 :
401 400 : static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
402 400 : register_gauge_vec!(
403 400 : "pageserver_flush_wait_upload_seconds",
404 400 : "Time spent waiting for preceding uploads during layer flush",
405 400 : &["tenant_id", "shard_id", "timeline_id"]
406 400 : )
407 400 : .expect("failed to define a metric")
408 400 : });
409 :
410 400 : static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
411 400 : register_int_gauge_vec!(
412 400 : "pageserver_last_record_lsn",
413 400 : "Last record LSN grouped by timeline",
414 400 : &["tenant_id", "shard_id", "timeline_id"]
415 400 : )
416 400 : .expect("failed to define a metric")
417 400 : });
418 :
419 400 : static DISK_CONSISTENT_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
420 400 : register_int_gauge_vec!(
421 400 : "pageserver_disk_consistent_lsn",
422 400 : "Disk consistent LSN grouped by timeline",
423 400 : &["tenant_id", "shard_id", "timeline_id"]
424 400 : )
425 400 : .expect("failed to define a metric")
426 400 : });
427 :
428 400 : pub(crate) static PROJECTED_REMOTE_CONSISTENT_LSN: Lazy<UIntGaugeVec> = Lazy::new(|| {
429 400 : register_uint_gauge_vec!(
430 400 : "pageserver_projected_remote_consistent_lsn",
431 400 : "Projected remote consistent LSN grouped by timeline",
432 400 : &["tenant_id", "shard_id", "timeline_id"]
433 400 : )
434 400 : .expect("failed to define a metric")
435 400 : });
436 :
437 400 : static PITR_HISTORY_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
438 400 : register_uint_gauge_vec!(
439 400 : "pageserver_pitr_history_size",
440 400 : "Data written since PITR cutoff on this timeline",
441 400 : &["tenant_id", "shard_id", "timeline_id"]
442 400 : )
443 400 : .expect("failed to define a metric")
444 400 : });
445 :
446 0 : #[derive(strum_macros::EnumString, strum_macros::Display, strum_macros::IntoStaticStr)]
447 : #[strum(serialize_all = "kebab_case")]
448 : pub(crate) enum MetricLayerKind {
449 : Delta,
450 : Image,
451 : }
452 :
453 400 : static TIMELINE_LAYER_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
454 400 : register_uint_gauge_vec!(
455 400 : "pageserver_layer_bytes",
456 400 : "Sum of layer physical sizes in bytes",
457 400 : &["tenant_id", "shard_id", "timeline_id", "kind"]
458 400 : )
459 400 : .expect("failed to define a metric")
460 400 : });
461 :
462 400 : static TIMELINE_LAYER_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
463 400 : register_uint_gauge_vec!(
464 400 : "pageserver_layer_count",
465 400 : "Number of layers that exist",
466 400 : &["tenant_id", "shard_id", "timeline_id", "kind"]
467 400 : )
468 400 : .expect("failed to define a metric")
469 400 : });
470 :
471 400 : static TIMELINE_ARCHIVE_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
472 400 : register_uint_gauge_vec!(
473 400 : "pageserver_archive_size",
474 400 : "Timeline's logical size if it is considered eligible for archival (outside PITR window), else zero",
475 400 : &["tenant_id", "shard_id", "timeline_id"]
476 400 : )
477 400 : .expect("failed to define a metric")
478 400 : });
479 :
480 400 : static STANDBY_HORIZON: Lazy<IntGaugeVec> = Lazy::new(|| {
481 400 : register_int_gauge_vec!(
482 400 : "pageserver_standby_horizon",
483 400 : "Standby apply LSN for which GC is hold off, by timeline.",
484 400 : &["tenant_id", "shard_id", "timeline_id"]
485 400 : )
486 400 : .expect("failed to define a metric")
487 400 : });
488 :
489 400 : static RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
490 400 : register_uint_gauge_vec!(
491 400 : "pageserver_resident_physical_size",
492 400 : "The size of the layer files present in the pageserver's filesystem, for attached locations.",
493 400 : &["tenant_id", "shard_id", "timeline_id"]
494 400 : )
495 400 : .expect("failed to define a metric")
496 400 : });
497 :
498 400 : static VISIBLE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
499 400 : register_uint_gauge_vec!(
500 400 : "pageserver_visible_physical_size",
501 400 : "The size of the layer files present in the pageserver's filesystem.",
502 400 : &["tenant_id", "shard_id", "timeline_id"]
503 400 : )
504 400 : .expect("failed to define a metric")
505 400 : });
506 :
507 392 : pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
508 392 : register_uint_gauge!(
509 392 : "pageserver_resident_physical_size_global",
510 392 : "Like `pageserver_resident_physical_size`, but without tenant/timeline dimensions."
511 392 : )
512 392 : .expect("failed to define a metric")
513 392 : });
514 :
515 400 : static REMOTE_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
516 400 : register_uint_gauge_vec!(
517 400 : "pageserver_remote_physical_size",
518 400 : "The size of the layer files present in the remote storage that are listed in the remote index_part.json.",
519 400 : // Corollary: If any files are missing from the index part, they won't be included here.
520 400 : &["tenant_id", "shard_id", "timeline_id"]
521 400 : )
522 400 : .expect("failed to define a metric")
523 400 : });
524 :
525 400 : static REMOTE_PHYSICAL_SIZE_GLOBAL: Lazy<UIntGauge> = Lazy::new(|| {
526 400 : register_uint_gauge!(
527 400 : "pageserver_remote_physical_size_global",
528 400 : "Like `pageserver_remote_physical_size`, but without tenant/timeline dimensions."
529 400 : )
530 400 : .expect("failed to define a metric")
531 400 : });
532 :
533 8 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_LAYERS: Lazy<IntCounter> = Lazy::new(|| {
534 8 : register_int_counter!(
535 8 : "pageserver_remote_ondemand_downloaded_layers_total",
536 8 : "Total on-demand downloaded layers"
537 8 : )
538 8 : .unwrap()
539 8 : });
540 :
541 8 : pub(crate) static REMOTE_ONDEMAND_DOWNLOADED_BYTES: Lazy<IntCounter> = Lazy::new(|| {
542 8 : register_int_counter!(
543 8 : "pageserver_remote_ondemand_downloaded_bytes_total",
544 8 : "Total bytes of layers on-demand downloaded",
545 8 : )
546 8 : .unwrap()
547 8 : });
548 :
549 400 : static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
550 400 : register_uint_gauge_vec!(
551 400 : "pageserver_current_logical_size",
552 400 : "Current logical size grouped by timeline",
553 400 : &["tenant_id", "shard_id", "timeline_id"]
554 400 : )
555 400 : .expect("failed to define current logical size metric")
556 400 : });
557 :
558 400 : static AUX_FILE_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
559 400 : register_int_gauge_vec!(
560 400 : "pageserver_aux_file_estimated_size",
561 400 : "The size of all aux files for a timeline in aux file v2 store.",
562 400 : &["tenant_id", "shard_id", "timeline_id"]
563 400 : )
564 400 : .expect("failed to define a metric")
565 400 : });
566 :
567 400 : static VALID_LSN_LEASE_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
568 400 : register_uint_gauge_vec!(
569 400 : "pageserver_valid_lsn_lease_count",
570 400 : "The number of valid leases after refreshing gc info.",
571 400 : &["tenant_id", "shard_id", "timeline_id"],
572 400 : )
573 400 : .expect("failed to define a metric")
574 400 : });
575 :
576 0 : pub(crate) static CIRCUIT_BREAKERS_BROKEN: Lazy<IntCounter> = Lazy::new(|| {
577 0 : register_int_counter!(
578 0 : "pageserver_circuit_breaker_broken",
579 0 : "How many times a circuit breaker has broken"
580 0 : )
581 0 : .expect("failed to define a metric")
582 0 : });
583 :
584 0 : pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy<IntCounter> = Lazy::new(|| {
585 0 : register_int_counter!(
586 0 : "pageserver_circuit_breaker_unbroken",
587 0 : "How many times a circuit breaker has been un-broken (recovered)"
588 0 : )
589 0 : .expect("failed to define a metric")
590 0 : });
591 :
592 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
593 384 : register_int_counter!(
594 384 : "pageserver_compression_image_in_bytes_total",
595 384 : "Size of data written into image layers before compression"
596 384 : )
597 384 : .expect("failed to define a metric")
598 384 : });
599 :
600 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy<IntCounter> = Lazy::new(|| {
601 384 : register_int_counter!(
602 384 : "pageserver_compression_image_in_bytes_considered",
603 384 : "Size of potentially compressible data written into image layers before compression"
604 384 : )
605 384 : .expect("failed to define a metric")
606 384 : });
607 :
608 384 : pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy<IntCounter> = Lazy::new(|| {
609 384 : register_int_counter!(
610 384 : "pageserver_compression_image_in_bytes_chosen",
611 384 : "Size of data whose compressed form was written into image layers"
612 384 : )
613 384 : .expect("failed to define a metric")
614 384 : });
615 :
616 384 : pub(crate) static COMPRESSION_IMAGE_OUTPUT_BYTES: Lazy<IntCounter> = Lazy::new(|| {
617 384 : register_int_counter!(
618 384 : "pageserver_compression_image_out_bytes_total",
619 384 : "Size of compressed image layer written"
620 384 : )
621 384 : .expect("failed to define a metric")
622 384 : });
623 :
624 20 : pub(crate) static RELSIZE_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
625 20 : register_uint_gauge!(
626 20 : "pageserver_relsize_cache_entries",
627 20 : "Number of entries in the relation size cache",
628 20 : )
629 20 : .expect("failed to define a metric")
630 20 : });
631 :
632 20 : pub(crate) static RELSIZE_CACHE_HITS: Lazy<IntCounter> = Lazy::new(|| {
633 20 : register_int_counter!("pageserver_relsize_cache_hits", "Relation size cache hits",)
634 20 : .expect("failed to define a metric")
635 20 : });
636 :
637 20 : pub(crate) static RELSIZE_CACHE_MISSES: Lazy<IntCounter> = Lazy::new(|| {
638 20 : register_int_counter!(
639 20 : "pageserver_relsize_cache_misses",
640 20 : "Relation size cache misses",
641 20 : )
642 20 : .expect("failed to define a metric")
643 20 : });
644 :
645 8 : pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy<IntCounter> = Lazy::new(|| {
646 8 : register_int_counter!(
647 8 : "pageserver_relsize_cache_misses_old",
648 8 : "Relation size cache misses where the lookup LSN is older than the last relation update"
649 8 : )
650 8 : .expect("failed to define a metric")
651 8 : });
652 :
653 : pub(crate) mod initial_logical_size {
654 : use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
655 : use once_cell::sync::Lazy;
656 :
657 : pub(crate) struct StartCalculation(IntCounterVec);
658 400 : pub(crate) static START_CALCULATION: Lazy<StartCalculation> = Lazy::new(|| {
659 400 : StartCalculation(
660 400 : register_int_counter_vec!(
661 400 : "pageserver_initial_logical_size_start_calculation",
662 400 : "Incremented each time we start an initial logical size calculation attempt. \
663 400 : The `circumstances` label provides some additional details.",
664 400 : &["attempt", "circumstances"]
665 400 : )
666 400 : .unwrap(),
667 400 : )
668 400 : });
669 :
670 : struct DropCalculation {
671 : first: IntCounter,
672 : retry: IntCounter,
673 : }
674 :
675 400 : static DROP_CALCULATION: Lazy<DropCalculation> = Lazy::new(|| {
676 400 : let vec = register_int_counter_vec!(
677 400 : "pageserver_initial_logical_size_drop_calculation",
678 400 : "Incremented each time we abort a started size calculation attmpt.",
679 400 : &["attempt"]
680 400 : )
681 400 : .unwrap();
682 400 : DropCalculation {
683 400 : first: vec.with_label_values(&["first"]),
684 400 : retry: vec.with_label_values(&["retry"]),
685 400 : }
686 400 : });
687 :
688 : pub(crate) struct Calculated {
689 : pub(crate) births: IntCounter,
690 : pub(crate) deaths: IntCounter,
691 : }
692 :
693 400 : pub(crate) static CALCULATED: Lazy<Calculated> = Lazy::new(|| Calculated {
694 400 : births: register_int_counter!(
695 400 : "pageserver_initial_logical_size_finish_calculation",
696 400 : "Incremented every time we finish calculation of initial logical size.\
697 400 : If everything is working well, this should happen at most once per Timeline object."
698 400 : )
699 400 : .unwrap(),
700 400 : deaths: register_int_counter!(
701 400 : "pageserver_initial_logical_size_drop_finished_calculation",
702 400 : "Incremented when we drop a finished initial logical size calculation result.\
703 400 : Mainly useful to turn pageserver_initial_logical_size_finish_calculation into a gauge."
704 400 : )
705 400 : .unwrap(),
706 400 : });
707 :
708 : pub(crate) struct OngoingCalculationGuard {
709 : inc_drop_calculation: Option<IntCounter>,
710 : }
711 :
712 : #[derive(strum_macros::IntoStaticStr)]
713 : pub(crate) enum StartCircumstances {
714 : EmptyInitial,
715 : SkippedConcurrencyLimiter,
716 : AfterBackgroundTasksRateLimit,
717 : }
718 :
719 : impl StartCalculation {
720 424 : pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
721 424 : let circumstances_label: &'static str = circumstances.into();
722 424 : self.0
723 424 : .with_label_values(&["first", circumstances_label])
724 424 : .inc();
725 424 : OngoingCalculationGuard {
726 424 : inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
727 424 : }
728 424 : }
729 0 : pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
730 0 : let circumstances_label: &'static str = circumstances.into();
731 0 : self.0
732 0 : .with_label_values(&["retry", circumstances_label])
733 0 : .inc();
734 0 : OngoingCalculationGuard {
735 0 : inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
736 0 : }
737 0 : }
738 : }
739 :
740 : impl Drop for OngoingCalculationGuard {
741 424 : fn drop(&mut self) {
742 424 : if let Some(counter) = self.inc_drop_calculation.take() {
743 0 : counter.inc();
744 424 : }
745 424 : }
746 : }
747 :
748 : impl OngoingCalculationGuard {
749 424 : pub(crate) fn calculation_result_saved(mut self) -> FinishedCalculationGuard {
750 424 : drop(self.inc_drop_calculation.take());
751 424 : CALCULATED.births.inc();
752 424 : FinishedCalculationGuard {
753 424 : inc_on_drop: CALCULATED.deaths.clone(),
754 424 : }
755 424 : }
756 : }
757 :
758 : pub(crate) struct FinishedCalculationGuard {
759 : inc_on_drop: IntCounter,
760 : }
761 :
762 : impl Drop for FinishedCalculationGuard {
763 12 : fn drop(&mut self) {
764 12 : self.inc_on_drop.inc();
765 12 : }
766 : }
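
// A minimal sketch (hypothetical call site) of the guard protocol: dropping
// an OngoingCalculationGuard without saving counts as an aborted attempt,
// while converting it counts a "birth" and dropping the finished guard later
// counts a "death".
fn calculate_initial_size_sketch() -> FinishedCalculationGuard {
    let guard = START_CALCULATION.first(StartCircumstances::EmptyInitial);
    // ... compute the initial logical size; on failure, just drop `guard` ...
    guard.calculation_result_saved()
}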
767 :
768 : // context: https://github.com/neondatabase/neon/issues/5963
769 : pub(crate) static TIMELINES_WHERE_WALRECEIVER_GOT_APPROXIMATE_SIZE: Lazy<IntCounter> =
770 0 : Lazy::new(|| {
771 0 : register_int_counter!(
772 0 : "pageserver_initial_logical_size_timelines_where_walreceiver_got_approximate_size",
773 0 : "Counter for the following event: walreceiver calls\
774 0 : Timeline::get_current_logical_size() and it returns `Approximate` for the first time."
775 0 : )
776 0 : .unwrap()
777 0 : });
778 : }
779 :
780 0 : static DIRECTORY_ENTRIES_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
781 0 : register_uint_gauge_vec!(
782 0 : "pageserver_directory_entries_count",
783 0 : "Sum of the entries in pageserver-stored directory listings",
784 0 : &["tenant_id", "shard_id", "timeline_id"]
785 0 : )
786 0 : .expect("failed to define a metric")
787 0 : });
788 :
789 404 : pub(crate) static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
790 404 : register_uint_gauge_vec!(
791 404 : "pageserver_tenant_states_count",
792 404 : "Count of tenants per state",
793 404 : &["state"]
794 404 : )
795 404 : .expect("Failed to register pageserver_tenant_states_count metric")
796 404 : });
797 :
798 : /// A set of broken tenants.
799 : ///
800 : /// These are expected to be so rare that a set is fine. "Set" as in a new timeseries for each broken
801 : /// tenant.
802 20 : pub(crate) static BROKEN_TENANTS_SET: Lazy<UIntGaugeVec> = Lazy::new(|| {
803 20 : register_uint_gauge_vec!(
804 20 : "pageserver_broken_tenants_count",
805 20 : "Set of broken tenants",
806 20 : &["tenant_id", "shard_id"]
807 20 : )
808 20 : .expect("Failed to register pageserver_tenant_states_count metric")
809 20 : });
810 :
811 12 : pub(crate) static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
812 12 : register_uint_gauge_vec!(
813 12 : "pageserver_tenant_synthetic_cached_size_bytes",
814 12 : "Synthetic size of each tenant in bytes",
815 12 : &["tenant_id"]
816 12 : )
817 12 : .expect("Failed to register pageserver_tenant_synthetic_cached_size_bytes metric")
818 12 : });
819 :
820 0 : pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
821 0 : register_histogram_vec!(
822 0 : "pageserver_eviction_iteration_duration_seconds_global",
823 0 : "Time spent on a single eviction iteration",
824 0 : &["period_secs", "threshold_secs"],
825 0 : STORAGE_OP_BUCKETS.into(),
826 0 : )
827 0 : .expect("failed to define a metric")
828 0 : });
829 :
830 400 : static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
831 400 : register_int_counter_vec!(
832 400 : "pageserver_evictions",
833 400 : "Number of layers evicted from the pageserver",
834 400 : &["tenant_id", "shard_id", "timeline_id"]
835 400 : )
836 400 : .expect("failed to define a metric")
837 400 : });
838 :
839 400 : static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(|| {
840 400 : register_int_counter_vec!(
841 400 : "pageserver_evictions_with_low_residence_duration",
842 400 : "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
843 400 : Residence duration is determined using the `residence_duration_data_source`.",
844 400 : &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
845 400 : )
846 400 : .expect("failed to define a metric")
847 400 : });
848 :
849 0 : pub(crate) static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
850 0 : register_int_counter!(
851 0 : "pageserver_unexpected_ondemand_downloads_count",
852 0 : "Number of unexpected on-demand downloads. \
853 0 : We log more context for each increment, so we forgo any labels in this metric.",
854 0 : )
855 0 : .expect("failed to define a metric")
856 0 : });
857 :
858 : /// How long did we take to start up? Broken down by labels to describe
859 : /// different phases of startup.
860 0 : pub static STARTUP_DURATION: Lazy<GaugeVec> = Lazy::new(|| {
861 0 : register_gauge_vec!(
862 0 : "pageserver_startup_duration_seconds",
863 0 : "Time taken by phases of pageserver startup, in seconds",
864 0 : &["phase"]
865 0 : )
866 0 : .expect("Failed to register pageserver_startup_duration_seconds metric")
867 0 : });
868 :
869 0 : pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
870 0 : register_uint_gauge!(
871 0 : "pageserver_startup_is_loading",
872 0 : "1 while in initial startup load of tenants, 0 at other times"
873 0 : )
874 0 : .expect("Failed to register pageserver_startup_is_loading")
875 0 : });
876 :
877 392 : pub(crate) static TIMELINE_EPHEMERAL_BYTES: Lazy<UIntGauge> = Lazy::new(|| {
878 392 : register_uint_gauge!(
879 392 : "pageserver_timeline_ephemeral_bytes",
880 392 : "Total number of bytes in ephemeral layers, summed for all timelines. Approximate, lazily updated."
881 392 : )
882 392 : .expect("Failed to register metric")
883 392 : });
884 :
885 : /// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
886 : /// like how long it took to load.
887 : ///
888 : /// Note that these are process-global metrics, _not_ per-tenant metrics. Per-tenant
889 : /// metrics are rather expensive, and usually fine-grained stuff makes more sense
890 : /// at a timeline level than tenant level.
891 : pub(crate) struct TenantMetrics {
892 : /// How long did tenants take to go from construction to active state?
893 : pub(crate) activation: Histogram,
894 : pub(crate) preload: Histogram,
895 : pub(crate) attach: Histogram,
896 :
897 : /// How many tenants are included in the initial startup of the pageserver?
898 : pub(crate) startup_scheduled: IntCounter,
899 : pub(crate) startup_complete: IntCounter,
900 : }
901 :
902 0 : pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
903 0 : TenantMetrics {
904 0 : activation: register_histogram!(
905 0 : "pageserver_tenant_activation_seconds",
906 0 : "Time taken by tenants to activate, in seconds",
907 0 : CRITICAL_OP_BUCKETS.into()
908 0 : )
909 0 : .expect("Failed to register metric"),
910 0 : preload: register_histogram!(
911 0 : "pageserver_tenant_preload_seconds",
912 0 : "Time taken by tenants to load remote metadata on startup/attach, in seconds",
913 0 : CRITICAL_OP_BUCKETS.into()
914 0 : )
915 0 : .expect("Failed to register metric"),
916 0 : attach: register_histogram!(
917 0 : "pageserver_tenant_attach_seconds",
918 0 : "Time taken by tenants to intialize, after remote metadata is already loaded",
919 0 : CRITICAL_OP_BUCKETS.into()
920 0 : )
921 0 : .expect("Failed to register metric"),
922 0 : startup_scheduled: register_int_counter!(
923 0 : "pageserver_tenant_startup_scheduled",
924 0 : "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
925 0 : ).expect("Failed to register metric"),
926 0 : startup_complete: register_int_counter!(
927 0 : "pageserver_tenant_startup_complete",
928 0 : "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
929 0 : should eventually reach `pageserver_tenant_startup_scheduled_total`. Does not include broken \
930 0 : tenants: such cases will lead to this metric never reaching the scheduled count."
931 0 : ).expect("Failed to register metric"),
932 0 : }
933 0 : });
934 :
935 : /// Each `Timeline`'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
936 : #[derive(Debug)]
937 : pub(crate) struct EvictionsWithLowResidenceDuration {
938 : data_source: &'static str,
939 : threshold: Duration,
940 : counter: Option<IntCounter>,
941 : }
942 :
943 : pub(crate) struct EvictionsWithLowResidenceDurationBuilder {
944 : data_source: &'static str,
945 : threshold: Duration,
946 : }
947 :
948 : impl EvictionsWithLowResidenceDurationBuilder {
949 892 : pub fn new(data_source: &'static str, threshold: Duration) -> Self {
950 892 : Self {
951 892 : data_source,
952 892 : threshold,
953 892 : }
954 892 : }
955 :
956 892 : fn build(
957 892 : &self,
958 892 : tenant_id: &str,
959 892 : shard_id: &str,
960 892 : timeline_id: &str,
961 892 : ) -> EvictionsWithLowResidenceDuration {
962 892 : let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
963 892 : .get_metric_with_label_values(&[
964 892 : tenant_id,
965 892 : shard_id,
966 892 : timeline_id,
967 892 : self.data_source,
968 892 : &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
969 892 : ])
970 892 : .unwrap();
971 892 : EvictionsWithLowResidenceDuration {
972 892 : data_source: self.data_source,
973 892 : threshold: self.threshold,
974 892 : counter: Some(counter),
975 892 : }
976 892 : }
977 : }
978 :
979 : impl EvictionsWithLowResidenceDuration {
980 912 : fn threshold_label_value(threshold: Duration) -> String {
981 912 : format!("{}", threshold.as_secs())
982 912 : }
983 :
984 8 : pub fn observe(&self, observed_value: Duration) {
985 8 : if observed_value < self.threshold {
986 8 : self.counter
987 8 : .as_ref()
988 8 : .expect("nobody calls this function after `remove_from_vec`")
989 8 : .inc();
990 8 : }
991 8 : }
992 :
993 0 : pub fn change_threshold(
994 0 : &mut self,
995 0 : tenant_id: &str,
996 0 : shard_id: &str,
997 0 : timeline_id: &str,
998 0 : new_threshold: Duration,
999 0 : ) {
1000 0 : if new_threshold == self.threshold {
1001 0 : return;
1002 0 : }
1003 0 : let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
1004 0 : self.data_source,
1005 0 : new_threshold,
1006 0 : )
1007 0 : .build(tenant_id, shard_id, timeline_id);
1008 0 : std::mem::swap(self, &mut with_new);
1009 0 : with_new.remove(tenant_id, shard_id, timeline_id);
1010 0 : }
1011 :
1012 : // This could be a `Drop` impl, but we need the `tenant_id` and `timeline_id`.
1013 20 : fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
1014 20 : let Some(_counter) = self.counter.take() else {
1015 0 : return;
1016 : };
1017 :
1018 20 : let threshold = Self::threshold_label_value(self.threshold);
1019 20 :
1020 20 : let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
1021 20 : tenant_id,
1022 20 : shard_id,
1023 20 : timeline_id,
1024 20 : self.data_source,
1025 20 : &threshold,
1026 20 : ]);
1027 20 :
1028 20 : match removed {
1029 0 : Err(e) => {
1030 0 : // this has been hit in staging as
1031 0 : // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
1032 0 : // Because we can be in the drop path already, don't risk:
1033 0 : // - "double-panic => illegal instruction" or
1034 0 : // - a future "drop panic => abort"
1035 0 : //
1036 0 : // so just nag (the error has the labels):
1037 0 : tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
1038 : }
1039 : Ok(()) => {
1040 : // to help identify cases where we double-remove the same values, let's log all
1041 : // deletions?
1042 20 : tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
1043 : }
1044 : }
1045 20 : }
1046 : }
1047 :
1048 : // Metrics collected on disk IO operations
1049 : //
1050 : // Roughly logarithmic scale.
1051 : const STORAGE_IO_TIME_BUCKETS: &[f64] = &[
1052 : 0.000030, // 30 usec
1053 : 0.001000, // 1000 usec
1054 : 0.030, // 30 ms
1055 : 1.000, // 1000 ms
1056 : 30.000, // 30000 ms
1057 : ];
1058 :
1059 : /// VirtualFile fs operation variants.
1060 : ///
1061 : /// Operations:
1062 : /// - open ([`std::fs::OpenOptions::open`])
1063 : /// - close (dropping [`crate::virtual_file::VirtualFile`])
1064 : /// - close-by-replace (close by replacement algorithm)
1065 : /// - read (`read_at`)
1066 : /// - write (`write_at`)
1067 : /// - seek (modify the internal position, or query the file length)
1068 : /// - fsync ([`std::fs::File::sync_all`])
1069 : /// - metadata ([`std::fs::File::metadata`])
1070 : #[derive(
1071 0 : Debug, Clone, Copy, strum_macros::EnumCount, strum_macros::EnumIter, strum_macros::FromRepr,
1072 : )]
1073 : pub(crate) enum StorageIoOperation {
1074 : Open,
1075 : OpenAfterReplace,
1076 : Close,
1077 : CloseByReplace,
1078 : Read,
1079 : Write,
1080 : Seek,
1081 : Fsync,
1082 : Metadata,
1083 : }
1084 :
1085 : impl StorageIoOperation {
1086 4140 : pub fn as_str(&self) -> &'static str {
1087 4140 : match self {
1088 460 : StorageIoOperation::Open => "open",
1089 460 : StorageIoOperation::OpenAfterReplace => "open-after-replace",
1090 460 : StorageIoOperation::Close => "close",
1091 460 : StorageIoOperation::CloseByReplace => "close-by-replace",
1092 460 : StorageIoOperation::Read => "read",
1093 460 : StorageIoOperation::Write => "write",
1094 460 : StorageIoOperation::Seek => "seek",
1095 460 : StorageIoOperation::Fsync => "fsync",
1096 460 : StorageIoOperation::Metadata => "metadata",
1097 : }
1098 4140 : }
1099 : }
1100 :
1101 : /// Tracks time taken by fs operations near VirtualFile.
1102 : #[derive(Debug)]
1103 : pub(crate) struct StorageIoTime {
1104 : metrics: [Histogram; StorageIoOperation::COUNT],
1105 : }
1106 :
1107 : impl StorageIoTime {
1108 460 : fn new() -> Self {
1109 460 : let storage_io_histogram_vec = register_histogram_vec!(
1110 460 : "pageserver_io_operations_seconds",
1111 460 : "Time spent in IO operations",
1112 460 : &["operation"],
1113 460 : STORAGE_IO_TIME_BUCKETS.into()
1114 460 : )
1115 460 : .expect("failed to define a metric");
1116 4140 : let metrics = std::array::from_fn(|i| {
1117 4140 : let op = StorageIoOperation::from_repr(i).unwrap();
1118 4140 : storage_io_histogram_vec
1119 4140 : .get_metric_with_label_values(&[op.as_str()])
1120 4140 : .unwrap()
1121 4140 : });
1122 460 : Self { metrics }
1123 460 : }
1124 :
1125 4031816 : pub(crate) fn get(&self, op: StorageIoOperation) -> &Histogram {
1126 4031816 : &self.metrics[op as usize]
1127 4031816 : }
1128 : }
1129 :
1130 : pub(crate) static STORAGE_IO_TIME_METRIC: Lazy<StorageIoTime> = Lazy::new(StorageIoTime::new);
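
// A minimal usage sketch (hypothetical call site): `get` is a plain array
// index on the enum discriminant, so the hot IO path avoids any label lookup.
fn timed_read_sketch(read: impl FnOnce() -> std::io::Result<usize>) -> std::io::Result<usize> {
    let started = std::time::Instant::now();
    let result = read();
    STORAGE_IO_TIME_METRIC
        .get(StorageIoOperation::Read)
        .observe(started.elapsed().as_secs_f64());
    result
}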
1131 :
1132 : const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"];
1133 :
1134 : // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1
1135 452 : pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1136 452 : register_int_gauge_vec!(
1137 452 : "pageserver_io_operations_bytes_total",
1138 452 : "Total amount of bytes read/written in IO operations",
1139 452 : &["operation", "tenant_id", "shard_id", "timeline_id"]
1140 452 : )
1141 452 : .expect("failed to define a metric")
1142 452 : });
1143 :
1144 : #[cfg(not(test))]
1145 : pub(crate) mod virtual_file_descriptor_cache {
1146 : use super::*;
1147 :
1148 0 : pub(crate) static SIZE_MAX: Lazy<UIntGauge> = Lazy::new(|| {
1149 0 : register_uint_gauge!(
1150 0 : "pageserver_virtual_file_descriptor_cache_size_max",
1151 0 : "Maximum number of open file descriptors in the cache."
1152 0 : )
1153 0 : .unwrap()
1154 0 : });
1155 :
1156 : // SIZE_CURRENT: derive it like so:
1157 : // ```
1158 : // sum(pageserver_io_operations_seconds_count{operation=~"^(open|open-after-replace)$"})
1159 : // -ignoring(operation)
1160 : // sum(pageserver_io_operations_seconds_count{operation=~"^(close|close-by-replace)$"})
1161 : // ```
1162 : }
1163 :
1164 : #[cfg(not(test))]
1165 : pub(crate) mod virtual_file_io_engine {
1166 : use super::*;
1167 :
1168 0 : pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
1169 0 : register_uint_gauge_vec!(
1170 0 : "pageserver_virtual_file_io_engine_kind",
1171 0 : "The configured io engine for VirtualFile",
1172 0 : &["kind"],
1173 0 : )
1174 0 : .unwrap()
1175 0 : });
1176 : }
1177 :
1178 : pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
1179 : pub(crate) struct SmgrOpTimerInner {
1180 : global_execution_latency_histo: Histogram,
1181 : per_timeline_execution_latency_histo: Option<Histogram>,
1182 :
1183 : global_batch_wait_time: Histogram,
1184 : per_timeline_batch_wait_time: Histogram,
1185 :
1186 : global_flush_in_progress_micros: IntCounter,
1187 : per_timeline_flush_in_progress_micros: IntCounter,
1188 :
1189 : throttling: Arc<tenant_throttling::Pagestream>,
1190 :
1191 : timings: SmgrOpTimerState,
1192 : }
1193 :
1194 : /// The stages of request processing are represented by the enum variants.
1195 : /// Used as part of [`SmgrOpTimerInner::timings`].
1196 : ///
1197 : /// Request processing calls into the `SmgrOpTimer::observe_*` methods at the
1198 : /// transition points.
1199 : /// These methods bump relevant counters and then update [`SmgrOpTimerInner::timings`]
1200 : /// to the next state.
1201 : ///
1202 : /// Each request goes through every stage, in all configurations.
1203 : ///
1204 : #[derive(Debug)]
1205 : enum SmgrOpTimerState {
1206 : Received {
1207 : // In the future, we may want to track the full time the request spent
1208 : // inside pageserver process (time spent in kernel buffers can't be tracked).
1209 : // `received_at` would be used for that.
1210 : #[allow(dead_code)]
1211 : received_at: Instant,
1212 : },
1213 : Throttling {
1214 : throttle_started_at: Instant,
1215 : },
1216 : Batching {
1217 : throttle_done_at: Instant,
1218 : },
1219 : Executing {
1220 : execution_started_at: Instant,
1221 : },
1222 : Flushing,
1223 : // NB: when adding observation points, remember to update the Drop impl.
1224 : }
1225 :
1226 : // NB: when adding observation points, remember to update the Drop impl.
1227 : impl SmgrOpTimer {
1228 : /// See [`SmgrOpTimerState`] for more context.
1229 0 : pub(crate) fn observe_throttle_start(&mut self, at: Instant) {
1230 0 : let Some(inner) = self.0.as_mut() else {
1231 0 : return;
1232 : };
1233 0 : let SmgrOpTimerState::Received { received_at: _ } = &mut inner.timings else {
1234 0 : return;
1235 : };
1236 0 : inner.throttling.count_accounted_start.inc();
1237 0 : inner.timings = SmgrOpTimerState::Throttling {
1238 0 : throttle_started_at: at,
1239 0 : };
1240 0 : }
1241 :
1242 : /// See [`SmgrOpTimerState`] for more context.
1243 0 : pub(crate) fn observe_throttle_done(&mut self, throttle: ThrottleResult) {
1244 0 : let Some(inner) = self.0.as_mut() else {
1245 0 : return;
1246 : };
1247 : let SmgrOpTimerState::Throttling {
1248 0 : throttle_started_at,
1249 0 : } = &inner.timings
1250 : else {
1251 0 : return;
1252 : };
1253 0 : inner.throttling.count_accounted_finish.inc();
1254 0 : match throttle {
1255 0 : ThrottleResult::NotThrottled { end } => {
1256 0 : inner.timings = SmgrOpTimerState::Batching {
1257 0 : throttle_done_at: end,
1258 0 : };
1259 0 : }
1260 0 : ThrottleResult::Throttled { end } => {
1261 0 : // update metrics
1262 0 : inner.throttling.count_throttled.inc();
1263 0 : inner
1264 0 : .throttling
1265 0 : .wait_time
1266 0 : .inc_by((end - *throttle_started_at).as_micros().try_into().unwrap());
1267 0 : // state transition
1268 0 : inner.timings = SmgrOpTimerState::Batching {
1269 0 : throttle_done_at: end,
1270 0 : };
1271 0 : }
1272 : }
1273 0 : }
1274 :
1275 : /// See [`SmgrOpTimerState`] for more context.
1276 0 : pub(crate) fn observe_execution_start(&mut self, at: Instant) {
1277 0 : let Some(inner) = self.0.as_mut() else {
1278 0 : return;
1279 : };
1280 0 : let SmgrOpTimerState::Batching { throttle_done_at } = &inner.timings else {
1281 0 : return;
1282 : };
1283 : // update metrics
1284 0 : let batch = at - *throttle_done_at;
1285 0 : inner.global_batch_wait_time.observe(batch.as_secs_f64());
1286 0 : inner
1287 0 : .per_timeline_batch_wait_time
1288 0 : .observe(batch.as_secs_f64());
1289 0 : // state transition
1290 0 : inner.timings = SmgrOpTimerState::Executing {
1291 0 : execution_started_at: at,
1292 0 : }
1293 0 : }
1294 :
1295 : /// For all but the first caller, this is a no-op.
1296 : /// The first callers receives Some, subsequent ones None.
1297 : ///
1298 : /// See [`SmgrOpTimerState`] for more context.
1299 0 : pub(crate) fn observe_execution_end_flush_start(
1300 0 : &mut self,
1301 0 : at: Instant,
1302 0 : ) -> Option<SmgrOpFlushInProgress> {
1303 : // NB: unlike the other observe_* methods, this one take()s.
1304 : #[allow(clippy::question_mark)] // maintain similar code pattern.
1305 0 : let Some(mut inner) = self.0.take() else {
1306 0 : return None;
1307 : };
1308 : let SmgrOpTimerState::Executing {
1309 0 : execution_started_at,
1310 0 : } = &inner.timings
1311 : else {
1312 0 : return None;
1313 : };
1314 : // update metrics
1315 0 : let execution = at - *execution_started_at;
1316 0 : inner
1317 0 : .global_execution_latency_histo
1318 0 : .observe(execution.as_secs_f64());
1319 0 : if let Some(per_timeline_execution_latency_histo) =
1320 0 : &inner.per_timeline_execution_latency_histo
1321 0 : {
1322 0 : per_timeline_execution_latency_histo.observe(execution.as_secs_f64());
1323 0 : }
1324 :
1325 : // state transition
1326 0 : inner.timings = SmgrOpTimerState::Flushing;
1327 0 :
1328 0 : // return the flush in progress object which
1329 0 : // will do the remaining metrics updates
1330 0 : let SmgrOpTimerInner {
1331 0 : global_flush_in_progress_micros,
1332 0 : per_timeline_flush_in_progress_micros,
1333 0 : ..
1334 0 : } = inner;
1335 0 : Some(SmgrOpFlushInProgress {
1336 0 : flush_started_at: at,
1337 0 : global_micros: global_flush_in_progress_micros,
1338 0 : per_timeline_micros: per_timeline_flush_in_progress_micros,
1339 0 : })
1340 0 : }
1341 : }
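
// A minimal sketch (hypothetical call sites) of one request walking the state
// machine documented on `SmgrOpTimerState`:
// Received -> Throttling -> Batching -> Executing -> Flushing.
fn walk_states_sketch(mut timer: SmgrOpTimer) -> Option<SmgrOpFlushInProgress> {
    timer.observe_throttle_start(Instant::now());
    // This request was not throttled; the throttled branch also records wait time.
    timer.observe_throttle_done(ThrottleResult::NotThrottled { end: Instant::now() });
    // The request may now sit in a server-side batch; when execution begins:
    timer.observe_execution_start(Instant::now());
    // ... perform the smgr operation ...
    // The first caller gets back the object that tracks the flush stage.
    timer.observe_execution_end_flush_start(Instant::now())
}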
1342 :
1343 : /// The last stage of request processing is serializing and flushing the request
1344 : /// into the TCP connection. We want to make slow flushes observable
1345 : /// _while they are occuring_, so this struct provides a wrapper method [`Self::measure`]
1346 : /// to periodically bump the metric.
1347 : ///
1348 : /// If in the future we decide that we're not interested in live updates, we can
1349 : /// add another `observe_*` method to [`SmgrOpTimer`], follow the existing pattern there,
1350 : /// and remove this struct from the code base.
1351 : pub(crate) struct SmgrOpFlushInProgress {
1352 : flush_started_at: Instant,
1353 : global_micros: IntCounter,
1354 : per_timeline_micros: IntCounter,
1355 : }
1356 :
1357 : impl Drop for SmgrOpTimer {
1358 0 : fn drop(&mut self) {
1359 0 : // In case of early drop, update any of the remaining metrics with
1360 0 : // observations so that (started,finished) counter pairs balance out
1361 0 : // and all counters on the latency path have the the same number of
1362 0 : // observations.
1363 0 : // It's technically lying and it would be better if each metric had
1364 0 : // a separate label or similar for cancelled requests.
1365 0 : // But we don't have that right now and counter pairs balancing
1366 0 : // out is useful when using the metrics in panels and whatnot.
1367 0 : let now = Instant::now();
1368 0 : self.observe_throttle_start(now);
1369 0 : self.observe_throttle_done(ThrottleResult::NotThrottled { end: now });
1370 0 : self.observe_execution_start(now);
1371 0 : self.observe_execution_end_flush_start(now);
1372 0 : }
1373 : }
1374 :
1375 : impl SmgrOpFlushInProgress {
1376 0 : pub(crate) async fn measure<Fut, O>(mut self, mut fut: Fut) -> O
1377 0 : where
1378 0 : Fut: std::future::Future<Output = O>,
1379 0 : {
1380 0 : let mut fut = std::pin::pin!(fut);
1381 0 :
1382 0 : // Whenever observe_guard gets called, or dropped,
1383 0 : // it adds the time elapsed since its last call to metrics.
1384 0 : // Last call is tracked in `now`.
1385 0 : let mut observe_guard = scopeguard::guard(
1386 0 : || {
1387 0 : let now = Instant::now();
1388 0 : let elapsed = now - self.flush_started_at;
1389 0 : self.global_micros
1390 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1391 0 : self.per_timeline_micros
1392 0 : .inc_by(u64::try_from(elapsed.as_micros()).unwrap());
1393 0 : self.flush_started_at = now;
1394 0 : },
1395 0 : |mut observe| {
1396 0 : observe();
1397 0 : },
1398 0 : );
1399 :
1400 : loop {
1401 0 : match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
1402 0 : Ok(v) => return v,
1403 0 : Err(_timeout) => {
1404 0 : (*observe_guard)();
1405 0 : }
1406 : }
1407 : }
1408 0 : }
1409 : }
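
// A minimal sketch of `measure` wrapping a response flush (the async block is
// a placeholder): if the flush exceeds the 10-second timeout, the in-progress
// counters are bumped on every tick, so a stuck flush is visible while it is
// still happening rather than only after completion.
async fn flush_with_metrics_sketch(flush: SmgrOpFlushInProgress) {
    flush
        .measure(async {
            // ... serialize and write the response into the TCP connection ...
        })
        .await;
}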
1410 :
1411 : #[derive(
1412 : Debug,
1413 : Clone,
1414 : Copy,
1415 : IntoStaticStr,
1416 : strum_macros::EnumCount,
1417 0 : strum_macros::EnumIter,
1418 : strum_macros::FromRepr,
1419 : enum_map::Enum,
1420 : )]
1421 : #[strum(serialize_all = "snake_case")]
1422 : pub enum SmgrQueryType {
1423 : GetRelExists,
1424 : GetRelSize,
1425 : GetPageAtLsn,
1426 : GetDbSize,
1427 : GetSlruSegment,
1428 : #[cfg(feature = "testing")]
1429 : Test,
1430 : }
1431 :
1432 : pub(crate) struct SmgrQueryTimePerTimeline {
1433 : global_started: [IntCounter; SmgrQueryType::COUNT],
1434 : global_latency: [Histogram; SmgrQueryType::COUNT],
1435 : per_timeline_getpage_started: IntCounter,
1436 : per_timeline_getpage_latency: Histogram,
1437 : global_batch_size: Histogram,
1438 : per_timeline_batch_size: Histogram,
1439 : global_flush_in_progress_micros: IntCounter,
1440 : per_timeline_flush_in_progress_micros: IntCounter,
1441 : global_batch_wait_time: Histogram,
1442 : per_timeline_batch_wait_time: Histogram,
1443 : throttling: Arc<tenant_throttling::Pagestream>,
1444 : }
1445 :
1446 400 : static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
1447 400 : register_int_counter_vec!(
1448 400 : // it's a counter, but the name is prepared to extend it to a histogram of queue depth
1449 400 : "pageserver_smgr_query_started_global_count",
1450 400 : "Number of smgr queries started, aggregated by query type.",
1451 400 : &["smgr_query_type"],
1452 400 : )
1453 400 : .expect("failed to define a metric")
1454 400 : });
1455 :
1456 400 : static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
1457 400 : register_int_counter_vec!(
1458 400 : // it's a counter, but the name is prepared to extend it to a histogram of queue depth
1459 400 : "pageserver_smgr_query_started_count",
1460 400 : "Number of smgr queries started, aggregated by query type and tenant/timeline.",
1461 400 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1462 400 : )
1463 400 : .expect("failed to define a metric")
1464 400 : });
1465 :
1466 : // Alias so all histograms recording per-timeline smgr timings use the same buckets.
1467 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
1468 :
1469 400 : static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1470 400 : register_histogram_vec!(
1471 400 : "pageserver_smgr_query_seconds",
1472 400 : "Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
1473 400 : &["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
1474 400 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1475 400 : )
1476 400 : .expect("failed to define a metric")
1477 400 : });
1478 :
1479 400 : static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
1480 400 : [
1481 400 : 1,
1482 400 : 10,
1483 400 : 20,
1484 400 : 40,
1485 400 : 60,
1486 400 : 80,
1487 400 : 100,
1488 400 : 200,
1489 400 : 300,
1490 400 : 400,
1491 400 : 500,
1492 400 : 600,
1493 400 : 700,
1494 400 : 800,
1495 400 : 900,
1496 400 : 1_000, // 1ms
1497 400 : 2_000,
1498 400 : 4_000,
1499 400 : 6_000,
1500 400 : 8_000,
1501 400 : 10_000, // 10ms
1502 400 : 20_000,
1503 400 : 40_000,
1504 400 : 60_000,
1505 400 : 80_000,
1506 400 : 100_000,
1507 400 : 200_000,
1508 400 : 400_000,
1509 400 : 600_000,
1510 400 : 800_000,
1511 400 : 1_000_000, // 1s
1512 400 : 2_000_000,
1513 400 : 4_000_000,
1514 400 : 6_000_000,
1515 400 : 8_000_000,
1516 400 : 10_000_000, // 10s
1517 400 : 20_000_000,
1518 400 : 50_000_000,
1519 400 : 100_000_000,
1520 400 : 200_000_000,
1521 400 : 1_000_000_000, // 1000s
1522 400 : ]
1523 400 : .into_iter()
1524 400 : .map(Duration::from_micros)
1525 16400 : .map(|d| d.as_secs_f64())
1526 400 : .collect()
1527 400 : });
1528 :
1529 400 : static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
1530 400 : register_histogram_vec!(
1531 400 : "pageserver_smgr_query_seconds_global",
1532 400 : "Like pageserver_smgr_query_seconds, but aggregated to instance level.",
1533 400 : &["smgr_query_type"],
1534 400 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
1535 400 : )
1536 400 : .expect("failed to define a metric")
1537 400 : });
1538 :
1539 400 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL: Lazy<Vec<f64>> = Lazy::new(|| {
1540 400 : (1..=u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap())
1541 12800 : .map(|v| v.into())
1542 400 : .collect()
1543 400 : });
1544 :
1545 400 : static PAGE_SERVICE_BATCH_SIZE_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1546 400 : register_histogram!(
1547 400 : "pageserver_page_service_batch_size_global",
1548 400 : "Batch size of pageserver page service requests",
1549 400 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_GLOBAL.clone(),
1550 400 : )
1551 400 : .expect("failed to define a metric")
1552 400 : });
1553 :
1554 400 : static PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE: Lazy<Vec<f64>> = Lazy::new(|| {
1555 400 : let mut buckets = Vec::new();
1556 2800 : for i in 0.. {
1557 2800 : let bucket = 1 << i;
1558 2800 : if bucket > u32::try_from(Timeline::MAX_GET_VECTORED_KEYS).unwrap() {
1559 400 : break;
1560 2400 : }
1561 2400 : buckets.push(bucket.into());
1562 : }
1563 400 : buckets
1564 400 : });
1565 :
1566 400 : static PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
1567 400 : register_histogram_vec!(
1568 400 : "pageserver_page_service_batch_size",
1569 400 : "Batch size of pageserver page service requests",
1570 400 : &["tenant_id", "shard_id", "timeline_id"],
1571 400 : PAGE_SERVICE_BATCH_SIZE_BUCKETS_PER_TIMELINE.clone()
1572 400 : )
1573 400 : .expect("failed to define a metric")
1574 400 : });
1575 :
1576 0 : pub(crate) static PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
1577 0 : register_int_gauge_vec!(
1578 0 : "pageserver_page_service_config_max_batch_size",
1579 0 : "Configured maximum batch size for the server-side batching functionality of page_service. \
1580 0 : Labels expose more of the configuration parameters.",
1581 0 : &["mode", "execution"]
1582 0 : )
1583 0 : .expect("failed to define a metric")
1584 0 : });
1585 :
1586 0 : fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
1587 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE.reset();
1588 0 : let (label_values, value) = match conf {
1589 0 : PageServicePipeliningConfig::Serial => (["serial", "-"], 1),
1590 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
1591 0 : max_batch_size,
1592 0 : execution,
1593 0 : }) => {
1594 0 : let mode = "pipelined";
1595 0 : let execution = match execution {
1596 : PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures => {
1597 0 : "concurrent-futures"
1598 : }
1599 0 : PageServiceProtocolPipelinedExecutionStrategy::Tasks => "tasks",
1600 : };
1601 0 : ([mode, execution], max_batch_size.get())
1602 : }
1603 : };
1604 0 : PAGE_SERVICE_CONFIG_MAX_BATCH_SIZE
1605 0 : .with_label_values(&label_values)
1606 0 : .set(value.try_into().unwrap());
1607 0 : }
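
// Usage sketch for the function above, assuming a caller that has just applied a
// new pageserver config (the call site and the `page_service_pipelining` field
// name are hypothetical here):
//
//     set_page_service_config_max_batch_size(&conf.page_service_pipelining);
//
// For `PageServicePipeliningConfig::Serial` this publishes a max batch size of 1
// under labels {mode="serial", execution="-"}.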
1608 :
1609 400 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
1610 400 : register_int_counter_vec!(
1611 400 : "pageserver_page_service_pagestream_flush_in_progress_micros",
1612 400 : "Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
1613 400 : If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
1614 400 : easily discoverable in monitoring. \
1615 400 : Hence, this is NOT a completion latency histogram.",
1616 400 : &["tenant_id", "shard_id", "timeline_id"],
1617 400 : )
1618 400 : .expect("failed to define a metric")
1619 400 : });
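
// A minimal sketch of the intended update pattern for the in-progress counter
// above (`done` and `flush_step` are hypothetical helpers; the real loop lives
// in the pagestream flush path):
//
//     let started = Instant::now();
//     let mut reported_micros = 0u64;
//     while !done() {
//         flush_step().await;
//         let total_micros = started.elapsed().as_micros() as u64;
//         // publish progress incrementally so slow flushes show up while ongoing
//         counter.inc_by(total_micros - reported_micros);
//         reported_micros = total_micros;
//     }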
1620 :
1621 400 : static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
1622 400 : register_int_counter!(
1623 400 : "pageserver_page_service_pagestream_flush_in_progress_micros_global",
1624 400 : "Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
1625 400 : )
1626 400 : .expect("failed to define a metric")
1627 400 : });
1628 :
1629 400 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
1630 400 : register_histogram_vec!(
1631 400 : "pageserver_page_service_pagestream_batch_wait_time_seconds",
1632 400 : "Time a request spent waiting in its batch until the batch moved to throttle&execution.",
1633 400 : &["tenant_id", "shard_id", "timeline_id"],
1634 400 : SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
1635 400 : )
1636 400 : .expect("failed to define a metric")
1637 400 : });
1638 :
1639 400 : static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
1640 400 : register_histogram!(
1641 400 : "pageserver_page_service_pagestream_batch_wait_time_seconds_global",
1642 400 : "Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
1643 400 : SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
1644 400 : )
1645 400 : .expect("failed to define a metric")
1646 400 : });
1647 :
1648 : impl SmgrQueryTimePerTimeline {
1649 892 : pub(crate) fn new(
1650 892 : tenant_shard_id: &TenantShardId,
1651 892 : timeline_id: &TimelineId,
1652 892 : pagestream_throttle_metrics: Arc<tenant_throttling::Pagestream>,
1653 892 : ) -> Self {
1654 892 : let tenant_id = tenant_shard_id.tenant_id.to_string();
1655 892 : let shard_slug = format!("{}", tenant_shard_id.shard_slug());
1656 892 : let timeline_id = timeline_id.to_string();
1657 5352 : let global_started = std::array::from_fn(|i| {
1658 5352 : let op = SmgrQueryType::from_repr(i).unwrap();
1659 5352 : SMGR_QUERY_STARTED_GLOBAL
1660 5352 : .get_metric_with_label_values(&[op.into()])
1661 5352 : .unwrap()
1662 5352 : });
1663 5352 : let global_latency = std::array::from_fn(|i| {
1664 5352 : let op = SmgrQueryType::from_repr(i).unwrap();
1665 5352 : SMGR_QUERY_TIME_GLOBAL
1666 5352 : .get_metric_with_label_values(&[op.into()])
1667 5352 : .unwrap()
1668 5352 : });
1669 892 :
1670 892 : let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE
1671 892 : .get_metric_with_label_values(&[
1672 892 : SmgrQueryType::GetPageAtLsn.into(),
1673 892 : &tenant_id,
1674 892 : &shard_slug,
1675 892 : &timeline_id,
1676 892 : ])
1677 892 : .unwrap();
1678 892 : let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
1679 892 : .get_metric_with_label_values(&[
1680 892 : SmgrQueryType::GetPageAtLsn.into(),
1681 892 : &tenant_id,
1682 892 : &shard_slug,
1683 892 : &timeline_id,
1684 892 : ])
1685 892 : .unwrap();
1686 892 :
1687 892 : let global_batch_size = PAGE_SERVICE_BATCH_SIZE_GLOBAL.clone();
1688 892 : let per_timeline_batch_size = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE
1689 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1690 892 : .unwrap();
1691 892 :
1692 892 : let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
1693 892 : let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
1694 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1695 892 : .unwrap();
1696 892 :
1697 892 : let global_flush_in_progress_micros =
1698 892 : PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
1699 892 : let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
1700 892 : .get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
1701 892 : .unwrap();
1702 892 :
1703 892 : Self {
1704 892 : global_started,
1705 892 : global_latency,
1706 892 : per_timeline_getpage_latency,
1707 892 : per_timeline_getpage_started,
1708 892 : global_batch_size,
1709 892 : per_timeline_batch_size,
1710 892 : global_flush_in_progress_micros,
1711 892 : per_timeline_flush_in_progress_micros,
1712 892 : global_batch_wait_time,
1713 892 : per_timeline_batch_wait_time,
1714 892 : throttling: pagestream_throttle_metrics,
1715 892 : }
1716 892 : }
1717 0 : pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
1718 0 : self.global_started[op as usize].inc();
1719 :
1720 0 : let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
1721 0 : self.per_timeline_getpage_started.inc();
1722 0 : Some(self.per_timeline_getpage_latency.clone())
1723 : } else {
1724 0 : None
1725 : };
1726 :
1727 0 : SmgrOpTimer(Some(SmgrOpTimerInner {
1728 0 : global_execution_latency_histo: self.global_latency[op as usize].clone(),
1729 0 : per_timeline_execution_latency_histo: per_timeline_latency_histo,
1730 0 : global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
1731 0 : per_timeline_flush_in_progress_micros: self
1732 0 : .per_timeline_flush_in_progress_micros
1733 0 : .clone(),
1734 0 : global_batch_wait_time: self.global_batch_wait_time.clone(),
1735 0 : per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
1736 0 : throttling: self.throttling.clone(),
1737 0 : timings: SmgrOpTimerState::Received { received_at },
1738 0 : }))
1739 0 : }
1740 :
1741 : /// TODO: do something about this? It seems odd that we have a similar call on SmgrOpTimer.
1742 0 : pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
1743 0 : self.global_batch_size.observe(batch_size as f64);
1744 0 : self.per_timeline_batch_size.observe(batch_size as f64);
1745 0 : }
1746 : }
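
// Usage sketch for the methods above (hedged; the actual call sites are in
// page_service, and `handle_get_page` is a hypothetical handler):
//
//     let timer = metrics.start_smgr_op(SmgrQueryType::GetPageAtLsn, Instant::now());
//     metrics.observe_getpage_batch_start(batch.len());
//     let response = handle_get_page(&batch).await;
//     // dropping/finishing `timer` records batch wait, execution, and flush timings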
1747 :
1748 : // keep in sync with control plane Go code so that we can validate
1749 : // compute's basebackup_ms metric with our perspective in the context of SLI/SLO.
1750 0 : static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
1751 0 : // Go code uses milliseconds. Variable is called `computeStartupBuckets`
1752 0 : [
1753 0 : 5, 10, 20, 30, 50, 70, 100, 120, 150, 200, 250, 300, 350, 400, 450, 500, 600, 800, 1000,
1754 0 : 1500, 2000, 2500, 3000, 5000, 10000, 20000, 40000, 60000,
1755 0 : ]
1756 0 : .map(|ms| (ms as f64) / 1000.0)
1757 0 : });
1758 :
1759 : pub(crate) struct BasebackupQueryTime {
1760 : ok: Histogram,
1761 : error: Histogram,
1762 : client_error: Histogram,
1763 : }
1764 :
1765 0 : pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
1766 0 : let vec = register_histogram_vec!(
1767 0 : "pageserver_basebackup_query_seconds",
1768 0 : "Histogram of basebackup queries durations, by result type",
1769 0 : &["result"],
1770 0 : COMPUTE_STARTUP_BUCKETS.to_vec(),
1771 0 : )
1772 0 : .expect("failed to define a metric");
1773 0 : BasebackupQueryTime {
1774 0 : ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
1775 0 : error: vec.get_metric_with_label_values(&["error"]).unwrap(),
1776 0 : client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
1777 0 : }
1778 0 : });
1779 :
1780 : pub(crate) struct BasebackupQueryTimeOngoingRecording<'a> {
1781 : parent: &'a BasebackupQueryTime,
1782 : start: std::time::Instant,
1783 : }
1784 :
1785 : impl BasebackupQueryTime {
1786 0 : pub(crate) fn start_recording(&self) -> BasebackupQueryTimeOngoingRecording<'_> {
1787 0 : let start = Instant::now();
1788 0 : BasebackupQueryTimeOngoingRecording {
1789 0 : parent: self,
1790 0 : start,
1791 0 : }
1792 0 : }
1793 : }
1794 :
1795 : impl BasebackupQueryTimeOngoingRecording<'_> {
1796 0 : pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
1797 0 : let elapsed = self.start.elapsed().as_secs_f64();
1798 : // If you want to change how a specific error is categorized, also change it in `log_query_error`.
1799 0 : let metric = match res {
1800 0 : Ok(_) => &self.parent.ok,
1801 0 : Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
1802 0 : if is_expected_io_error(io_error) =>
1803 0 : {
1804 0 : &self.parent.client_error
1805 : }
1806 0 : Err(_) => &self.parent.error,
1807 : };
1808 0 : metric.observe(elapsed);
1809 0 : }
1810 : }
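
// Usage sketch (hedged; `send_basebackup` is a hypothetical handler returning
// `Result<_, QueryError>`):
//
//     let recording = BASEBACKUP_QUERY_TIME.start_recording();
//     let res = send_basebackup(&mut pgb).await;
//     recording.observe(&res); // buckets into ok / error / client_error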
1811 :
1812 0 : pub(crate) static LIVE_CONNECTIONS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1813 0 : register_int_counter_pair_vec!(
1814 0 : "pageserver_live_connections_started",
1815 0 : "Number of network connections that we started handling",
1816 0 : "pageserver_live_connections_finished",
1817 0 : "Number of network connections that we finished handling",
1818 0 : &["pageserver_connection_kind"]
1819 0 : )
1820 0 : .expect("failed to define a metric")
1821 0 : });
1822 :
1823 : #[derive(Clone, Copy, enum_map::Enum, IntoStaticStr)]
1824 : pub(crate) enum ComputeCommandKind {
1825 : PageStreamV3,
1826 : PageStreamV2,
1827 : Basebackup,
1828 : Fullbackup,
1829 : LeaseLsn,
1830 : }
1831 :
1832 : pub(crate) struct ComputeCommandCounters {
1833 : map: EnumMap<ComputeCommandKind, IntCounter>,
1834 : }
1835 :
1836 0 : pub(crate) static COMPUTE_COMMANDS_COUNTERS: Lazy<ComputeCommandCounters> = Lazy::new(|| {
1837 0 : let inner = register_int_counter_vec!(
1838 0 : "pageserver_compute_commands",
1839 0 : "Number of compute -> pageserver commands processed",
1840 0 : &["command"]
1841 0 : )
1842 0 : .expect("failed to define a metric");
1843 0 :
1844 0 : ComputeCommandCounters {
1845 0 : map: EnumMap::from_array(std::array::from_fn(|i| {
1846 0 : let command = <ComputeCommandKind as enum_map::Enum>::from_usize(i);
1847 0 : let command_str: &'static str = command.into();
1848 0 : inner.with_label_values(&[command_str])
1849 0 : })),
1850 0 : }
1851 0 : });
1852 :
1853 : impl ComputeCommandCounters {
1854 0 : pub(crate) fn for_command(&self, command: ComputeCommandKind) -> &IntCounter {
1855 0 : &self.map[command]
1856 0 : }
1857 : }
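
// Usage sketch:
//
//     COMPUTE_COMMANDS_COUNTERS
//         .for_command(ComputeCommandKind::Basebackup)
//         .inc();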
1858 :
1859 : // remote storage metrics
1860 :
1861 392 : static REMOTE_TIMELINE_CLIENT_CALLS: Lazy<IntCounterPairVec> = Lazy::new(|| {
1862 392 : register_int_counter_pair_vec!(
1863 392 : "pageserver_remote_timeline_client_calls_started",
1864 392 : "Number of started calls to remote timeline client.",
1865 392 : "pageserver_remote_timeline_client_calls_finished",
1866 392 : "Number of finshed calls to remote timeline client.",
1867 392 : &[
1868 392 : "tenant_id",
1869 392 : "shard_id",
1870 392 : "timeline_id",
1871 392 : "file_kind",
1872 392 : "op_kind"
1873 392 : ],
1874 392 : )
1875 392 : .unwrap()
1876 392 : });
1877 :
1878 : static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> =
1879 388 : Lazy::new(|| {
1880 388 : register_int_counter_vec!(
1881 388 : "pageserver_remote_timeline_client_bytes_started",
1882 388 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1883 388 : The increment happens when the operation is scheduled.",
1884 388 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1885 388 : )
1886 388 : .expect("failed to define a metric")
1887 388 : });
1888 :
1889 388 : static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
1890 388 : register_int_counter_vec!(
1891 388 : "pageserver_remote_timeline_client_bytes_finished",
1892 388 : "Incremented by the number of bytes associated with a remote timeline client operation. \
1893 388 : The increment happens when the operation finishes (regardless of success/failure/shutdown).",
1894 388 : &["tenant_id", "shard_id", "timeline_id", "file_kind", "op_kind"],
1895 388 : )
1896 388 : .expect("failed to define a metric")
1897 388 : });
1898 :
1899 : pub(crate) struct TenantManagerMetrics {
1900 : tenant_slots_attached: UIntGauge,
1901 : tenant_slots_secondary: UIntGauge,
1902 : tenant_slots_inprogress: UIntGauge,
1903 : pub(crate) tenant_slot_writes: IntCounter,
1904 : pub(crate) unexpected_errors: IntCounter,
1905 : }
1906 :
1907 : impl TenantManagerMetrics {
1908 : /// Helpers for tracking slots. Note that these do not track the lifetime of TenantSlot objects
1909 : /// exactly: they track the lifetime of the slots _in the tenant map_.
1910 4 : pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
1911 4 : match slot {
1912 0 : TenantSlot::Attached(_) => {
1913 0 : self.tenant_slots_attached.inc();
1914 0 : }
1915 0 : TenantSlot::Secondary(_) => {
1916 0 : self.tenant_slots_secondary.inc();
1917 0 : }
1918 4 : TenantSlot::InProgress(_) => {
1919 4 : self.tenant_slots_inprogress.inc();
1920 4 : }
1921 : }
1922 4 : }
1923 :
1924 4 : pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
1925 4 : match slot {
1926 4 : TenantSlot::Attached(_) => {
1927 4 : self.tenant_slots_attached.dec();
1928 4 : }
1929 0 : TenantSlot::Secondary(_) => {
1930 0 : self.tenant_slots_secondary.dec();
1931 0 : }
1932 0 : TenantSlot::InProgress(_) => {
1933 0 : self.tenant_slots_inprogress.dec();
1934 0 : }
1935 : }
1936 4 : }
1937 :
1938 : #[cfg(all(debug_assertions, not(test)))]
1939 0 : pub(crate) fn slots_total(&self) -> u64 {
1940 0 : self.tenant_slots_attached.get()
1941 0 : + self.tenant_slots_secondary.get()
1942 0 : + self.tenant_slots_inprogress.get()
1943 0 : }
1944 : }
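
// Usage sketch: code mutating the tenant map is expected to pair these calls so
// that the gauges track slot membership exactly (hypothetical call sites):
//
//     TENANT_MANAGER.slot_inserted(&slot);   // when putting a slot into the map
//     TENANT_MANAGER.slot_removed(&slot);    // when taking it back out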
1945 :
1946 4 : pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
1947 4 : let tenant_slots = register_uint_gauge_vec!(
1948 4 : "pageserver_tenant_manager_slots",
1949 4 : "How many slots currently exist, including all attached, secondary and in-progress operations",
1950 4 : &["mode"]
1951 4 : )
1952 4 : .expect("failed to define a metric");
1953 4 : TenantManagerMetrics {
1954 4 : tenant_slots_attached: tenant_slots
1955 4 : .get_metric_with_label_values(&["attached"])
1956 4 : .unwrap(),
1957 4 : tenant_slots_secondary: tenant_slots
1958 4 : .get_metric_with_label_values(&["secondary"])
1959 4 : .unwrap(),
1960 4 : tenant_slots_inprogress: tenant_slots
1961 4 : .get_metric_with_label_values(&["inprogress"])
1962 4 : .unwrap(),
1963 4 : tenant_slot_writes: register_int_counter!(
1964 4 : "pageserver_tenant_manager_slot_writes",
1965 4 : "Writes to a tenant slot, including all of create/attach/detach/delete"
1966 4 : )
1967 4 : .expect("failed to define a metric"),
1968 4 : unexpected_errors: register_int_counter!(
1969 4 : "pageserver_tenant_manager_unexpected_errors_total",
1970 4 : "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
1971 4 : )
1972 4 : .expect("failed to define a metric"),
1973 4 : }
1974 4 : });
1975 :
1976 : pub(crate) struct DeletionQueueMetrics {
1977 : pub(crate) keys_submitted: IntCounter,
1978 : pub(crate) keys_dropped: IntCounter,
1979 : pub(crate) keys_executed: IntCounter,
1980 : pub(crate) keys_validated: IntCounter,
1981 : pub(crate) dropped_lsn_updates: IntCounter,
1982 : pub(crate) unexpected_errors: IntCounter,
1983 : pub(crate) remote_errors: IntCounterVec,
1984 : }
1985 62 : pub(crate) static DELETION_QUEUE: Lazy<DeletionQueueMetrics> = Lazy::new(|| {
1986 62 : DeletionQueueMetrics{
1987 62 :
1988 62 : keys_submitted: register_int_counter!(
1989 62 : "pageserver_deletion_queue_submitted_total",
1990 62 : "Number of objects submitted for deletion"
1991 62 : )
1992 62 : .expect("failed to define a metric"),
1993 62 :
1994 62 : keys_dropped: register_int_counter!(
1995 62 : "pageserver_deletion_queue_dropped_total",
1996 62 : "Number of object deletions dropped due to stale generation."
1997 62 : )
1998 62 : .expect("failed to define a metric"),
1999 62 :
2000 62 : keys_executed: register_int_counter!(
2001 62 : "pageserver_deletion_queue_executed_total",
2002 62 : "Number of objects deleted. Only includes objects that we actually deleted, sum with pageserver_deletion_queue_dropped_total for the total number of keys processed to completion"
2003 62 : )
2004 62 : .expect("failed to define a metric"),
2005 62 :
2006 62 : keys_validated: register_int_counter!(
2007 62 : "pageserver_deletion_queue_validated_total",
2008 62 : "Number of keys validated for deletion. Sum with pageserver_deletion_queue_dropped_total for the total number of keys that have passed through the validation stage."
2009 62 : )
2010 62 : .expect("failed to define a metric"),
2011 62 :
2012 62 : dropped_lsn_updates: register_int_counter!(
2013 62 : "pageserver_deletion_queue_dropped_lsn_updates_total",
2014 62 : "Updates to remote_consistent_lsn dropped due to stale generation number."
2015 62 : )
2016 62 : .expect("failed to define a metric"),
2017 62 : unexpected_errors: register_int_counter!(
2018 62 : "pageserver_deletion_queue_unexpected_errors_total",
2019 62 : "Number of unexpected condiions that may stall the queue: any value above zero is unexpected."
2020 62 : )
2021 62 : .expect("failed to define a metric"),
2022 62 : remote_errors: register_int_counter_vec!(
2023 62 : "pageserver_deletion_queue_remote_errors_total",
2024 62 : "Retryable remote I/O errors while executing deletions, for example 503 responses to DeleteObjects",
2025 62 : &["op_kind"],
2026 62 : )
2027 62 : .expect("failed to define a metric")
2028 62 : }
2029 62 : });
2030 :
2031 : pub(crate) struct SecondaryModeMetrics {
2032 : pub(crate) upload_heatmap: IntCounter,
2033 : pub(crate) upload_heatmap_errors: IntCounter,
2034 : pub(crate) upload_heatmap_duration: Histogram,
2035 : pub(crate) download_heatmap: IntCounter,
2036 : pub(crate) download_layer: IntCounter,
2037 : }
2038 0 : pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
2039 0 : SecondaryModeMetrics {
2040 0 : upload_heatmap: register_int_counter!(
2041 0 : "pageserver_secondary_upload_heatmap",
2042 0 : "Number of heatmaps written to remote storage by attached tenants"
2043 0 : )
2044 0 : .expect("failed to define a metric"),
2045 0 : upload_heatmap_errors: register_int_counter!(
2046 0 : "pageserver_secondary_upload_heatmap_errors",
2047 0 : "Failures writing heatmap to remote storage"
2048 0 : )
2049 0 : .expect("failed to define a metric"),
2050 0 : upload_heatmap_duration: register_histogram!(
2051 0 : "pageserver_secondary_upload_heatmap_duration",
2052 0 : "Time to build and upload a heatmap, including any waiting inside the remote storage client"
2053 0 : )
2054 0 : .expect("failed to define a metric"),
2055 0 : download_heatmap: register_int_counter!(
2056 0 : "pageserver_secondary_download_heatmap",
2057 0 : "Number of downloads of heatmaps by secondary mode locations, including when it hasn't changed"
2058 0 : )
2059 0 : .expect("failed to define a metric"),
2060 0 : download_layer: register_int_counter!(
2061 0 : "pageserver_secondary_download_layer",
2062 0 : "Number of downloads of layers by secondary mode locations"
2063 0 : )
2064 0 : .expect("failed to define a metric"),
2065 0 : }
2066 0 : });
2067 :
2068 0 : pub(crate) static SECONDARY_RESIDENT_PHYSICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2069 0 : register_uint_gauge_vec!(
2070 0 : "pageserver_secondary_resident_physical_size",
2071 0 : "The size of the layer files present in the pageserver's filesystem, for secondary locations.",
2072 0 : &["tenant_id", "shard_id"]
2073 0 : )
2074 0 : .expect("failed to define a metric")
2075 0 : });
2076 :
2077 0 : pub(crate) static NODE_UTILIZATION_SCORE: Lazy<UIntGauge> = Lazy::new(|| {
2078 0 : register_uint_gauge!(
2079 0 : "pageserver_utilization_score",
2080 0 : "The utilization score we report to the storage controller for scheduling, where 0 is empty, 1000000 is full, and anything above is considered overloaded",
2081 0 : )
2082 0 : .expect("failed to define a metric")
2083 0 : });
2084 :
2085 0 : pub(crate) static SECONDARY_HEATMAP_TOTAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
2086 0 : register_uint_gauge_vec!(
2087 0 : "pageserver_secondary_heatmap_total_size",
2088 0 : "The total size in bytes of all layers in the most recently downloaded heatmap.",
2089 0 : &["tenant_id", "shard_id"]
2090 0 : )
2091 0 : .expect("failed to define a metric")
2092 0 : });
2093 :
2094 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2095 : pub enum RemoteOpKind {
2096 : Upload,
2097 : Download,
2098 : Delete,
2099 : }
2100 : impl RemoteOpKind {
2101 29597 : pub fn as_str(&self) -> &'static str {
2102 29597 : match self {
2103 27954 : Self::Upload => "upload",
2104 104 : Self::Download => "download",
2105 1539 : Self::Delete => "delete",
2106 : }
2107 29597 : }
2108 : }
2109 :
2110 : #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
2111 : pub enum RemoteOpFileKind {
2112 : Layer,
2113 : Index,
2114 : }
2115 : impl RemoteOpFileKind {
2116 29597 : pub fn as_str(&self) -> &'static str {
2117 29597 : match self {
2118 20532 : Self::Layer => "layer",
2119 9065 : Self::Index => "index",
2120 : }
2121 29597 : }
2122 : }
2123 :
2124 388 : pub(crate) static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
2125 388 : register_histogram_vec!(
2126 388 : "pageserver_remote_operation_seconds",
2127 388 : "Time spent on remote storage operations. \
2128 388 : Grouped by tenant, timeline, operation_kind and status. \
2129 388 : Does not account for time spent waiting in remote timeline client's queues.",
2130 388 : &["file_kind", "op_kind", "status"]
2131 388 : )
2132 388 : .expect("failed to define a metric")
2133 388 : });
2134 :
2135 0 : pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2136 0 : register_int_counter_vec!(
2137 0 : "pageserver_tenant_task_events",
2138 0 : "Number of task start/stop/fail events.",
2139 0 : &["event"],
2140 0 : )
2141 0 : .expect("Failed to register tenant_task_events metric")
2142 0 : });
2143 :
2144 : pub struct BackgroundLoopSemaphoreMetrics {
2145 : counters: EnumMap<BackgroundLoopKind, IntCounterPair>,
2146 : durations: EnumMap<BackgroundLoopKind, Counter>,
2147 : }
2148 :
2149 : pub(crate) static BACKGROUND_LOOP_SEMAPHORE: Lazy<BackgroundLoopSemaphoreMetrics> = Lazy::new(
2150 40 : || {
2151 40 : let counters = register_int_counter_pair_vec!(
2152 40 : "pageserver_background_loop_semaphore_wait_start_count",
2153 40 : "Counter for background loop concurrency-limiting semaphore acquire calls started",
2154 40 : "pageserver_background_loop_semaphore_wait_finish_count",
2155 40 : "Counter for background loop concurrency-limiting semaphore acquire calls finished",
2156 40 : &["task"],
2157 40 : )
2158 40 : .unwrap();
2159 40 :
2160 40 : let durations = register_counter_vec!(
2161 40 : "pageserver_background_loop_semaphore_wait_duration_seconds",
2162 40 : "Sum of wall clock time spent waiting on the background loop concurrency-limiting semaphore acquire calls",
2163 40 : &["task"],
2164 40 : )
2165 40 : .unwrap();
2166 40 :
2167 40 : BackgroundLoopSemaphoreMetrics {
2168 360 : counters: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2169 360 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2170 360 : counters.with_label_values(&[kind.into()])
2171 360 : })),
2172 360 : durations: enum_map::EnumMap::from_array(std::array::from_fn(|i| {
2173 360 : let kind = <BackgroundLoopKind as enum_map::Enum>::from_usize(i);
2174 360 : durations.with_label_values(&[kind.into()])
2175 360 : })),
2176 40 : }
2177 40 : },
2178 : );
2179 :
2180 : impl BackgroundLoopSemaphoreMetrics {
2181 728 : pub(crate) fn measure_acquisition(&self, task: BackgroundLoopKind) -> impl Drop + '_ {
2182 : struct Record<'a> {
2183 : metrics: &'a BackgroundLoopSemaphoreMetrics,
2184 : task: BackgroundLoopKind,
2185 : _counter_guard: metrics::IntCounterPairGuard,
2186 : start: Instant,
2187 : }
2188 : impl Drop for Record<'_> {
2189 728 : fn drop(&mut self) {
2190 728 : let elapsed = self.start.elapsed().as_secs_f64();
2191 728 : self.metrics.durations[self.task].inc_by(elapsed);
2192 728 : }
2193 : }
2194 728 : Record {
2195 728 : metrics: self,
2196 728 : task,
2197 728 : _counter_guard: self.counters[task].guard(),
2198 728 : start: Instant::now(),
2199 728 : }
2200 728 : }
2201 : }
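
// Usage sketch for measure_acquisition (hedged; `semaphore` is a hypothetical
// tokio::sync::Semaphore guarding background loop concurrency):
//
//     let timer = BACKGROUND_LOOP_SEMAPHORE.measure_acquisition(kind);
//     let _permit = semaphore.acquire().await.unwrap();
//     drop(timer); // dropping the guard records the wait duration for `kind`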
2202 :
2203 0 : pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
2204 0 : register_int_counter_vec!(
2205 0 : "pageserver_background_loop_period_overrun_count",
2206 0 : "Incremented whenever warn_when_period_overrun() logs a warning.",
2207 0 : &["task", "period"],
2208 0 : )
2209 0 : .expect("failed to define a metric")
2210 0 : });
2211 :
2212 : // walreceiver metrics
2213 :
2214 0 : pub(crate) static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
2215 0 : register_int_counter!(
2216 0 : "pageserver_walreceiver_started_connections_total",
2217 0 : "Number of started walreceiver connections"
2218 0 : )
2219 0 : .expect("failed to define a metric")
2220 0 : });
2221 :
2222 0 : pub(crate) static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
2223 0 : register_int_gauge!(
2224 0 : "pageserver_walreceiver_active_managers",
2225 0 : "Number of active walreceiver managers"
2226 0 : )
2227 0 : .expect("failed to define a metric")
2228 0 : });
2229 :
2230 0 : pub(crate) static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
2231 0 : register_int_counter_vec!(
2232 0 : "pageserver_walreceiver_switches_total",
2233 0 : "Number of walreceiver manager change_connection calls",
2234 0 : &["reason"]
2235 0 : )
2236 0 : .expect("failed to define a metric")
2237 0 : });
2238 :
2239 0 : pub(crate) static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
2240 0 : register_int_counter!(
2241 0 : "pageserver_walreceiver_broker_updates_total",
2242 0 : "Number of received broker updates in walreceiver"
2243 0 : )
2244 0 : .expect("failed to define a metric")
2245 0 : });
2246 :
2247 4 : pub(crate) static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
2248 4 : register_int_counter_vec!(
2249 4 : "pageserver_walreceiver_candidates_events_total",
2250 4 : "Number of walreceiver candidate events",
2251 4 : &["event"]
2252 4 : )
2253 4 : .expect("failed to define a metric")
2254 4 : });
2255 :
2256 : pub(crate) static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
2257 0 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
2258 :
2259 : pub(crate) static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
2260 4 : Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
2261 :
2262 : // Metrics collected on WAL redo operations
2263 : //
2264 : // We collect the time spent in actual WAL redo ('redo'), and time waiting
2265 : // for access to the postgres process ('wait') since there is only one for
2266 : // each tenant.
2267 :
2268 : /// Time buckets are small because we want to be able to measure the
2269 : /// smallest redo processing times. These buckets allow us to measure down
2270 : /// to 5us, which equates to 200'000 pages/sec, which equates to 1.6GB/sec.
2271 : /// This is much better than the previous 5ms aka 200 pages/sec aka 1.6MB/sec.
2272 : ///
2273 : /// Values up to 1s are recorded because metrics show that we have redo
2274 : /// durations and lock times larger than 0.250s.
2275 : macro_rules! redo_histogram_time_buckets {
2276 : () => {
2277 : vec![
2278 : 0.000_005, 0.000_010, 0.000_025, 0.000_050, 0.000_100, 0.000_250, 0.000_500, 0.001_000,
2279 : 0.002_500, 0.005_000, 0.010_000, 0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000,
2280 : 1.000_000,
2281 : ]
2282 : };
2283 : }
2284 :
2285 : /// While we're at it, also measure the number of records replayed in each
2286 : /// operation. We have a global 'total replayed' counter, but that's not
2287 : /// as useful as 'what is the skew for how many records we replay in one
2288 : /// operation'.
2289 : macro_rules! redo_histogram_count_buckets {
2290 : () => {
2291 : vec![0.0, 1.0, 2.0, 5.0, 10.0, 25.0, 50.0, 100.0, 250.0, 500.0]
2292 : };
2293 : }
2294 :
2295 : macro_rules! redo_bytes_histogram_count_buckets {
2296 : () => {
2297 : // powers of (2^.5), from 2^4.5 to 2^15 (22 buckets)
2298 : // rounded up to the next multiple of 8 to capture any MAXALIGNed record of that size, too.
2299 : vec![
2300 : 24.0, 32.0, 48.0, 64.0, 96.0, 128.0, 184.0, 256.0, 368.0, 512.0, 728.0, 1024.0, 1456.0,
2301 : 2048.0, 2904.0, 4096.0, 5800.0, 8192.0, 11592.0, 16384.0, 23176.0, 32768.0,
2302 : ]
2303 : };
2304 : }
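
// The bucket table above can be reproduced programmatically. A minimal sketch,
// shown for illustration only (not used by this module):
//
//     fn redo_bytes_buckets() -> Vec<f64> {
//         (9..=30) // exponents 4.5, 5.0, ..., 15.0 in half steps
//             .map(|i| 2f64.powf(i as f64 / 2.0))
//             .map(|v| (v / 8.0).ceil() * 8.0) // round up to a multiple of 8
//             .collect()
//     }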
2305 :
2306 : pub(crate) struct WalIngestMetrics {
2307 : pub(crate) bytes_received: IntCounter,
2308 : pub(crate) records_received: IntCounter,
2309 : pub(crate) records_observed: IntCounter,
2310 : pub(crate) records_committed: IntCounter,
2311 : pub(crate) records_filtered: IntCounter,
2312 : pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
2313 : pub(crate) clear_vm_bits_unknown: IntCounterVec,
2314 : }
2315 :
2316 20 : pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| {
2317 20 : WalIngestMetrics {
2318 20 : bytes_received: register_int_counter!(
2319 20 : "pageserver_wal_ingest_bytes_received",
2320 20 : "Bytes of WAL ingested from safekeepers",
2321 20 : )
2322 20 : .unwrap(),
2323 20 : records_received: register_int_counter!(
2324 20 : "pageserver_wal_ingest_records_received",
2325 20 : "Number of WAL records received from safekeepers"
2326 20 : )
2327 20 : .expect("failed to define a metric"),
2328 20 : records_observed: register_int_counter!(
2329 20 : "pageserver_wal_ingest_records_observed",
2330 20 : "Number of WAL records observed from safekeepers. These are metadata only records for shard 0."
2331 20 : )
2332 20 : .expect("failed to define a metric"),
2333 20 : records_committed: register_int_counter!(
2334 20 : "pageserver_wal_ingest_records_committed",
2335 20 : "Number of WAL records which resulted in writes to pageserver storage"
2336 20 : )
2337 20 : .expect("failed to define a metric"),
2338 20 : records_filtered: register_int_counter!(
2339 20 : "pageserver_wal_ingest_records_filtered",
2340 20 : "Number of WAL records filtered out due to sharding"
2341 20 : )
2342 20 : .expect("failed to define a metric"),
2343 20 : gap_blocks_zeroed_on_rel_extend: register_int_counter!(
2344 20 : "pageserver_gap_blocks_zeroed_on_rel_extend",
2345 20 : "Total number of zero gap blocks written on relation extends"
2346 20 : )
2347 20 : .expect("failed to define a metric"),
2348 20 : clear_vm_bits_unknown: register_int_counter_vec!(
2349 20 : "pageserver_wal_ingest_clear_vm_bits_unknown",
2350 20 : "Number of ignored ClearVmBits operations due to unknown pages/relations",
2351 20 : &["entity"],
2352 20 : )
2353 20 : .expect("failed to define a metric"),
2354 20 : }
2355 20 : });
2356 :
2357 400 : pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
2358 400 : register_int_counter_vec!(
2359 400 : "pageserver_timeline_wal_records_received",
2360 400 : "Number of WAL records received per shard",
2361 400 : &["tenant_id", "shard_id", "timeline_id"]
2362 400 : )
2363 400 : .expect("failed to define a metric")
2364 400 : });
2365 :
2366 12 : pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
2367 12 : register_histogram!(
2368 12 : "pageserver_wal_redo_seconds",
2369 12 : "Time spent on WAL redo",
2370 12 : redo_histogram_time_buckets!()
2371 12 : )
2372 12 : .expect("failed to define a metric")
2373 12 : });
2374 :
2375 12 : pub(crate) static WAL_REDO_RECORDS_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2376 12 : register_histogram!(
2377 12 : "pageserver_wal_redo_records_histogram",
2378 12 : "Histogram of number of records replayed per redo in the Postgres WAL redo process",
2379 12 : redo_histogram_count_buckets!(),
2380 12 : )
2381 12 : .expect("failed to define a metric")
2382 12 : });
2383 :
2384 12 : pub(crate) static WAL_REDO_BYTES_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2385 12 : register_histogram!(
2386 12 : "pageserver_wal_redo_bytes_histogram",
2387 12 : "Histogram of number of records replayed per redo sent to Postgres",
2388 12 : redo_bytes_histogram_count_buckets!(),
2389 12 : )
2390 12 : .expect("failed to define a metric")
2391 12 : });
2392 :
2393 : // FIXME: isn't this already included by WAL_REDO_RECORDS_HISTOGRAM which has _count?
2394 12 : pub(crate) static WAL_REDO_RECORD_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
2395 12 : register_int_counter!(
2396 12 : "pageserver_replayed_wal_records_total",
2397 12 : "Number of WAL records replayed in WAL redo process"
2398 12 : )
2399 12 : .unwrap()
2400 12 : });
2401 :
2402 : #[rustfmt::skip]
2403 16 : pub(crate) static WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM: Lazy<Histogram> = Lazy::new(|| {
2404 16 : register_histogram!(
2405 16 : "pageserver_wal_redo_process_launch_duration",
2406 16 : "Histogram of the duration of successful WalRedoProcess::launch calls",
2407 16 : vec![
2408 16 : 0.0002, 0.0004, 0.0006, 0.0008, 0.0010,
2409 16 : 0.0020, 0.0040, 0.0060, 0.0080, 0.0100,
2410 16 : 0.0200, 0.0400, 0.0600, 0.0800, 0.1000,
2411 16 : 0.2000, 0.4000, 0.6000, 0.8000, 1.0000,
2412 16 : 1.5000, 2.0000, 2.5000, 3.0000, 4.0000, 10.0000
2413 16 : ],
2414 16 : )
2415 16 : .expect("failed to define a metric")
2416 16 : });
2417 :
2418 : pub(crate) struct WalRedoProcessCounters {
2419 : pub(crate) started: IntCounter,
2420 : pub(crate) killed_by_cause: enum_map::EnumMap<WalRedoKillCause, IntCounter>,
2421 : pub(crate) active_stderr_logger_tasks_started: IntCounter,
2422 : pub(crate) active_stderr_logger_tasks_finished: IntCounter,
2423 : }
2424 :
2425 : #[derive(Debug, enum_map::Enum, strum_macros::IntoStaticStr)]
2426 : pub(crate) enum WalRedoKillCause {
2427 : WalRedoProcessDrop,
2428 : NoLeakChildDrop,
2429 : Startup,
2430 : }
2431 :
2432 : impl Default for WalRedoProcessCounters {
2433 16 : fn default() -> Self {
2434 16 : let started = register_int_counter!(
2435 16 : "pageserver_wal_redo_process_started_total",
2436 16 : "Number of WAL redo processes started",
2437 16 : )
2438 16 : .unwrap();
2439 16 :
2440 16 : let killed = register_int_counter_vec!(
2441 16 : "pageserver_wal_redo_process_stopped_total",
2442 16 : "Number of WAL redo processes stopped",
2443 16 : &["cause"],
2444 16 : )
2445 16 : .unwrap();
2446 16 :
2447 16 : let active_stderr_logger_tasks_started = register_int_counter!(
2448 16 : "pageserver_walredo_stderr_logger_tasks_started_total",
2449 16 : "Number of active walredo stderr logger tasks that have started",
2450 16 : )
2451 16 : .unwrap();
2452 16 :
2453 16 : let active_stderr_logger_tasks_finished = register_int_counter!(
2454 16 : "pageserver_walredo_stderr_logger_tasks_finished_total",
2455 16 : "Number of active walredo stderr logger tasks that have finished",
2456 16 : )
2457 16 : .unwrap();
2458 16 :
2459 16 : Self {
2460 16 : started,
2461 48 : killed_by_cause: EnumMap::from_array(std::array::from_fn(|i| {
2462 48 : let cause = <WalRedoKillCause as enum_map::Enum>::from_usize(i);
2463 48 : let cause_str: &'static str = cause.into();
2464 48 : killed.with_label_values(&[cause_str])
2465 48 : })),
2466 16 : active_stderr_logger_tasks_started,
2467 16 : active_stderr_logger_tasks_finished,
2468 16 : }
2469 16 : }
2470 : }
2471 :
2472 : pub(crate) static WAL_REDO_PROCESS_COUNTERS: Lazy<WalRedoProcessCounters> =
2473 : Lazy::new(WalRedoProcessCounters::default);
2474 :
2475 : /// Similar to `prometheus::HistogramTimer` but does not record on drop.
2476 : pub(crate) struct StorageTimeMetricsTimer {
2477 : metrics: StorageTimeMetrics,
2478 : start: Instant,
2479 : }
2480 :
2481 : impl StorageTimeMetricsTimer {
2482 4236 : fn new(metrics: StorageTimeMetrics) -> Self {
2483 4236 : Self {
2484 4236 : metrics,
2485 4236 : start: Instant::now(),
2486 4236 : }
2487 4236 : }
2488 :
2489 : /// Returns the elapsed duration of the timer.
2490 4236 : pub fn elapsed(&self) -> Duration {
2491 4236 : self.start.elapsed()
2492 4236 : }
2493 :
2494 : /// Record the time from creation to now and return it.
2495 4236 : pub fn stop_and_record(self) -> Duration {
2496 4236 : let duration = self.elapsed();
2497 4236 : let seconds = duration.as_secs_f64();
2498 4236 : self.metrics.timeline_sum.inc_by(seconds);
2499 4236 : self.metrics.timeline_count.inc();
2500 4236 : self.metrics.global_histogram.observe(seconds);
2501 4236 : duration
2502 4236 : }
2503 :
2504 : /// Turns this timer into one that always records -- usually this means recording
2505 : /// regardless of whether an early `?` path was taken in a function.
2506 8 : pub(crate) fn record_on_drop(self) -> AlwaysRecordingStorageTimeMetricsTimer {
2507 8 : AlwaysRecordingStorageTimeMetricsTimer(Some(self))
2508 8 : }
2509 : }
2510 :
2511 : pub(crate) struct AlwaysRecordingStorageTimeMetricsTimer(Option<StorageTimeMetricsTimer>);
2512 :
2513 : impl Drop for AlwaysRecordingStorageTimeMetricsTimer {
2514 8 : fn drop(&mut self) {
2515 8 : if let Some(inner) = self.0.take() {
2516 8 : inner.stop_and_record();
2517 8 : }
2518 8 : }
2519 : }
2520 :
2521 : impl AlwaysRecordingStorageTimeMetricsTimer {
2522 : /// Returns the elapsed duration of the timer.
2523 0 : pub fn elapsed(&self) -> Duration {
2524 0 : self.0.as_ref().expect("not dropped yet").elapsed()
2525 0 : }
2526 : }
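
// Usage sketch contrasting the two recording modes (hedged; `do_work` is a
// hypothetical fallible operation):
//
//     let timer = metrics.start_timer();
//     do_work()?;                         // early `?` return: nothing recorded
//     let _elapsed = timer.stop_and_record();
//
//     let _guard = metrics.start_timer().record_on_drop();
//     do_work()?;                         // early `?` return: still recorded on drop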
2527 :
2528 : /// Timing facilities for a globally histogrammed metric, complemented by a per-tenant and
2529 : /// per-timeline total sum and count.
2530 : #[derive(Clone, Debug)]
2531 : pub(crate) struct StorageTimeMetrics {
2532 : /// Sum of f64 seconds, per operation, tenant_id and timeline_id
2533 : timeline_sum: Counter,
2534 : /// Number of operations, per operation, tenant_id and timeline_id
2535 : timeline_count: IntCounter,
2536 : /// Global histogram having only the "operation" label.
2537 : global_histogram: Histogram,
2538 : }
2539 :
2540 : impl StorageTimeMetrics {
2541 8028 : pub fn new(
2542 8028 : operation: StorageTimeOperation,
2543 8028 : tenant_id: &str,
2544 8028 : shard_id: &str,
2545 8028 : timeline_id: &str,
2546 8028 : ) -> Self {
2547 8028 : let operation: &'static str = operation.into();
2548 8028 :
2549 8028 : let timeline_sum = STORAGE_TIME_SUM_PER_TIMELINE
2550 8028 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2551 8028 : .unwrap();
2552 8028 : let timeline_count = STORAGE_TIME_COUNT_PER_TIMELINE
2553 8028 : .get_metric_with_label_values(&[operation, tenant_id, shard_id, timeline_id])
2554 8028 : .unwrap();
2555 8028 : let global_histogram = STORAGE_TIME_GLOBAL
2556 8028 : .get_metric_with_label_values(&[operation])
2557 8028 : .unwrap();
2558 8028 :
2559 8028 : StorageTimeMetrics {
2560 8028 : timeline_sum,
2561 8028 : timeline_count,
2562 8028 : global_histogram,
2563 8028 : }
2564 8028 : }
2565 :
2566 : /// Starts timing a new operation.
2567 : ///
2568 : /// Note: unlike `prometheus::HistogramTimer` the returned timer does not record on drop.
2569 4236 : pub fn start_timer(&self) -> StorageTimeMetricsTimer {
2570 4236 : StorageTimeMetricsTimer::new(self.clone())
2571 4236 : }
2572 : }
2573 :
2574 : #[derive(Debug)]
2575 : pub(crate) struct TimelineMetrics {
2576 : tenant_id: String,
2577 : shard_id: String,
2578 : timeline_id: String,
2579 : pub flush_time_histo: StorageTimeMetrics,
2580 : pub flush_delay_histo: StorageTimeMetrics,
2581 : pub flush_wait_upload_time_gauge: Gauge,
2582 : pub compact_time_histo: StorageTimeMetrics,
2583 : pub create_images_time_histo: StorageTimeMetrics,
2584 : pub logical_size_histo: StorageTimeMetrics,
2585 : pub imitate_logical_size_histo: StorageTimeMetrics,
2586 : pub load_layer_map_histo: StorageTimeMetrics,
2587 : pub garbage_collect_histo: StorageTimeMetrics,
2588 : pub find_gc_cutoffs_histo: StorageTimeMetrics,
2589 : pub last_record_lsn_gauge: IntGauge,
2590 : pub disk_consistent_lsn_gauge: IntGauge,
2591 : pub pitr_history_size: UIntGauge,
2592 : pub archival_size: UIntGauge,
2593 : pub(crate) layer_size_image: UIntGauge,
2594 : pub(crate) layer_count_image: UIntGauge,
2595 : pub(crate) layer_size_delta: UIntGauge,
2596 : pub(crate) layer_count_delta: UIntGauge,
2597 : pub standby_horizon_gauge: IntGauge,
2598 : pub resident_physical_size_gauge: UIntGauge,
2599 : pub visible_physical_size_gauge: UIntGauge,
2600 : /// copy of LayeredTimeline.current_logical_size
2601 : pub current_logical_size_gauge: UIntGauge,
2602 : pub aux_file_size_gauge: IntGauge,
2603 : pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
2604 : pub evictions: IntCounter,
2605 : pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
2606 : /// Number of valid LSN leases.
2607 : pub valid_lsn_lease_count_gauge: UIntGauge,
2608 : pub wal_records_received: IntCounter,
2609 : shutdown: std::sync::atomic::AtomicBool,
2610 : }
2611 :
2612 : impl TimelineMetrics {
2613 892 : pub fn new(
2614 892 : tenant_shard_id: &TenantShardId,
2615 892 : timeline_id_raw: &TimelineId,
2616 892 : evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
2617 892 : ) -> Self {
2618 892 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2619 892 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2620 892 : let timeline_id = timeline_id_raw.to_string();
2621 892 : let flush_time_histo = StorageTimeMetrics::new(
2622 892 : StorageTimeOperation::LayerFlush,
2623 892 : &tenant_id,
2624 892 : &shard_id,
2625 892 : &timeline_id,
2626 892 : );
2627 892 : let flush_delay_histo = StorageTimeMetrics::new(
2628 892 : StorageTimeOperation::LayerFlushDelay,
2629 892 : &tenant_id,
2630 892 : &shard_id,
2631 892 : &timeline_id,
2632 892 : );
2633 892 : let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
2634 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2635 892 : .unwrap();
2636 892 : let compact_time_histo = StorageTimeMetrics::new(
2637 892 : StorageTimeOperation::Compact,
2638 892 : &tenant_id,
2639 892 : &shard_id,
2640 892 : &timeline_id,
2641 892 : );
2642 892 : let create_images_time_histo = StorageTimeMetrics::new(
2643 892 : StorageTimeOperation::CreateImages,
2644 892 : &tenant_id,
2645 892 : &shard_id,
2646 892 : &timeline_id,
2647 892 : );
2648 892 : let logical_size_histo = StorageTimeMetrics::new(
2649 892 : StorageTimeOperation::LogicalSize,
2650 892 : &tenant_id,
2651 892 : &shard_id,
2652 892 : &timeline_id,
2653 892 : );
2654 892 : let imitate_logical_size_histo = StorageTimeMetrics::new(
2655 892 : StorageTimeOperation::ImitateLogicalSize,
2656 892 : &tenant_id,
2657 892 : &shard_id,
2658 892 : &timeline_id,
2659 892 : );
2660 892 : let load_layer_map_histo = StorageTimeMetrics::new(
2661 892 : StorageTimeOperation::LoadLayerMap,
2662 892 : &tenant_id,
2663 892 : &shard_id,
2664 892 : &timeline_id,
2665 892 : );
2666 892 : let garbage_collect_histo = StorageTimeMetrics::new(
2667 892 : StorageTimeOperation::Gc,
2668 892 : &tenant_id,
2669 892 : &shard_id,
2670 892 : &timeline_id,
2671 892 : );
2672 892 : let find_gc_cutoffs_histo = StorageTimeMetrics::new(
2673 892 : StorageTimeOperation::FindGcCutoffs,
2674 892 : &tenant_id,
2675 892 : &shard_id,
2676 892 : &timeline_id,
2677 892 : );
2678 892 : let last_record_lsn_gauge = LAST_RECORD_LSN
2679 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2680 892 : .unwrap();
2681 892 :
2682 892 : let disk_consistent_lsn_gauge = DISK_CONSISTENT_LSN
2683 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2684 892 : .unwrap();
2685 892 :
2686 892 : let pitr_history_size = PITR_HISTORY_SIZE
2687 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2688 892 : .unwrap();
2689 892 :
2690 892 : let archival_size = TIMELINE_ARCHIVE_SIZE
2691 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2692 892 : .unwrap();
2693 892 :
2694 892 : let layer_size_image = TIMELINE_LAYER_SIZE
2695 892 : .get_metric_with_label_values(&[
2696 892 : &tenant_id,
2697 892 : &shard_id,
2698 892 : &timeline_id,
2699 892 : MetricLayerKind::Image.into(),
2700 892 : ])
2701 892 : .unwrap();
2702 892 :
2703 892 : let layer_count_image = TIMELINE_LAYER_COUNT
2704 892 : .get_metric_with_label_values(&[
2705 892 : &tenant_id,
2706 892 : &shard_id,
2707 892 : &timeline_id,
2708 892 : MetricLayerKind::Image.into(),
2709 892 : ])
2710 892 : .unwrap();
2711 892 :
2712 892 : let layer_size_delta = TIMELINE_LAYER_SIZE
2713 892 : .get_metric_with_label_values(&[
2714 892 : &tenant_id,
2715 892 : &shard_id,
2716 892 : &timeline_id,
2717 892 : MetricLayerKind::Delta.into(),
2718 892 : ])
2719 892 : .unwrap();
2720 892 :
2721 892 : let layer_count_delta = TIMELINE_LAYER_COUNT
2722 892 : .get_metric_with_label_values(&[
2723 892 : &tenant_id,
2724 892 : &shard_id,
2725 892 : &timeline_id,
2726 892 : MetricLayerKind::Delta.into(),
2727 892 : ])
2728 892 : .unwrap();
2729 892 :
2730 892 : let standby_horizon_gauge = STANDBY_HORIZON
2731 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2732 892 : .unwrap();
2733 892 : let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE
2734 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2735 892 : .unwrap();
2736 892 : let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE
2737 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2738 892 : .unwrap();
2739 892 : // TODO: we shouldn't expose this metric
2740 892 : let current_logical_size_gauge = CURRENT_LOGICAL_SIZE
2741 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2742 892 : .unwrap();
2743 892 : let aux_file_size_gauge = AUX_FILE_SIZE
2744 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2745 892 : .unwrap();
2746 892 : // TODO use impl Trait syntax here once we have the ability to use it: https://github.com/rust-lang/rust/issues/63065
2747 892 : let directory_entries_count_gauge_closure = {
2748 892 : let tenant_shard_id = *tenant_shard_id;
2749 892 : let timeline_id_raw = *timeline_id_raw;
2750 0 : move || {
2751 0 : let tenant_id = tenant_shard_id.tenant_id.to_string();
2752 0 : let shard_id = format!("{}", tenant_shard_id.shard_slug());
2753 0 : let timeline_id = timeline_id_raw.to_string();
2754 0 : let gauge: UIntGauge = DIRECTORY_ENTRIES_COUNT
2755 0 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2756 0 : .unwrap();
2757 0 : gauge
2758 0 : }
2759 : };
2760 892 : let directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>> =
2761 892 : Lazy::new(Box::new(directory_entries_count_gauge_closure));
2762 892 : let evictions = EVICTIONS
2763 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2764 892 : .unwrap();
2765 892 : let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
2766 892 : .build(&tenant_id, &shard_id, &timeline_id);
2767 892 :
2768 892 : let valid_lsn_lease_count_gauge = VALID_LSN_LEASE_COUNT
2769 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2770 892 : .unwrap();
2771 892 :
2772 892 : let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
2773 892 : .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
2774 892 : .unwrap();
2775 892 :
2776 892 : TimelineMetrics {
2777 892 : tenant_id,
2778 892 : shard_id,
2779 892 : timeline_id,
2780 892 : flush_time_histo,
2781 892 : flush_delay_histo,
2782 892 : flush_wait_upload_time_gauge,
2783 892 : compact_time_histo,
2784 892 : create_images_time_histo,
2785 892 : logical_size_histo,
2786 892 : imitate_logical_size_histo,
2787 892 : garbage_collect_histo,
2788 892 : find_gc_cutoffs_histo,
2789 892 : load_layer_map_histo,
2790 892 : last_record_lsn_gauge,
2791 892 : disk_consistent_lsn_gauge,
2792 892 : pitr_history_size,
2793 892 : archival_size,
2794 892 : layer_size_image,
2795 892 : layer_count_image,
2796 892 : layer_size_delta,
2797 892 : layer_count_delta,
2798 892 : standby_horizon_gauge,
2799 892 : resident_physical_size_gauge,
2800 892 : visible_physical_size_gauge,
2801 892 : current_logical_size_gauge,
2802 892 : aux_file_size_gauge,
2803 892 : directory_entries_count_gauge,
2804 892 : evictions,
2805 892 : evictions_with_low_residence_duration: std::sync::RwLock::new(
2806 892 : evictions_with_low_residence_duration,
2807 892 : ),
2808 892 : valid_lsn_lease_count_gauge,
2809 892 : wal_records_received,
2810 892 : shutdown: std::sync::atomic::AtomicBool::default(),
2811 892 : }
2812 892 : }
2813 :
2814 3144 : pub(crate) fn record_new_file_metrics(&self, sz: u64) {
2815 3144 : self.resident_physical_size_add(sz);
2816 3144 : }
2817 :
2818 1052 : pub(crate) fn resident_physical_size_sub(&self, sz: u64) {
2819 1052 : self.resident_physical_size_gauge.sub(sz);
2820 1052 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(sz);
2821 1052 : }
2822 :
2823 3400 : pub(crate) fn resident_physical_size_add(&self, sz: u64) {
2824 3400 : self.resident_physical_size_gauge.add(sz);
2825 3400 : crate::metrics::RESIDENT_PHYSICAL_SIZE_GLOBAL.add(sz);
2826 3400 : }
2827 :
2828 20 : pub(crate) fn resident_physical_size_get(&self) -> u64 {
2829 20 : self.resident_physical_size_gauge.get()
2830 20 : }
2831 :
2832 2344 : pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
2833 2344 : self.flush_wait_upload_time_gauge.add(duration);
2834 2344 : crate::metrics::FLUSH_WAIT_UPLOAD_TIME
2835 2344 : .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
2836 2344 : .unwrap()
2837 2344 : .add(duration);
2838 2344 : }
2839 :
2840 20 : pub(crate) fn shutdown(&self) {
2841 20 : let was_shutdown = self
2842 20 : .shutdown
2843 20 : .swap(true, std::sync::atomic::Ordering::Relaxed);
2844 20 :
2845 20 : if was_shutdown {
2846 : // this happens on tenant deletion because the tenant first shuts down timelines, then
2847 : // invokes timeline deletion, which shuts down the timeline again.
2848 : // TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080 is resolved
2849 0 : return;
2850 20 : }
2851 20 :
2852 20 : let tenant_id = &self.tenant_id;
2853 20 : let timeline_id = &self.timeline_id;
2854 20 : let shard_id = &self.shard_id;
2855 20 : let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2856 20 : let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2857 20 : let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2858 20 : let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2859 20 : {
2860 20 : RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
2861 20 : let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2862 20 : }
2863 20 : let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2864 20 : let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2865 20 : if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) {
2866 0 : let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2867 20 : }
2868 :
2869 20 : let _ = TIMELINE_ARCHIVE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2870 20 : let _ = PITR_HISTORY_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2871 20 :
2872 20 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2873 20 : tenant_id,
2874 20 : shard_id,
2875 20 : timeline_id,
2876 20 : MetricLayerKind::Image.into(),
2877 20 : ]);
2878 20 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2879 20 : tenant_id,
2880 20 : shard_id,
2881 20 : timeline_id,
2882 20 : MetricLayerKind::Image.into(),
2883 20 : ]);
2884 20 : let _ = TIMELINE_LAYER_SIZE.remove_label_values(&[
2885 20 : tenant_id,
2886 20 : shard_id,
2887 20 : timeline_id,
2888 20 : MetricLayerKind::Delta.into(),
2889 20 : ]);
2890 20 : let _ = TIMELINE_LAYER_COUNT.remove_label_values(&[
2891 20 : tenant_id,
2892 20 : shard_id,
2893 20 : timeline_id,
2894 20 : MetricLayerKind::Delta.into(),
2895 20 : ]);
2896 20 :
2897 20 : let _ = EVICTIONS.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2898 20 : let _ = AUX_FILE_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2899 20 : let _ = VALID_LSN_LEASE_COUNT.remove_label_values(&[tenant_id, shard_id, timeline_id]);
2900 20 :
2901 20 : self.evictions_with_low_residence_duration
2902 20 : .write()
2903 20 : .unwrap()
2904 20 : .remove(tenant_id, shard_id, timeline_id);
2905 :
2906 : // The following metrics are born outside of the TimelineMetrics lifecycle but are still
2907 : // removed at the end of it. The idea is to have the metrics outlive the
2908 : // shorter-lived entity they are observed for: e.g., the smgr metrics shall
2909 : // outlive an individual smgr connection, but not the timeline.
2910 :
2911 200 : for op in StorageTimeOperation::VARIANTS {
2912 180 : let _ = STORAGE_TIME_SUM_PER_TIMELINE.remove_label_values(&[
2913 180 : op,
2914 180 : tenant_id,
2915 180 : shard_id,
2916 180 : timeline_id,
2917 180 : ]);
2918 180 : let _ = STORAGE_TIME_COUNT_PER_TIMELINE.remove_label_values(&[
2919 180 : op,
2920 180 : tenant_id,
2921 180 : shard_id,
2922 180 : timeline_id,
2923 180 : ]);
2924 180 : }
2925 :
2926 60 : for op in STORAGE_IO_SIZE_OPERATIONS {
2927 40 : let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
2928 40 : }
2929 :
2930 20 : let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
2931 20 : SmgrQueryType::GetPageAtLsn.into(),
2932 20 : tenant_id,
2933 20 : shard_id,
2934 20 : timeline_id,
2935 20 : ]);
2936 20 : let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
2937 20 : SmgrQueryType::GetPageAtLsn.into(),
2938 20 : tenant_id,
2939 20 : shard_id,
2940 20 : timeline_id,
2941 20 : ]);
2942 20 : let _ = PAGE_SERVICE_BATCH_SIZE_PER_TENANT_TIMELINE.remove_label_values(&[
2943 20 : tenant_id,
2944 20 : shard_id,
2945 20 : timeline_id,
2946 20 : ]);
2947 20 : let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
2948 20 : tenant_id,
2949 20 : shard_id,
2950 20 : timeline_id,
2951 20 : ]);
2952 20 : let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
2953 20 : tenant_id,
2954 20 : shard_id,
2955 20 : timeline_id,
2956 20 : ]);
2957 20 : let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
2958 20 : tenant_id,
2959 20 : shard_id,
2960 20 : timeline_id,
2961 20 : ]);
2962 20 : }
2963 : }
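     :
     : // A minimal sketch (illustrative, not part of the shutdown path) of the
     : // cardinality-hygiene pattern used above: every `with_label_values` on a
     : // per-timeline vec must eventually be paired with a `remove_label_values`,
     : // otherwise the registry keeps series for long-gone timelines forever.
     : // The metric name and helper functions below are hypothetical.
     : //
     : //     static EXAMPLE_PER_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
     : //         register_int_counter_vec!(
     : //             "pageserver_example_per_timeline_total",
     : //             "Hypothetical per-timeline counter.",
     : //             &["tenant_id", "shard_id", "timeline_id"],
     : //         )
     : //         .expect("failed to define a metric")
     : //     });
     : //
     : //     fn example_observe(tenant_id: &str, shard_id: &str, timeline_id: &str) {
     : //         EXAMPLE_PER_TIMELINE
     : //             .with_label_values(&[tenant_id, shard_id, timeline_id])
     : //             .inc();
     : //     }
     : //
     : //     fn example_shutdown(tenant_id: &str, shard_id: &str, timeline_id: &str) {
     : //         // Ignore the result: the series may never have been created.
     : //         let _ = EXAMPLE_PER_TIMELINE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
     : //     }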
2964 :
2965 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
2966 12 : // Only shard zero deals in synthetic sizes
2967 12 : if tenant_shard_id.is_shard_zero() {
2968 12 : let tid = tenant_shard_id.tenant_id.to_string();
2969 12 : let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
2970 12 : }
2971 :
2972 12 : tenant_throttling::remove_tenant_metrics(tenant_shard_id);
2973 12 :
2974 12 :     // We leave the BROKEN_TENANTS_SET entry in place, if any.
2975 12 : }
2976 :
2977 : use futures::Future;
2978 : use pin_project_lite::pin_project;
2979 : use std::collections::HashMap;
2980 : use std::num::NonZeroUsize;
2981 : use std::pin::Pin;
2982 : use std::sync::atomic::AtomicU64;
2983 : use std::sync::{Arc, Mutex};
2984 : use std::task::{Context, Poll};
2985 : use std::time::{Duration, Instant};
2986 :
2987 : use crate::config::PageServerConf;
2988 : use crate::context::{PageContentKind, RequestContext};
2989 : use crate::task_mgr::TaskKind;
2990 : use crate::tenant::mgr::TenantSlot;
2991 : use crate::tenant::tasks::BackgroundLoopKind;
2992 : use crate::tenant::throttle::ThrottleResult;
2993 : use crate::tenant::Timeline;
2994 :
2995 : /// Maintain a per-timeline gauge in addition to the global gauge. (See the delta-accounting sketch after the `Drop` impl below.)
2996 : pub(crate) struct PerTimelineRemotePhysicalSizeGauge {
2997 : last_set: AtomicU64,
2998 : gauge: UIntGauge,
2999 : }
3000 :
3001 : impl PerTimelineRemotePhysicalSizeGauge {
3002 912 : fn new(per_timeline_gauge: UIntGauge) -> Self {
3003 912 : Self {
3004 912 : last_set: AtomicU64::new(0),
3005 912 : gauge: per_timeline_gauge,
3006 912 : }
3007 912 : }
3008 3843 : pub(crate) fn set(&self, sz: u64) {
3009 3843 : self.gauge.set(sz);
3010 3843 : let prev = self.last_set.swap(sz, std::sync::atomic::Ordering::Relaxed);
3011 3843 : if sz < prev {
3012 69 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(prev - sz);
3013 3774 : } else {
3014 3774 : REMOTE_PHYSICAL_SIZE_GLOBAL.add(sz - prev);
3015 3774 : };
3016 3843 : }
3017 4 : pub(crate) fn get(&self) -> u64 {
3018 4 : self.gauge.get()
3019 4 : }
3020 : }
3021 :
3022 : impl Drop for PerTimelineRemotePhysicalSizeGauge {
3023 40 : fn drop(&mut self) {
3024 40 : REMOTE_PHYSICAL_SIZE_GLOBAL.sub(self.last_set.load(std::sync::atomic::Ordering::Relaxed));
3025 40 : }
3026 : }
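     :
     : // Worked example (values made up) of the invariant the two impls above
     : // maintain: REMOTE_PHYSICAL_SIZE_GLOBAL always equals the sum of the
     : // per-timeline gauges, because `set` applies only the delta against
     : // `last_set` and `drop` subtracts the last value that was set.
     : //
     : //     // gauge_a.set(100);  // global += 100       -> global = 100
     : //     // gauge_b.set(40);   // global += 40        -> global = 140
     : //     // gauge_a.set(70);   // global -= 100 - 70  -> global = 110
     : //     // drop(gauge_a);     // global -= 70        -> global = 40 (== gauge_b)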
3027 :
3028 : pub(crate) struct RemoteTimelineClientMetrics {
3029 : tenant_id: String,
3030 : shard_id: String,
3031 : timeline_id: String,
3032 : pub(crate) remote_physical_size_gauge: PerTimelineRemotePhysicalSizeGauge,
3033 : calls: Mutex<HashMap<(&'static str, &'static str), IntCounterPair>>,
3034 : bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3035 : bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
3036 : pub(crate) projected_remote_consistent_lsn_gauge: UIntGauge,
3037 : }
3038 :
3039 : impl RemoteTimelineClientMetrics {
3040 912 : pub fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
3041 912 : let tenant_id_str = tenant_shard_id.tenant_id.to_string();
3042 912 : let shard_id_str = format!("{}", tenant_shard_id.shard_slug());
3043 912 : let timeline_id_str = timeline_id.to_string();
3044 912 :
3045 912 : let remote_physical_size_gauge = PerTimelineRemotePhysicalSizeGauge::new(
3046 912 : REMOTE_PHYSICAL_SIZE
3047 912 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3048 912 : .unwrap(),
3049 912 : );
3050 912 :
3051 912 : let projected_remote_consistent_lsn_gauge = PROJECTED_REMOTE_CONSISTENT_LSN
3052 912 : .get_metric_with_label_values(&[&tenant_id_str, &shard_id_str, &timeline_id_str])
3053 912 : .unwrap();
3054 912 :
3055 912 : RemoteTimelineClientMetrics {
3056 912 : tenant_id: tenant_id_str,
3057 912 : shard_id: shard_id_str,
3058 912 : timeline_id: timeline_id_str,
3059 912 : calls: Mutex::new(HashMap::default()),
3060 912 : bytes_started_counter: Mutex::new(HashMap::default()),
3061 912 : bytes_finished_counter: Mutex::new(HashMap::default()),
3062 912 : remote_physical_size_gauge,
3063 912 : projected_remote_consistent_lsn_gauge,
3064 912 : }
3065 912 : }
3066 :
3067 5862 : pub fn remote_operation_time(
3068 5862 : &self,
3069 5862 : file_kind: &RemoteOpFileKind,
3070 5862 : op_kind: &RemoteOpKind,
3071 5862 : status: &'static str,
3072 5862 : ) -> Histogram {
3073 5862 : let key = (file_kind.as_str(), op_kind.as_str(), status);
3074 5862 : REMOTE_OPERATION_TIME
3075 5862 : .get_metric_with_label_values(&[key.0, key.1, key.2])
3076 5862 : .unwrap()
3077 5862 : }
3078 :
3079 13940 : fn calls_counter_pair(
3080 13940 : &self,
3081 13940 : file_kind: &RemoteOpFileKind,
3082 13940 : op_kind: &RemoteOpKind,
3083 13940 : ) -> IntCounterPair {
3084 13940 : let mut guard = self.calls.lock().unwrap();
3085 13940 : let key = (file_kind.as_str(), op_kind.as_str());
3086 13940 : let metric = guard.entry(key).or_insert_with(move || {
3087 1634 : REMOTE_TIMELINE_CLIENT_CALLS
3088 1634 : .get_metric_with_label_values(&[
3089 1634 : &self.tenant_id,
3090 1634 : &self.shard_id,
3091 1634 : &self.timeline_id,
3092 1634 : key.0,
3093 1634 : key.1,
3094 1634 : ])
3095 1634 : .unwrap()
3096 13940 : });
3097 13940 : metric.clone()
3098 13940 : }
3099 :
3100 3456 : fn bytes_started_counter(
3101 3456 : &self,
3102 3456 : file_kind: &RemoteOpFileKind,
3103 3456 : op_kind: &RemoteOpKind,
3104 3456 : ) -> IntCounter {
3105 3456 : let mut guard = self.bytes_started_counter.lock().unwrap();
3106 3456 : let key = (file_kind.as_str(), op_kind.as_str());
3107 3456 : let metric = guard.entry(key).or_insert_with(move || {
3108 644 : REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
3109 644 : .get_metric_with_label_values(&[
3110 644 : &self.tenant_id,
3111 644 : &self.shard_id,
3112 644 : &self.timeline_id,
3113 644 : key.0,
3114 644 : key.1,
3115 644 : ])
3116 644 : .unwrap()
3117 3456 : });
3118 3456 : metric.clone()
3119 3456 : }
3120 :
3121 6315 : fn bytes_finished_counter(
3122 6315 : &self,
3123 6315 : file_kind: &RemoteOpFileKind,
3124 6315 : op_kind: &RemoteOpKind,
3125 6315 : ) -> IntCounter {
3126 6315 : let mut guard = self.bytes_finished_counter.lock().unwrap();
3127 6315 : let key = (file_kind.as_str(), op_kind.as_str());
3128 6315 : let metric = guard.entry(key).or_insert_with(move || {
3129 644 : REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
3130 644 : .get_metric_with_label_values(&[
3131 644 : &self.tenant_id,
3132 644 : &self.shard_id,
3133 644 : &self.timeline_id,
3134 644 : key.0,
3135 644 : key.1,
3136 644 : ])
3137 644 : .unwrap()
3138 6315 : });
3139 6315 : metric.clone()
3140 6315 : }
3141 : }
3142 :
3143 : #[cfg(test)]
3144 : impl RemoteTimelineClientMetrics {
3145 12 : pub fn get_bytes_started_counter_value(
3146 12 : &self,
3147 12 : file_kind: &RemoteOpFileKind,
3148 12 : op_kind: &RemoteOpKind,
3149 12 : ) -> Option<u64> {
3150 12 : let guard = self.bytes_started_counter.lock().unwrap();
3151 12 : let key = (file_kind.as_str(), op_kind.as_str());
3152 12 : guard.get(&key).map(|counter| counter.get())
3153 12 : }
3154 :
3155 12 : pub fn get_bytes_finished_counter_value(
3156 12 : &self,
3157 12 : file_kind: &RemoteOpFileKind,
3158 12 : op_kind: &RemoteOpKind,
3159 12 : ) -> Option<u64> {
3160 12 : let guard = self.bytes_finished_counter.lock().unwrap();
3161 12 : let key = (file_kind.as_str(), op_kind.as_str());
3162 12 : guard.get(&key).map(|counter| counter.get())
3163 12 : }
3164 : }
3165 :
3166 : /// See [`RemoteTimelineClientMetrics::call_begin`].
3167 : #[must_use]
3168 : pub(crate) struct RemoteTimelineClientCallMetricGuard {
3169 : /// Decremented on drop.
3170 : calls_counter_pair: Option<IntCounterPair>,
3171 :     /// If `Some`, this references the bytes_finished metric, which we increment by the given `u64` on drop.
3172 : bytes_finished: Option<(IntCounter, u64)>,
3173 : }
3174 :
3175 : impl RemoteTimelineClientCallMetricGuard {
3176 :     /// Consume this guard object without performing the metric updates it would otherwise do on `drop()`.
3177 :     /// The caller vouches to perform the metric updates manually. (See the defuse-idiom sketch after the `Drop` impl below.)
3178 7556 : pub fn will_decrement_manually(mut self) {
3179 7556 : let RemoteTimelineClientCallMetricGuard {
3180 7556 : calls_counter_pair,
3181 7556 : bytes_finished,
3182 7556 : } = &mut self;
3183 7556 : calls_counter_pair.take();
3184 7556 : bytes_finished.take();
3185 7556 : }
3186 : }
3187 :
3188 : impl Drop for RemoteTimelineClientCallMetricGuard {
3189 7608 : fn drop(&mut self) {
3190 7608 : let RemoteTimelineClientCallMetricGuard {
3191 7608 : calls_counter_pair,
3192 7608 : bytes_finished,
3193 7608 : } = self;
3194 7608 : if let Some(guard) = calls_counter_pair.take() {
3195 52 : guard.dec();
3196 7556 : }
3197 7608 : if let Some((bytes_finished_metric, value)) = bytes_finished {
3198 0 : bytes_finished_metric.inc_by(*value);
3199 7608 : }
3200 7608 : }
3201 : }
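     :
     : // The guard above is an instance of the "defusable RAII" idiom: the side
     : // effects live in `Option`s, `drop` runs whatever is still `Some`, and
     : // `will_decrement_manually` defuses by `take()`ing them out first. A
     : // stand-alone sketch of the idiom (`Defusable` is illustrative, not part
     : // of this crate):
     : //
     : //     struct Defusable<F: FnOnce()> {
     : //         on_drop: Option<F>,
     : //     }
     : //
     : //     impl<F: FnOnce()> Defusable<F> {
     : //         fn defuse(mut self) {
     : //             self.on_drop.take(); // the Drop impl below becomes a no-op
     : //         }
     : //     }
     : //
     : //     impl<F: FnOnce()> Drop for Defusable<F> {
     : //         fn drop(&mut self) {
     : //             if let Some(f) = self.on_drop.take() {
     : //                 f();
     : //             }
     : //         }
     : //     }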
3202 :
3203 : /// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
3204 : /// track the byte size of this call in applicable metric(s).
3205 : pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
3206 : /// Do not account for this call's byte size in any metrics.
3207 : /// The `reason` field is there to make the call sites self-documenting
3208 : /// about why they don't need the metric.
3209 : DontTrackSize { reason: &'static str },
3210 : /// Track the byte size of the call in applicable metric(s).
3211 : Bytes(u64),
3212 : }
3213 :
3214 : impl RemoteTimelineClientMetrics {
3215 : /// Update the metrics that change when a call to the remote timeline client instance starts.
3216 : ///
3217 :     /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
3218 :     /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`](Self::call_end) if that
3219 :     /// is more suitable. Never do both.
3220 :     /// (See the usage sketch after this impl block.)
3221 7608 : pub(crate) fn call_begin(
3222 7608 : &self,
3223 7608 : file_kind: &RemoteOpFileKind,
3224 7608 : op_kind: &RemoteOpKind,
3225 7608 : size: RemoteTimelineClientMetricsCallTrackSize,
3226 7608 : ) -> RemoteTimelineClientCallMetricGuard {
3227 7608 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3228 7608 : calls_counter_pair.inc();
3229 :
3230 7608 : let bytes_finished = match size {
3231 4152 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
3232 4152 : // nothing to do
3233 4152 : None
3234 : }
3235 3456 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3236 3456 : self.bytes_started_counter(file_kind, op_kind).inc_by(size);
3237 3456 : let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
3238 3456 : Some((finished_counter, size))
3239 : }
3240 : };
3241 7608 : RemoteTimelineClientCallMetricGuard {
3242 7608 : calls_counter_pair: Some(calls_counter_pair),
3243 7608 : bytes_finished,
3244 7608 : }
3245 7608 : }
3246 :
3247 :     /// Manually update the metrics that track completions, instead of using the guard object.
3248 : /// Using the guard object is generally preferable.
3249 : /// See [`call_begin`](Self::call_begin) for more context.
3250 6332 : pub(crate) fn call_end(
3251 6332 : &self,
3252 6332 : file_kind: &RemoteOpFileKind,
3253 6332 : op_kind: &RemoteOpKind,
3254 6332 : size: RemoteTimelineClientMetricsCallTrackSize,
3255 6332 : ) {
3256 6332 : let calls_counter_pair = self.calls_counter_pair(file_kind, op_kind);
3257 6332 : calls_counter_pair.dec();
3258 6332 : match size {
3259 3473 : RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
3260 2859 : RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
3261 2859 : self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
3262 2859 : }
3263 : }
3264 6332 : }
3265 : }
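     :
     : // Usage sketch for the two completion-tracking styles documented on
     : // `call_begin` (the surrounding call site and bindings are hypothetical):
     : //
     : //     // Style 1: RAII. Dropping the guard decrements the calls gauge and
     : //     // bumps bytes_finished.
     : //     {
     : //         let _guard = metrics.call_begin(
     : //             &file_kind,
     : //             &op_kind,
     : //             RemoteTimelineClientMetricsCallTrackSize::Bytes(num_bytes),
     : //         );
     : //         upload()?;
     : //     } // <- metrics updated here
     : //
     : //     // Style 2: manual. Defuse the guard, then report completion
     : //     // explicitly, possibly from a different task.
     : //     let guard = metrics.call_begin(
     : //         &file_kind,
     : //         &op_kind,
     : //         RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: "illustration" },
     : //     );
     : //     guard.will_decrement_manually();
     : //     // ... later, from whichever task actually finishes the operation ...
     : //     metrics.call_end(
     : //         &file_kind,
     : //         &op_kind,
     : //         RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: "illustration" },
     : //     );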
3266 :
3267 : impl Drop for RemoteTimelineClientMetrics {
3268 40 : fn drop(&mut self) {
3269 40 : let RemoteTimelineClientMetrics {
3270 40 : tenant_id,
3271 40 : shard_id,
3272 40 : timeline_id,
3273 40 : remote_physical_size_gauge,
3274 40 : calls,
3275 40 : bytes_started_counter,
3276 40 : bytes_finished_counter,
3277 40 : projected_remote_consistent_lsn_gauge,
3278 40 : } = self;
3279 48 : for ((a, b), _) in calls.get_mut().unwrap().drain() {
3280 48 : let mut res = [Ok(()), Ok(())];
3281 48 : REMOTE_TIMELINE_CLIENT_CALLS
3282 48 : .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id, a, b]);
3283 48 : // don't care about results
3284 48 : }
3285 40 : for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
3286 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
3287 12 : tenant_id,
3288 12 : shard_id,
3289 12 : timeline_id,
3290 12 : a,
3291 12 : b,
3292 12 : ]);
3293 12 : }
3294 40 : for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
3295 12 : let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
3296 12 : tenant_id,
3297 12 : shard_id,
3298 12 : timeline_id,
3299 12 : a,
3300 12 : b,
3301 12 : ]);
3302 12 : }
3303 40 : {
3304 40 :             let _ = remote_physical_size_gauge; // used to avoid an 'unused' warning in the destructuring above
3305 40 : let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]);
3306 40 : }
3307 40 : {
3308 40 : let _ = projected_remote_consistent_lsn_gauge;
3309 40 : let _ = PROJECTED_REMOTE_CONSISTENT_LSN.remove_label_values(&[
3310 40 : tenant_id,
3311 40 : shard_id,
3312 40 : timeline_id,
3313 40 : ]);
3314 40 : }
3315 40 : }
3316 : }
3317 :
3318 : /// Wrapper future that measures the time spent by a remote storage operation
3319 : /// and records the duration and success/failure as a prometheus metric. (Usage sketch after the `Future` impl below.)
3320 : pub(crate) trait MeasureRemoteOp: Sized {
3321 6355 : fn measure_remote_op(
3322 6355 : self,
3323 6355 : file_kind: RemoteOpFileKind,
3324 6355 : op: RemoteOpKind,
3325 6355 : metrics: Arc<RemoteTimelineClientMetrics>,
3326 6355 : ) -> MeasuredRemoteOp<Self> {
3327 6355 : let start = Instant::now();
3328 6355 : MeasuredRemoteOp {
3329 6355 : inner: self,
3330 6355 : file_kind,
3331 6355 : op,
3332 6355 : start,
3333 6355 : metrics,
3334 6355 : }
3335 6355 : }
3336 : }
3337 :
3338 : impl<T: Sized> MeasureRemoteOp for T {}
3339 :
3340 : pin_project! {
3341 : pub(crate) struct MeasuredRemoteOp<F>
3342 : {
3343 : #[pin]
3344 : inner: F,
3345 : file_kind: RemoteOpFileKind,
3346 : op: RemoteOpKind,
3347 : start: Instant,
3348 : metrics: Arc<RemoteTimelineClientMetrics>,
3349 : }
3350 : }
3351 :
3352 : impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
3353 : type Output = Result<O, E>;
3354 :
3355 98222 : fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
3356 98222 : let this = self.project();
3357 98222 : let poll_result = this.inner.poll(cx);
3358 98222 : if let Poll::Ready(ref res) = poll_result {
3359 5862 : let duration = this.start.elapsed();
3360 5862 : let status = if res.is_ok() { &"success" } else { &"failure" };
3361 5862 : this.metrics
3362 5862 : .remote_operation_time(this.file_kind, this.op, status)
3363 5862 : .observe(duration.as_secs_f64());
3364 92360 : }
3365 98222 : poll_result
3366 98222 : }
3367 : }
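     :
     : // Usage sketch: the blanket impl above means any future can be wrapped.
     : // The future, the `RemoteOpFileKind`/`RemoteOpKind` values, and the
     : // `metrics` handle here are illustrative, not fixed names from this crate.
     : //
     : //     let res = download_layer_file(path)
     : //         .measure_remote_op(file_kind, op_kind, Arc::clone(&metrics))
     : //         .await;
     : //
     : // The histogram is observed exactly once, at the first `Poll::Ready`,
     : // with the status label derived from the future's `Result`.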
3368 :
3369 : pub mod tokio_epoll_uring {
3370 : use std::{
3371 : collections::HashMap,
3372 : sync::{Arc, Mutex},
3373 : };
3374 :
3375 : use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge};
3376 : use once_cell::sync::Lazy;
3377 :
3378 : /// Shared storage for tokio-epoll-uring thread local metrics.
3379 : pub(crate) static THREAD_LOCAL_METRICS_STORAGE: Lazy<ThreadLocalMetricsStorage> =
3380 232 : Lazy::new(|| {
3381 232 : let slots_submission_queue_depth = register_histogram!(
3382 232 : "pageserver_tokio_epoll_uring_slots_submission_queue_depth",
3383 232 : "The slots waiters queue depth of each tokio_epoll_uring system",
3384 232 : vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0],
3385 232 : )
3386 232 : .expect("failed to define a metric");
3387 232 : ThreadLocalMetricsStorage {
3388 232 : observers: Mutex::new(HashMap::new()),
3389 232 : slots_submission_queue_depth,
3390 232 : }
3391 232 : });
3392 :
3393 : pub struct ThreadLocalMetricsStorage {
3394 : /// List of thread local metrics observers.
3395 : observers: Mutex<HashMap<u64, Arc<ThreadLocalMetrics>>>,
3396 : /// A histogram shared between all thread local systems
3397 : /// for collecting slots submission queue depth.
3398 : slots_submission_queue_depth: Histogram,
3399 : }
3400 :
3401 : /// Each thread-local [`tokio_epoll_uring::System`] gets one of these as its
3402 : /// [`tokio_epoll_uring::metrics::PerSystemMetrics`] generic.
3403 : ///
3404 : /// The System makes observations into [`Self`] and periodically, the collector
3405 : /// comes along and flushes [`Self`] into the shared storage [`THREAD_LOCAL_METRICS_STORAGE`].
3406 : ///
3407 : /// [`LocalHistogram`] is `!Send`, so, we need to put it behind a [`Mutex`].
3408 : /// But except for the periodic flush, the lock is uncontended so there's no waiting
3409 : /// for cache coherence protocol to get an exclusive cache line.
3410 : pub struct ThreadLocalMetrics {
3411 : /// Local observer of thread local tokio-epoll-uring system's slots waiters queue depth.
3412 : slots_submission_queue_depth: Mutex<LocalHistogram>,
3413 : }
3414 :
3415 : impl ThreadLocalMetricsStorage {
3416 : /// Registers a new thread local system. Returns a thread local metrics observer.
3417 1000 : pub fn register_system(&self, id: u64) -> Arc<ThreadLocalMetrics> {
3418 1000 : let per_system_metrics = Arc::new(ThreadLocalMetrics::new(
3419 1000 : self.slots_submission_queue_depth.local(),
3420 1000 : ));
3421 1000 : let mut g = self.observers.lock().unwrap();
3422 1000 : g.insert(id, Arc::clone(&per_system_metrics));
3423 1000 : per_system_metrics
3424 1000 : }
3425 :
3426 : /// Removes metrics observer for a thread local system.
3427 : /// This should be called before dropping a thread local system.
3428 232 : pub fn remove_system(&self, id: u64) {
3429 232 : let mut g = self.observers.lock().unwrap();
3430 232 : g.remove(&id);
3431 232 : }
3432 :
3433 : /// Flush all thread local metrics to the shared storage.
3434 0 : pub fn flush_thread_local_metrics(&self) {
3435 0 : let g = self.observers.lock().unwrap();
3436 0 : g.values().for_each(|local| {
3437 0 : local.flush();
3438 0 : });
3439 0 : }
3440 : }
3441 :
3442 : impl ThreadLocalMetrics {
3443 1000 : pub fn new(slots_submission_queue_depth: LocalHistogram) -> Self {
3444 1000 : ThreadLocalMetrics {
3445 1000 : slots_submission_queue_depth: Mutex::new(slots_submission_queue_depth),
3446 1000 : }
3447 1000 : }
3448 :
3449 : /// Flushes the thread local metrics to shared aggregator.
3450 0 : pub fn flush(&self) {
3451 0 : let Self {
3452 0 : slots_submission_queue_depth,
3453 0 : } = self;
3454 0 : slots_submission_queue_depth.lock().unwrap().flush();
3455 0 : }
3456 : }
3457 :
3458 : impl tokio_epoll_uring::metrics::PerSystemMetrics for ThreadLocalMetrics {
3459 1820213 : fn observe_slots_submission_queue_depth(&self, queue_depth: u64) {
3460 1820213 : let Self {
3461 1820213 : slots_submission_queue_depth,
3462 1820213 : } = self;
3463 1820213 : slots_submission_queue_depth
3464 1820213 : .lock()
3465 1820213 : .unwrap()
3466 1820213 : .observe(queue_depth as f64);
3467 1820213 : }
3468 : }
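     :
     :     // Sketch of the local-histogram pattern used in this module:
     :     // `Histogram::local()` yields a `LocalHistogram` whose observations stay
     :     // thread-local until `flush()` merges them into the shared histogram
     :     // (prometheus-style API, as used by `register_system` /
     :     // `flush_thread_local_metrics` above; metric name is illustrative):
     :     //
     :     //     let shared =
     :     //         register_histogram!("example_queue_depth", "sketch", vec![1.0, 4.0, 16.0]).unwrap();
     :     //     let local = shared.local(); // cheap, unsynchronized observations
     :     //     local.observe(3.0);         // not yet visible on `shared`
     :     //     local.flush();              // now merged into the shared histogram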
3469 :
3470 : pub struct Collector {
3471 : descs: Vec<metrics::core::Desc>,
3472 : systems_created: UIntGauge,
3473 : systems_destroyed: UIntGauge,
3474 : thread_local_metrics_storage: &'static ThreadLocalMetricsStorage,
3475 : }
3476 :
3477 : impl metrics::core::Collector for Collector {
3478 0 : fn desc(&self) -> Vec<&metrics::core::Desc> {
3479 0 : self.descs.iter().collect()
3480 0 : }
3481 :
3482 0 : fn collect(&self) -> Vec<metrics::proto::MetricFamily> {
3483 0 : let mut mfs = Vec::with_capacity(Self::NMETRICS);
3484 0 : let tokio_epoll_uring::metrics::GlobalMetrics {
3485 0 : systems_created,
3486 0 : systems_destroyed,
3487 0 : } = tokio_epoll_uring::metrics::global();
3488 0 : self.systems_created.set(systems_created);
3489 0 : mfs.extend(self.systems_created.collect());
3490 0 : self.systems_destroyed.set(systems_destroyed);
3491 0 : mfs.extend(self.systems_destroyed.collect());
3492 0 :
3493 0 : self.thread_local_metrics_storage
3494 0 : .flush_thread_local_metrics();
3495 0 :
3496 0 : mfs.extend(
3497 0 : self.thread_local_metrics_storage
3498 0 : .slots_submission_queue_depth
3499 0 : .collect(),
3500 0 : );
3501 0 : mfs
3502 0 : }
3503 : }
3504 :
3505 : impl Collector {
3506 : const NMETRICS: usize = 3;
3507 :
3508 : #[allow(clippy::new_without_default)]
3509 0 : pub fn new() -> Self {
3510 0 : let mut descs = Vec::new();
3511 0 :
3512 0 : let systems_created = UIntGauge::new(
3513 0 : "pageserver_tokio_epoll_uring_systems_created",
3514 0 : "counter of tokio-epoll-uring systems that were created",
3515 0 : )
3516 0 : .unwrap();
3517 0 : descs.extend(
3518 0 : metrics::core::Collector::desc(&systems_created)
3519 0 : .into_iter()
3520 0 : .cloned(),
3521 0 : );
3522 0 :
3523 0 : let systems_destroyed = UIntGauge::new(
3524 0 : "pageserver_tokio_epoll_uring_systems_destroyed",
3525 0 : "counter of tokio-epoll-uring systems that were destroyed",
3526 0 : )
3527 0 : .unwrap();
3528 0 : descs.extend(
3529 0 : metrics::core::Collector::desc(&systems_destroyed)
3530 0 : .into_iter()
3531 0 : .cloned(),
3532 0 : );
3533 0 :
3534 0 : Self {
3535 0 : descs,
3536 0 : systems_created,
3537 0 : systems_destroyed,
3538 0 : thread_local_metrics_storage: &THREAD_LOCAL_METRICS_STORAGE,
3539 0 : }
3540 0 : }
3541 : }
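     :
     :     // Registration sketch: `Collector` implements `metrics::core::Collector`,
     :     // so it plugs into a prometheus-compatible registry. The `Registry`
     :     // re-export below is an assumption about the `metrics` wrapper crate.
     :     //
     :     //     let registry = metrics::Registry::new(); // assumed prometheus re-export
     :     //     registry
     :     //         .register(Box::new(Collector::new()))
     :     //         .expect("register tokio-epoll-uring collector");
     :     //     // Each scrape then invokes `collect()`, which flushes the
     :     //     // thread-local histograms before emitting the metric families.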
3542 :
3543 232 : pub(crate) static THREAD_LOCAL_LAUNCH_SUCCESSES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3544 232 : register_int_counter!(
3545 232 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_success_count",
3546 232 : "Number of times where thread_local_system creation spanned multiple executor threads",
3547 232 : )
3548 232 : .unwrap()
3549 232 : });
3550 :
3551 0 : pub(crate) static THREAD_LOCAL_LAUNCH_FAILURES: Lazy<metrics::IntCounter> = Lazy::new(|| {
3552 0 : register_int_counter!(
3553 0 : "pageserver_tokio_epoll_uring_pageserver_thread_local_launch_failures_count",
3554 0 : "Number of times thread_local_system creation failed and was retried after back-off.",
3555 0 : )
3556 0 : .unwrap()
3557 0 : });
3558 : }
3559 :
3560 : pub(crate) mod tenant_throttling {
3561 : use metrics::{register_int_counter_vec, IntCounter};
3562 : use once_cell::sync::Lazy;
3563 : use utils::shard::TenantShardId;
3564 :
3565 : pub(crate) struct GlobalAndPerTenantIntCounter {
3566 : global: IntCounter,
3567 : per_tenant: IntCounter,
3568 : }
3569 :
3570 : impl GlobalAndPerTenantIntCounter {
3571 : #[inline(always)]
3572 0 : pub(crate) fn inc(&self) {
3573 0 : self.inc_by(1)
3574 0 : }
3575 : #[inline(always)]
3576 0 : pub(crate) fn inc_by(&self, n: u64) {
3577 0 : self.global.inc_by(n);
3578 0 : self.per_tenant.inc_by(n);
3579 0 : }
3580 : }
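     :
     :     // The counter pair above fans a single `inc_by` out to a cheap global
     :     // aggregate and a per-tenant series. Sketch (label values illustrative):
     :     //
     :     //     let c = GlobalAndPerTenantIntCounter {
     :     //         global: COUNT_ACCOUNTED_START.with_label_values(&["pagestream"]),
     :     //         per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
     :     //             .with_label_values(&["pagestream", "<tenant_id>", "<shard_id>"]),
     :     //     };
     :     //     c.inc_by(3); // both the `_global` and the per-tenant series advance by 3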
3581 :
3582 : pub(crate) struct Metrics<const KIND: usize> {
3583 : pub(super) count_accounted_start: GlobalAndPerTenantIntCounter,
3584 : pub(super) count_accounted_finish: GlobalAndPerTenantIntCounter,
3585 : pub(super) wait_time: GlobalAndPerTenantIntCounter,
3586 : pub(super) count_throttled: GlobalAndPerTenantIntCounter,
3587 : }
3588 :
3589 404 : static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3590 404 : register_int_counter_vec!(
3591 404 : "pageserver_tenant_throttling_count_accounted_start_global",
3592 404 : "Count of tenant throttling starts, by kind of throttle.",
3593 404 : &["kind"]
3594 404 : )
3595 404 : .unwrap()
3596 404 : });
3597 404 : static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3598 404 : register_int_counter_vec!(
3599 404 : "pageserver_tenant_throttling_count_accounted_start",
3600 404 : "Count of tenant throttling starts, by kind of throttle.",
3601 404 : &["kind", "tenant_id", "shard_id"]
3602 404 : )
3603 404 : .unwrap()
3604 404 : });
3605 404 : static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3606 404 : register_int_counter_vec!(
3607 404 : "pageserver_tenant_throttling_count_accounted_finish_global",
3608 404 : "Count of tenant throttling finishes, by kind of throttle.",
3609 404 : &["kind"]
3610 404 : )
3611 404 : .unwrap()
3612 404 : });
3613 404 : static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3614 404 : register_int_counter_vec!(
3615 404 : "pageserver_tenant_throttling_count_accounted_finish",
3616 404 : "Count of tenant throttling finishes, by kind of throttle.",
3617 404 : &["kind", "tenant_id", "shard_id"]
3618 404 : )
3619 404 : .unwrap()
3620 404 : });
3621 404 : static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3622 404 : register_int_counter_vec!(
3623 404 : "pageserver_tenant_throttling_wait_usecs_sum_global",
3624 404 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3625 404 : &["kind"]
3626 404 : )
3627 404 : .unwrap()
3628 404 : });
3629 404 : static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3630 404 : register_int_counter_vec!(
3631 404 : "pageserver_tenant_throttling_wait_usecs_sum",
3632 404 : "Sum of microseconds that spent waiting throttle by kind of throttle.",
3633 404 : &["kind", "tenant_id", "shard_id"]
3634 404 : )
3635 404 : .unwrap()
3636 404 : });
3637 :
3638 404 : static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3639 404 : register_int_counter_vec!(
3640 404 : "pageserver_tenant_throttling_count_global",
3641 404 : "Count of tenant throttlings, by kind of throttle.",
3642 404 : &["kind"]
3643 404 : )
3644 404 : .unwrap()
3645 404 : });
3646 404 : static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
3647 404 : register_int_counter_vec!(
3648 404 : "pageserver_tenant_throttling_count",
3649 404 : "Count of tenant throttlings, by kind of throttle.",
3650 404 : &["kind", "tenant_id", "shard_id"]
3651 404 : )
3652 404 : .unwrap()
3653 404 : });
3654 :
3655 : const KINDS: &[&str] = &["pagestream"];
3656 : pub type Pagestream = Metrics<0>;
3657 :
3658 : impl<const KIND: usize> Metrics<KIND> {
3659 440 : pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
3660 440 : let per_tenant_label_values = &[
3661 440 : KINDS[KIND],
3662 440 : &tenant_shard_id.tenant_id.to_string(),
3663 440 : &tenant_shard_id.shard_slug().to_string(),
3664 440 : ];
3665 440 : Metrics {
3666 440 : count_accounted_start: {
3667 440 : GlobalAndPerTenantIntCounter {
3668 440 : global: COUNT_ACCOUNTED_START.with_label_values(&[KINDS[KIND]]),
3669 440 : per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
3670 440 : .with_label_values(per_tenant_label_values),
3671 440 : }
3672 440 : },
3673 440 : count_accounted_finish: {
3674 440 : GlobalAndPerTenantIntCounter {
3675 440 : global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KINDS[KIND]]),
3676 440 : per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
3677 440 : .with_label_values(per_tenant_label_values),
3678 440 : }
3679 440 : },
3680 440 : wait_time: {
3681 440 : GlobalAndPerTenantIntCounter {
3682 440 : global: WAIT_USECS.with_label_values(&[KINDS[KIND]]),
3683 440 : per_tenant: WAIT_USECS_PER_TENANT
3684 440 : .with_label_values(per_tenant_label_values),
3685 440 : }
3686 440 : },
3687 440 : count_throttled: {
3688 440 : GlobalAndPerTenantIntCounter {
3689 440 : global: WAIT_COUNT.with_label_values(&[KINDS[KIND]]),
3690 440 : per_tenant: WAIT_COUNT_PER_TENANT
3691 440 : .with_label_values(per_tenant_label_values),
3692 440 : }
3693 440 : },
3694 440 : }
3695 440 : }
3696 : }
3697 :
3698 0 : pub(crate) fn preinitialize_global_metrics() {
3699 0 : Lazy::force(&COUNT_ACCOUNTED_START);
3700 0 : Lazy::force(&COUNT_ACCOUNTED_FINISH);
3701 0 : Lazy::force(&WAIT_USECS);
3702 0 : Lazy::force(&WAIT_COUNT);
3703 0 : }
3704 :
3705 12 : pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
3706 48 : for m in &[
3707 12 : &COUNT_ACCOUNTED_START_PER_TENANT,
3708 12 : &COUNT_ACCOUNTED_FINISH_PER_TENANT,
3709 12 : &WAIT_USECS_PER_TENANT,
3710 12 : &WAIT_COUNT_PER_TENANT,
3711 12 : ] {
3712 96 : for kind in KINDS {
3713 48 : let _ = m.remove_label_values(&[
3714 48 : kind,
3715 48 : &tenant_shard_id.tenant_id.to_string(),
3716 48 : &tenant_shard_id.shard_slug().to_string(),
3717 48 : ]);
3718 48 : }
3719 : }
3720 12 : }
3721 : }
3722 :
3723 : pub(crate) mod disk_usage_based_eviction {
3724 : use super::*;
3725 :
3726 : pub(crate) struct Metrics {
3727 : pub(crate) tenant_collection_time: Histogram,
3728 : pub(crate) tenant_layer_count: Histogram,
3729 : pub(crate) layers_collected: IntCounter,
3730 : pub(crate) layers_selected: IntCounter,
3731 : pub(crate) layers_evicted: IntCounter,
3732 : }
3733 :
3734 : impl Default for Metrics {
3735 0 : fn default() -> Self {
3736 0 : let tenant_collection_time = register_histogram!(
3737 0 : "pageserver_disk_usage_based_eviction_tenant_collection_seconds",
3738 0 : "Time spent collecting layers from a tenant -- not normalized by collected layer amount",
3739 0 : vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]
3740 0 : )
3741 0 : .unwrap();
3742 0 :
3743 0 : let tenant_layer_count = register_histogram!(
3744 0 : "pageserver_disk_usage_based_eviction_tenant_collected_layers",
3745 0 : "Amount of layers gathered from a tenant",
3746 0 : vec![5.0, 50.0, 500.0, 5000.0, 50000.0]
3747 0 : )
3748 0 : .unwrap();
3749 0 :
3750 0 : let layers_collected = register_int_counter!(
3751 0 : "pageserver_disk_usage_based_eviction_collected_layers_total",
3752 0 : "Amount of layers collected"
3753 0 : )
3754 0 : .unwrap();
3755 0 :
3756 0 : let layers_selected = register_int_counter!(
3757 0 : "pageserver_disk_usage_based_eviction_select_layers_total",
3758 0 : "Amount of layers selected"
3759 0 : )
3760 0 : .unwrap();
3761 0 :
3762 0 : let layers_evicted = register_int_counter!(
3763 0 : "pageserver_disk_usage_based_eviction_evicted_layers_total",
3764 0 : "Amount of layers successfully evicted"
3765 0 : )
3766 0 : .unwrap();
3767 0 :
3768 0 : Self {
3769 0 : tenant_collection_time,
3770 0 : tenant_layer_count,
3771 0 : layers_collected,
3772 0 : layers_selected,
3773 0 : layers_evicted,
3774 0 : }
3775 0 : }
3776 : }
3777 :
3778 : pub(crate) static METRICS: Lazy<Metrics> = Lazy::new(Metrics::default);
3779 : }
3780 :
3781 392 : static TOKIO_EXECUTOR_THREAD_COUNT: Lazy<UIntGaugeVec> = Lazy::new(|| {
3782 392 : register_uint_gauge_vec!(
3783 392 : "pageserver_tokio_executor_thread_configured_count",
3784 392 : "Total number of configued tokio executor threads in the process.
3785 392 : The `setup` label denotes whether we're running with multiple runtimes or a single runtime.",
3786 392 : &["setup"],
3787 392 : )
3788 392 : .unwrap()
3789 392 : });
3790 :
3791 392 : pub(crate) fn set_tokio_runtime_setup(setup: &str, num_threads: NonZeroUsize) {
3792 : static SERIALIZE: std::sync::Mutex<()> = std::sync::Mutex::new(());
3793 392 : let _guard = SERIALIZE.lock().unwrap();
3794 392 : TOKIO_EXECUTOR_THREAD_COUNT.reset();
3795 392 : TOKIO_EXECUTOR_THREAD_COUNT
3796 392 : .get_metric_with_label_values(&[setup])
3797 392 : .unwrap()
3798 392 : .set(u64::try_from(num_threads.get()).unwrap());
3799 392 : }
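     :
     : // Usage sketch (the setup label value is illustrative):
     : //
     : //     set_tokio_runtime_setup(
     : //         "multiple-runtimes",
     : //         NonZeroUsize::new(8).expect("non-zero"),
     : //     );
     : //
     : // `reset()` drops every previously-set label value before the new one is
     : // written, so the `SERIALIZE` mutex keeps concurrent callers from
     : // interleaving the two steps and leaving a stale series behind.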
3800 :
3801 0 : pub fn preinitialize_metrics(conf: &'static PageServerConf) {
3802 0 : set_page_service_config_max_batch_size(&conf.page_service_pipelining);
3803 0 :
3804 0 :     // Python tests need these, and on some of them we do alerting.
3805 0 : //
3806 0 : // FIXME(4813): make it so that we have no top level metrics as this fn will easily fall out of
3807 0 : // order:
3808 0 : // - global metrics reside in a Lazy<PageserverMetrics>
3809 0 : // - access via crate::metrics::PS_METRICS.some_metric.inc()
3810 0 : // - could move the statics into TimelineMetrics::new()?
3811 0 :
3812 0 : // counters
3813 0 : [
3814 0 : &UNEXPECTED_ONDEMAND_DOWNLOADS,
3815 0 : &WALRECEIVER_STARTED_CONNECTIONS,
3816 0 : &WALRECEIVER_BROKER_UPDATES,
3817 0 : &WALRECEIVER_CANDIDATES_ADDED,
3818 0 : &WALRECEIVER_CANDIDATES_REMOVED,
3819 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
3820 0 : &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
3821 0 : &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
3822 0 : &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
3823 0 : &CIRCUIT_BREAKERS_BROKEN,
3824 0 : &CIRCUIT_BREAKERS_UNBROKEN,
3825 0 : &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
3826 0 : ]
3827 0 : .into_iter()
3828 0 : .for_each(|c| {
3829 0 : Lazy::force(c);
3830 0 : });
3831 0 :
3832 0 : // Deletion queue stats
3833 0 : Lazy::force(&DELETION_QUEUE);
3834 0 :
3835 0 : // Tenant stats
3836 0 : Lazy::force(&TENANT);
3837 0 :
3838 0 : // Tenant manager stats
3839 0 : Lazy::force(&TENANT_MANAGER);
3840 0 :
3841 0 : Lazy::force(&crate::tenant::storage_layer::layer::LAYER_IMPL_METRICS);
3842 0 : Lazy::force(&disk_usage_based_eviction::METRICS);
3843 :
3844 0 : for state_name in pageserver_api::models::TenantState::VARIANTS {
3845 0 :         // Initialize the metric for all gauges; otherwise the time series might appear to show
3846 0 :         // values from the last restart.
3847 0 : TENANT_STATE_METRIC.with_label_values(&[state_name]).set(0);
3848 0 : }
3849 :
3850 : // countervecs
3851 0 : [
3852 0 : &BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT,
3853 0 : &SMGR_QUERY_STARTED_GLOBAL,
3854 0 : ]
3855 0 : .into_iter()
3856 0 : .for_each(|c| {
3857 0 : Lazy::force(c);
3858 0 : });
3859 0 :
3860 0 : // gauges
3861 0 : WALRECEIVER_ACTIVE_MANAGERS.get();
3862 0 :
3863 0 : // histograms
3864 0 : [
3865 0 : &VEC_READ_NUM_LAYERS_VISITED,
3866 0 : &WAIT_LSN_TIME,
3867 0 : &WAL_REDO_TIME,
3868 0 : &WAL_REDO_RECORDS_HISTOGRAM,
3869 0 : &WAL_REDO_BYTES_HISTOGRAM,
3870 0 : &WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
3871 0 : &PAGE_SERVICE_BATCH_SIZE_GLOBAL,
3872 0 : &PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
3873 0 : ]
3874 0 : .into_iter()
3875 0 : .for_each(|h| {
3876 0 : Lazy::force(h);
3877 0 : });
3878 0 :
3879 0 : // Custom
3880 0 : Lazy::force(&BASEBACKUP_QUERY_TIME);
3881 0 : Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
3882 0 : Lazy::force(&tokio_epoll_uring::THREAD_LOCAL_METRICS_STORAGE);
3883 0 :
3884 0 : tenant_throttling::preinitialize_global_metrics();
3885 0 : }
|