Line data Source code
1 : //!
2 : //! This module provides metric definitions for the storage controller.
3 : //!
4 : //! All metrics are grouped in [`StorageControllerMetricGroup`]. [`StorageControllerMetrics`] holds
5 : //! the mentioned metrics and their encoder. It's globally available via the [`METRICS_REGISTRY`]
6 : //! constant.
7 : //!
8 : //! The rest of the code defines label group types and deals with converting outer types to labels.
9 : //!
10 : use std::sync::Mutex;
11 :
12 : use bytes::Bytes;
13 : use measured::label::LabelValue;
14 : use measured::metric::histogram;
15 : use measured::{FixedCardinalityLabel, MetricGroup};
16 : use metrics::NeonMetrics;
17 : use once_cell::sync::Lazy;
18 : use strum::IntoEnumIterator;
19 :
20 : use crate::persistence::{DatabaseError, DatabaseOperation};
21 : use crate::service::LeadershipStatus;
22 :
23 : pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> =
24 : Lazy::new(StorageControllerMetrics::default);
25 :
26 0 : pub fn preinitialize_metrics() {
27 0 : Lazy::force(&METRICS_REGISTRY);
28 0 : }
29 :
30 : pub(crate) struct StorageControllerMetrics {
31 : pub(crate) metrics_group: StorageControllerMetricGroup,
32 : encoder: Mutex<measured::text::BufferedTextEncoder>,
33 : }
34 :
35 : #[derive(measured::MetricGroup)]
36 : #[metric(new())]
37 : pub(crate) struct StorageControllerMetricGroup {
38 : /// Count of how many times we spawn a reconcile task
39 : pub(crate) storage_controller_reconcile_spawn: measured::Counter,
40 :
41 : /// Size of the in-memory map of tenant shards
42 : pub(crate) storage_controller_tenant_shards: measured::Gauge,
43 :
44 : /// Size of the in-memory map of pageserver_nodes
45 : pub(crate) storage_controller_pageserver_nodes: measured::Gauge,
46 :
47 : /// Count of how many pageserver nodes from in-memory map have https configured
48 : pub(crate) storage_controller_https_pageserver_nodes: measured::Gauge,
49 :
50 : /// Size of the in-memory map of safekeeper_nodes
51 : pub(crate) storage_controller_safekeeper_nodes: measured::Gauge,
52 :
53 : /// Count of how many safekeeper nodes from in-memory map have https configured
54 : pub(crate) storage_controller_https_safekeeper_nodes: measured::Gauge,
55 :
56 : /// Reconciler tasks completed, broken down by success/failure/cancelled
57 : pub(crate) storage_controller_reconcile_complete:
58 : measured::CounterVec<ReconcileCompleteLabelGroupSet>,
59 :
60 : /// Count of how many times we make an optimization change to a tenant's scheduling
61 : pub(crate) storage_controller_schedule_optimization: measured::Counter,
62 :
63 : /// How many shards are not scheduled into their preferred AZ
64 : pub(crate) storage_controller_schedule_az_violation: measured::Gauge,
65 :
66 : /// How many shard locations (secondary or attached) on each node
67 : pub(crate) storage_controller_node_shards: measured::GaugeVec<NodeLabelGroupSet>,
68 :
69 : /// How many _attached_ shard locations on each node
70 : pub(crate) storage_controller_node_attached_shards: measured::GaugeVec<NodeLabelGroupSet>,
71 :
72 : /// How many _home_ shard locations on each node (i.e. the node's AZ matches the shard's
73 : /// preferred AZ)
74 : pub(crate) storage_controller_node_home_shards: measured::GaugeVec<NodeLabelGroupSet>,
75 :
76 : /// How many shards would like to reconcile but were blocked by concurrency limits
77 : pub(crate) storage_controller_pending_reconciles: measured::Gauge,
78 :
79 : /// How many shards are stuck and will be ignored when considering to run optimizations
80 : pub(crate) storage_controller_stuck_reconciles: measured::Gauge,
81 :
82 : /// HTTP request status counters for handled requests
83 : pub(crate) storage_controller_http_request_status:
84 : measured::CounterVec<HttpRequestStatusLabelGroupSet>,
85 :
86 : /// HTTP request handler latency across all status codes
87 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
88 : pub(crate) storage_controller_http_request_latency:
89 : measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
90 :
91 : /// HTTP rate limiting latency across all tenants and endpoints
92 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 10.0))]
93 : pub(crate) storage_controller_http_request_rate_limited: measured::Histogram<10>,
94 :
95 : /// Count of HTTP requests to the pageserver that resulted in an error,
96 : /// broken down by the pageserver node id, request name and method
97 : pub(crate) storage_controller_pageserver_request_error:
98 : measured::CounterVec<PageserverRequestLabelGroupSet>,
99 :
100 : /// Count of HTTP requests to the safekeeper that resulted in an error,
101 : /// broken down by the safekeeper node id, request name and method
102 : pub(crate) storage_controller_safekeeper_request_error:
103 : measured::CounterVec<SafekeeperRequestLabelGroupSet>,
104 :
105 : /// Latency of HTTP requests to the pageserver, broken down by pageserver
106 : /// node id, request name and method. This include both successful and unsuccessful
107 : /// requests.
108 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
109 : pub(crate) storage_controller_pageserver_request_latency:
110 : measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
111 :
112 : /// Latency of HTTP requests to the safekeeper, broken down by safekeeper
113 : /// node id, request name and method. This include both successful and unsuccessful
114 : /// requests.
115 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
116 : pub(crate) storage_controller_safekeeper_request_latency:
117 : measured::HistogramVec<SafekeeperRequestLabelGroupSet, 5>,
118 :
119 : /// Count of pass-through HTTP requests to the pageserver that resulted in an error,
120 : /// broken down by the pageserver node id, request name and method
121 : pub(crate) storage_controller_passthrough_request_error:
122 : measured::CounterVec<PageserverRequestLabelGroupSet>,
123 :
124 : /// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
125 : /// node id, request name and method. This include both successful and unsuccessful
126 : /// requests.
127 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
128 : pub(crate) storage_controller_passthrough_request_latency:
129 : measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
130 :
131 : /// Count of errors in database queries, broken down by error type and operation.
132 : pub(crate) storage_controller_database_query_error:
133 : measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
134 :
135 : /// Latency of database queries, broken down by operation.
136 : #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
137 : pub(crate) storage_controller_database_query_latency:
138 : measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
139 :
140 : pub(crate) storage_controller_leadership_status: measured::GaugeVec<LeadershipStatusGroupSet>,
141 :
142 : /// Indicator of stucked (long-running) reconciles, broken down by tenant, shard and sequence.
143 : /// The metric is automatically removed once the reconciliation completes.
144 : pub(crate) storage_controller_reconcile_long_running:
145 : measured::CounterVec<ReconcileLongRunningLabelGroupSet>,
146 :
147 : /// Indicator of safekeeper reconciler queue depth, broken down by safekeeper, excluding ongoing reconciles.
148 : pub(crate) storage_controller_safekeeper_reconciles_queued:
149 : measured::GaugeVec<SafekeeperReconcilerLabelGroupSet>,
150 :
151 : /// Indicator of completed safekeeper reconciles, broken down by safekeeper.
152 : pub(crate) storage_controller_safekeeper_reconciles_complete:
153 : measured::CounterVec<SafekeeperReconcilerLabelGroupSet>,
154 :
155 : /* BEGIN HADRON */
156 : /// Hadron `config_watcher` reconciliation runs completed, broken down by success/failure.
157 : pub(crate) storage_controller_config_watcher_complete:
158 : measured::CounterVec<ConfigWatcherCompleteLabelGroupSet>,
159 :
160 : /// Hadron long waits for node state changes during drain and fill.
161 : pub(crate) storage_controller_drain_and_fill_long_waits: measured::Counter,
162 :
163 : /// Set to 1 if we detect any page server pods with pending node pool rotation annotations.
164 : /// Requires manual reset after oncall investigation.
165 : pub(crate) storage_controller_ps_node_pool_rotation_pending: measured::Gauge,
166 :
167 : /// Hadron storage scrubber status.
168 : pub(crate) storage_controller_storage_scrub_status:
169 : measured::CounterVec<StorageScrubberLabelGroupSet>,
170 :
171 : /// Desired number of pageservers managed by the storage controller
172 : pub(crate) storage_controller_num_pageservers_desired: measured::Gauge,
173 :
174 : /// Desired number of safekeepers managed by the storage controller
175 : pub(crate) storage_controller_num_safekeeper_desired: measured::Gauge,
176 : /* END HADRON */
177 : }
178 :
179 : impl StorageControllerMetrics {
180 0 : pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
181 0 : let mut encoder = self.encoder.lock().unwrap();
182 0 : neon_metrics
183 0 : .collect_group_into(&mut *encoder)
184 0 : .unwrap_or_else(|infallible| match infallible {});
185 0 : self.metrics_group
186 0 : .collect_group_into(&mut *encoder)
187 0 : .unwrap_or_else(|infallible| match infallible {});
188 0 : encoder.finish()
189 0 : }
190 : }
191 :
192 : impl Default for StorageControllerMetrics {
193 17 : fn default() -> Self {
194 17 : let mut metrics_group = StorageControllerMetricGroup::new();
195 17 : metrics_group
196 17 : .storage_controller_reconcile_complete
197 17 : .init_all_dense();
198 :
199 17 : metrics_group
200 17 : .storage_controller_config_watcher_complete
201 17 : .init_all_dense();
202 :
203 17 : Self {
204 17 : metrics_group,
205 17 : encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
206 17 : }
207 17 : }
208 : }
209 :
210 : #[derive(measured::LabelGroup, Clone)]
211 : #[label(set = NodeLabelGroupSet)]
212 : pub(crate) struct NodeLabelGroup<'a> {
213 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
214 : pub(crate) az: &'a str,
215 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
216 : pub(crate) node_id: &'a str,
217 : }
218 :
219 : #[derive(measured::LabelGroup)]
220 : #[label(set = ReconcileCompleteLabelGroupSet)]
221 : pub(crate) struct ReconcileCompleteLabelGroup {
222 : pub(crate) status: ReconcileOutcome,
223 : }
224 :
225 : #[derive(measured::LabelGroup)]
226 : #[label(set = HttpRequestStatusLabelGroupSet)]
227 : pub(crate) struct HttpRequestStatusLabelGroup<'a> {
228 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
229 : pub(crate) path: &'a str,
230 : pub(crate) method: Method,
231 : pub(crate) status: StatusCode,
232 : }
233 :
234 : #[derive(measured::LabelGroup)]
235 : #[label(set = HttpRequestLatencyLabelGroupSet)]
236 : pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
237 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
238 : pub(crate) path: &'a str,
239 : pub(crate) method: Method,
240 : }
241 :
242 : #[derive(measured::LabelGroup, Clone)]
243 : #[label(set = PageserverRequestLabelGroupSet)]
244 : pub(crate) struct PageserverRequestLabelGroup<'a> {
245 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
246 : pub(crate) pageserver_id: &'a str,
247 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
248 : pub(crate) path: &'a str,
249 : pub(crate) method: Method,
250 : }
251 :
252 : #[derive(measured::LabelGroup, Clone)]
253 : #[label(set = SafekeeperRequestLabelGroupSet)]
254 : pub(crate) struct SafekeeperRequestLabelGroup<'a> {
255 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
256 : pub(crate) safekeeper_id: &'a str,
257 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
258 : pub(crate) path: &'a str,
259 : pub(crate) method: Method,
260 : }
261 :
262 : #[derive(measured::LabelGroup)]
263 : #[label(set = DatabaseQueryErrorLabelGroupSet)]
264 : pub(crate) struct DatabaseQueryErrorLabelGroup {
265 : pub(crate) error_type: DatabaseErrorLabel,
266 : pub(crate) operation: DatabaseOperation,
267 : }
268 :
269 : #[derive(measured::LabelGroup)]
270 : #[label(set = DatabaseQueryLatencyLabelGroupSet)]
271 : pub(crate) struct DatabaseQueryLatencyLabelGroup {
272 : pub(crate) operation: DatabaseOperation,
273 : }
274 :
275 : #[derive(measured::LabelGroup)]
276 : #[label(set = LeadershipStatusGroupSet)]
277 : pub(crate) struct LeadershipStatusGroup {
278 : pub(crate) status: LeadershipStatus,
279 : }
280 :
281 : #[derive(measured::LabelGroup, Clone)]
282 : #[label(set = ReconcileLongRunningLabelGroupSet)]
283 : pub(crate) struct ReconcileLongRunningLabelGroup<'a> {
284 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
285 : pub(crate) tenant_id: &'a str,
286 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
287 : pub(crate) shard_number: &'a str,
288 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
289 : pub(crate) sequence: &'a str,
290 : }
291 :
292 : #[derive(measured::LabelGroup, Clone)]
293 : #[label(set = StorageScrubberLabelGroupSet)]
294 : pub(crate) struct StorageScrubberLabelGroup<'a> {
295 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
296 : pub(crate) tenant_id: &'a str,
297 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
298 : pub(crate) shard_number: &'a str,
299 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
300 : pub(crate) timeline_id: &'a str,
301 : pub(crate) outcome: StorageScrubberOutcome,
302 : }
303 :
304 : #[derive(FixedCardinalityLabel, Clone, Copy)]
305 : pub(crate) enum StorageScrubberOutcome {
306 : PSOk,
307 : PSWarning,
308 : PSError,
309 : PSOrphan,
310 : SKOk,
311 : SKError,
312 : }
313 :
314 : #[derive(measured::LabelGroup)]
315 : #[label(set = ConfigWatcherCompleteLabelGroupSet)]
316 : pub(crate) struct ConfigWatcherCompleteLabelGroup {
317 : // Reuse the ReconcileOutcome from the SC's reconciliation metrics.
318 : pub(crate) status: ReconcileOutcome,
319 : }
320 :
321 : #[derive(FixedCardinalityLabel, Clone, Copy)]
322 : pub(crate) enum ReconcileOutcome {
323 : // Successfully reconciled everything.
324 : #[label(rename = "ok")]
325 : Success,
326 : // Used by tenant-shard reconciler only. Reconciled pageserver state successfully,
327 : // but failed to delivery the compute notificiation. This error is typically transient
328 : // but if its occurance keeps increasing, it should be investigated.
329 : #[label(rename = "ok_no_notify")]
330 : SuccessNoNotify,
331 : // We failed to reconcile some state and the reconcilation will be retried.
332 : Error,
333 : // Reconciliation was cancelled.
334 : Cancel,
335 : }
336 :
337 : #[derive(FixedCardinalityLabel, Copy, Clone)]
338 : pub(crate) enum Method {
339 : Get,
340 : Put,
341 : Post,
342 : Delete,
343 : Other,
344 : }
345 :
346 : #[derive(measured::LabelGroup, Clone)]
347 : #[label(set = SafekeeperReconcilerLabelGroupSet)]
348 : pub(crate) struct SafekeeperReconcilerLabelGroup<'a> {
349 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
350 : pub(crate) sk_az: &'a str,
351 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
352 : pub(crate) sk_node_id: &'a str,
353 : #[label(dynamic_with = lasso::ThreadedRodeo, default)]
354 : pub(crate) sk_hostname: &'a str,
355 : }
356 :
357 : impl From<hyper::Method> for Method {
358 0 : fn from(value: hyper::Method) -> Self {
359 0 : if value == hyper::Method::GET {
360 0 : Method::Get
361 0 : } else if value == hyper::Method::PUT {
362 0 : Method::Put
363 0 : } else if value == hyper::Method::POST {
364 0 : Method::Post
365 0 : } else if value == hyper::Method::DELETE {
366 0 : Method::Delete
367 : } else {
368 0 : Method::Other
369 : }
370 0 : }
371 : }
372 :
373 : #[derive(Clone, Copy)]
374 : pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
375 :
376 : impl LabelValue for StatusCode {
377 0 : fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
378 0 : v.write_int(self.0.as_u16() as i64)
379 0 : }
380 : }
381 :
382 : impl FixedCardinalityLabel for StatusCode {
383 0 : fn cardinality() -> usize {
384 0 : (100..1000).len()
385 0 : }
386 :
387 0 : fn encode(&self) -> usize {
388 0 : self.0.as_u16() as usize
389 0 : }
390 :
391 0 : fn decode(value: usize) -> Self {
392 0 : Self(hyper::http::StatusCode::from_u16(u16::try_from(value).unwrap()).unwrap())
393 0 : }
394 : }
395 :
396 : #[derive(FixedCardinalityLabel, Clone, Copy)]
397 : pub(crate) enum DatabaseErrorLabel {
398 : Query,
399 : Connection,
400 : ConnectionPool,
401 : Logical,
402 : Migration,
403 : Cas,
404 : }
405 :
406 : impl DatabaseError {
407 0 : pub(crate) fn error_label(&self) -> DatabaseErrorLabel {
408 0 : match self {
409 0 : Self::Query(_) => DatabaseErrorLabel::Query,
410 0 : Self::Connection(_) => DatabaseErrorLabel::Connection,
411 0 : Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool,
412 0 : Self::Logical(_) => DatabaseErrorLabel::Logical,
413 0 : Self::Migration(_) => DatabaseErrorLabel::Migration,
414 0 : Self::Cas(_) => DatabaseErrorLabel::Cas,
415 : }
416 0 : }
417 : }
418 :
419 : /// Update the leadership status metric gauges to reflect the requested status
420 0 : pub(crate) fn update_leadership_status(status: LeadershipStatus) {
421 0 : let status_metric = &METRICS_REGISTRY
422 0 : .metrics_group
423 0 : .storage_controller_leadership_status;
424 :
425 0 : for s in LeadershipStatus::iter() {
426 0 : if s == status {
427 0 : status_metric.set(LeadershipStatusGroup { status: s }, 1);
428 0 : } else {
429 0 : status_metric.set(LeadershipStatusGroup { status: s }, 0);
430 0 : }
431 : }
432 0 : }
|