LCOV - code coverage report
Current view: top level - storage_controller/src - metrics.rs (source / functions) Coverage Total Hit
Test: 42f947419473a288706e86ecdf7c2863d760d5d7.info Lines: 30.6 % 62 19
Test Date: 2024-08-02 21:34:27 Functions: 72.4 % 29 21

            Line data    Source code
       1              : //!
       2              : //! This module provides metric definitions for the storage controller.
       3              : //!
       4              : //! All metrics are grouped in [`StorageControllerMetricGroup`]. [`StorageControllerMetrics`] holds
       5              : //! that metric group and its encoder. It's globally available via the [`METRICS_REGISTRY`]
       6              : //! static.
       7              : //!
       8              : //! The rest of the code defines label group types and deals with converting outer types to labels.
       9              : //!
      10              : use bytes::Bytes;
      11              : use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup};
      12              : use metrics::NeonMetrics;
      13              : use once_cell::sync::Lazy;
      14              : use std::sync::Mutex;
      15              : 
      16              : use crate::{
      17              :     persistence::{DatabaseError, DatabaseOperation},
      18              :     service::LeadershipStatus,
      19              : };
      20              : 
      21              : pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> =
      22              :     Lazy::new(StorageControllerMetrics::default);
      23              : 
      24            0 : pub fn preinitialize_metrics() {
      25            0 :     Lazy::force(&METRICS_REGISTRY);
      26            0 : }
      27              : 
      28              : pub(crate) struct StorageControllerMetrics {
      29              :     pub(crate) metrics_group: StorageControllerMetricGroup,
      30              :     encoder: Mutex<measured::text::BufferedTextEncoder>,
      31              : }
      32              : 
      33            6 : #[derive(measured::MetricGroup)]
      34              : #[metric(new())]
      35              : pub(crate) struct StorageControllerMetricGroup {
      36              :     /// Count of how many times we spawn a reconcile task
      37              :     pub(crate) storage_controller_reconcile_spawn: measured::Counter,
      38              : 
      39              :     /// Reconciler tasks completed, broken down by success/failure/cancelled
      40              :     pub(crate) storage_controller_reconcile_complete:
      41              :         measured::CounterVec<ReconcileCompleteLabelGroupSet>,
      42              : 
      43              :     /// Count of how many times we make an optimization change to a tenant's scheduling
      44              :     pub(crate) storage_controller_schedule_optimization: measured::Counter,
      45              : 
      46              :     /// HTTP request status counters for handled requests
      47              :     pub(crate) storage_controller_http_request_status:
      48              :         measured::CounterVec<HttpRequestStatusLabelGroupSet>,
      49              : 
      50              :     /// HTTP request handler latency across all status codes
      51              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      52              :     pub(crate) storage_controller_http_request_latency:
      53              :         measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
      54              : 
      55              :     /// Count of HTTP requests to the pageserver that resulted in an error,
      56              :     /// broken down by the pageserver node id, request name and method
      57              :     pub(crate) storage_controller_pageserver_request_error:
      58              :         measured::CounterVec<PageserverRequestLabelGroupSet>,
      59              : 
      60              :     /// Latency of HTTP requests to the pageserver, broken down by pageserver
      61              :     /// node id, request name and method. This includes both successful and unsuccessful
      62              :     /// requests.
      63              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      64              :     pub(crate) storage_controller_pageserver_request_latency:
      65              :         measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
      66              : 
      67              :     /// Count of pass-through HTTP requests to the pageserver that resulted in an error,
      68              :     /// broken down by the pageserver node id, request name and method
      69              :     pub(crate) storage_controller_passthrough_request_error:
      70              :         measured::CounterVec<PageserverRequestLabelGroupSet>,
      71              : 
      72              :     /// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
      73              :     /// node id, request name and method. This includes both successful and unsuccessful
      74              :     /// requests.
      75              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      76              :     pub(crate) storage_controller_passthrough_request_latency:
      77              :         measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
      78              : 
      79              :     /// Count of errors in database queries, broken down by error type and operation.
      80              :     pub(crate) storage_controller_database_query_error:
      81              :         measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
      82              : 
      83              :     /// Latency of database queries, broken down by operation.
      84              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      85              :     pub(crate) storage_controller_database_query_latency:
      86              :         measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
      87              : 
      88              :     pub(crate) storage_controller_leadership_status: measured::GaugeVec<LeadershipStatusGroupSet>,
      89              : }
      90              : 
      91              : impl StorageControllerMetrics {
      92            0 :     pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
      93            0 :         let mut encoder = self.encoder.lock().unwrap();
      94            0 :         neon_metrics
      95            0 :             .collect_group_into(&mut *encoder)
      96            0 :             .unwrap_or_else(|infallible| match infallible {});
      97            0 :         self.metrics_group
      98            0 :             .collect_group_into(&mut *encoder)
      99            0 :             .unwrap_or_else(|infallible| match infallible {});
     100            0 :         encoder.finish()
     101            0 :     }
     102              : }
     103              : 
     104              : impl Default for StorageControllerMetrics {
     105            6 :     fn default() -> Self {
     106            6 :         let mut metrics_group = StorageControllerMetricGroup::new();
     107            6 :         metrics_group
     108            6 :             .storage_controller_reconcile_complete
     109            6 :             .init_all_dense();
     110            6 : 
     111            6 :         Self {
     112            6 :             metrics_group,
     113            6 :             encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
     114            6 :         }
     115            6 :     }
     116              : }
     117              : 
     118           18 : #[derive(measured::LabelGroup)]
     119              : #[label(set = ReconcileCompleteLabelGroupSet)]
     120              : pub(crate) struct ReconcileCompleteLabelGroup {
     121              :     pub(crate) status: ReconcileOutcome,
     122              : }
     123              : 
     124           12 : #[derive(measured::LabelGroup)]
     125              : #[label(set = HttpRequestStatusLabelGroupSet)]
     126              : pub(crate) struct HttpRequestStatusLabelGroup<'a> {
     127              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     128              :     pub(crate) path: &'a str,
     129              :     pub(crate) method: Method,
     130              :     pub(crate) status: StatusCode,
     131              : }
     132              : 
     133           12 : #[derive(measured::LabelGroup)]
     134              : #[label(set = HttpRequestLatencyLabelGroupSet)]
     135              : pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
     136              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     137              :     pub(crate) path: &'a str,
     138              :     pub(crate) method: Method,
     139              : }
     140              : 
     141           48 : #[derive(measured::LabelGroup, Clone)]
     142              : #[label(set = PageserverRequestLabelGroupSet)]
     143              : pub(crate) struct PageserverRequestLabelGroup<'a> {
     144              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     145              :     pub(crate) pageserver_id: &'a str,
     146              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     147              :     pub(crate) path: &'a str,
     148              :     pub(crate) method: Method,
     149              : }
     150              : 
     151           24 : #[derive(measured::LabelGroup)]
     152              : #[label(set = DatabaseQueryErrorLabelGroupSet)]
     153              : pub(crate) struct DatabaseQueryErrorLabelGroup {
     154              :     pub(crate) error_type: DatabaseErrorLabel,
     155              :     pub(crate) operation: DatabaseOperation,
     156              : }
     157              : 
     158           18 : #[derive(measured::LabelGroup)]
     159              : #[label(set = DatabaseQueryLatencyLabelGroupSet)]
     160              : pub(crate) struct DatabaseQueryLatencyLabelGroup {
     161              :     pub(crate) operation: DatabaseOperation,
     162              : }
     163              : 
     164           18 : #[derive(measured::LabelGroup)]
     165              : #[label(set = LeadershipStatusGroupSet)]
     166              : pub(crate) struct LeadershipStatusGroup {
     167              :     pub(crate) status: LeadershipStatus,
     168              : }
     169              : 
     170              : #[derive(FixedCardinalityLabel, Clone, Copy)]
     171              : pub(crate) enum ReconcileOutcome {
     172              :     #[label(rename = "ok")]
     173              :     Success,
     174              :     Error,
     175              :     Cancel,
     176              : }
     177              : 
     178              : #[derive(FixedCardinalityLabel, Copy, Clone)]
     179              : pub(crate) enum Method {
     180              :     Get,
     181              :     Put,
     182              :     Post,
     183              :     Delete,
     184              :     Other,
     185              : }
     186              : 
     187              : impl From<hyper::Method> for Method {
     188            0 :     fn from(value: hyper::Method) -> Self {
     189            0 :         if value == hyper::Method::GET {
     190            0 :             Method::Get
     191            0 :         } else if value == hyper::Method::PUT {
     192            0 :             Method::Put
     193            0 :         } else if value == hyper::Method::POST {
     194            0 :             Method::Post
     195            0 :         } else if value == hyper::Method::DELETE {
     196            0 :             Method::Delete
     197              :         } else {
     198            0 :             Method::Other
     199              :         }
     200            0 :     }
     201              : }
     202              : 
     203              : #[derive(Clone, Copy)]
     204              : pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
     205              : 
     206              : impl LabelValue for StatusCode {
     207            0 :     fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
     208            0 :         v.write_int(self.0.as_u16() as i64)
     209            0 :     }
     210              : }
     211              : 
     212              : impl FixedCardinalityLabel for StatusCode {
     213            0 :     fn cardinality() -> usize {
     214            0 :         (100..1000).len()
     215            0 :     }
     216              : 
     217            0 :     fn encode(&self) -> usize {
     218            0 :         self.0.as_u16() as usize
     219            0 :     }
     220              : 
     221            0 :     fn decode(value: usize) -> Self {
     222            0 :         Self(hyper::http::StatusCode::from_u16(u16::try_from(value).unwrap()).unwrap())
     223            0 :     }
     224              : }
     225              : 
     226              : #[derive(FixedCardinalityLabel, Clone, Copy)]
     227              : pub(crate) enum DatabaseErrorLabel {
     228              :     Query,
     229              :     Connection,
     230              :     ConnectionPool,
     231              :     Logical,
     232              : }
     233              : 
     234              : impl DatabaseError {
     235            0 :     pub(crate) fn error_label(&self) -> DatabaseErrorLabel {
     236            0 :         match self {
     237            0 :             Self::Query(_) => DatabaseErrorLabel::Query,
     238            0 :             Self::Connection(_) => DatabaseErrorLabel::Connection,
     239            0 :             Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool,
     240            0 :             Self::Logical(_) => DatabaseErrorLabel::Logical,
     241              :         }
     242            0 :     }
     243              : }
        

Generated by: LCOV version 2.1-beta