LCOV - code coverage report
Current view: top level - storage_controller/src - metrics.rs (source / functions)
Summary columns: Coverage | Total | Hit
Test: 050dd70dd490b28fffe527eae9fb8a1222b5c59c.info
Lines: 29.5 % | 61 | 18
Test Date: 2024-06-25 21:28:46
Functions: 69.2 % | 26 | 18

            Line data    Source code
       1              : //!
       2              : //! This module provides metric definitions for the storage controller.
       3              : //!
       4              : //! All metrics are grouped in [`StorageControllerMetricGroup`]. [`StorageControllerMetrics`] holds
       5              : //! that metric group and its text encoder. It's globally available via the [`METRICS_REGISTRY`]
       6              : //! static.
       7              : //!
       8              : //! The rest of the code defines label group types and deals with converting outer types to labels.
       9              : //!
      10              : use bytes::Bytes;
      11              : use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup};
      12              : use metrics::NeonMetrics;
      13              : use once_cell::sync::Lazy;
      14              : use std::sync::Mutex;
      15              : 
      16              : use crate::persistence::{DatabaseError, DatabaseOperation};
      17              : 
      18              : pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> =
      19              :     Lazy::new(StorageControllerMetrics::default);
      20              : 
      21            0 : pub fn preinitialize_metrics() {
      22            0 :     Lazy::force(&METRICS_REGISTRY);
      23            0 : }
      24              : 
      25              : pub(crate) struct StorageControllerMetrics {
      26              :     pub(crate) metrics_group: StorageControllerMetricGroup,
      27              :     encoder: Mutex<measured::text::BufferedTextEncoder>,
      28              : }
      29              : 
      30            6 : #[derive(measured::MetricGroup)]
      31              : #[metric(new())]
      32              : pub(crate) struct StorageControllerMetricGroup {
      33              :     /// Count of how many times we spawn a reconcile task
      34              :     pub(crate) storage_controller_reconcile_spawn: measured::Counter,
      35              : 
      36              :     /// Reconciler tasks completed, broken down by success/failure/cancelled
      37              :     pub(crate) storage_controller_reconcile_complete:
      38              :         measured::CounterVec<ReconcileCompleteLabelGroupSet>,
      39              : 
      40              :     /// Count of how many times we make an optimization change to a tenant's scheduling
      41              :     pub(crate) storage_controller_schedule_optimization: measured::Counter,
      42              : 
      43              :     /// HTTP request status counters for handled requests
      44              :     pub(crate) storage_controller_http_request_status:
      45              :         measured::CounterVec<HttpRequestStatusLabelGroupSet>,
      46              : 
      47              :     /// HTTP request handler latency across all status codes
      48              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      49              :     pub(crate) storage_controller_http_request_latency:
      50              :         measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
      51              : 
      52              :     /// Count of HTTP requests to the pageserver that resulted in an error,
      53              :     /// broken down by the pageserver node id, request name and method
      54              :     pub(crate) storage_controller_pageserver_request_error:
      55              :         measured::CounterVec<PageserverRequestLabelGroupSet>,
      56              : 
      57              :     /// Latency of HTTP requests to the pageserver, broken down by pageserver
      58              :     /// node id, request name and method. This includes both successful and unsuccessful
      59              :     /// requests.
      60              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      61              :     pub(crate) storage_controller_pageserver_request_latency:
      62              :         measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
      63              : 
      64              :     /// Count of pass-through HTTP requests to the pageserver that resulted in an error,
      65              :     /// broken down by the pageserver node id, request name and method
      66              :     pub(crate) storage_controller_passthrough_request_error:
      67              :         measured::CounterVec<PageserverRequestLabelGroupSet>,
      68              : 
      69              :     /// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
      70              :     /// node id, request name and method. This includes both successful and unsuccessful
      71              :     /// requests.
      72              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      73              :     pub(crate) storage_controller_passthrough_request_latency:
      74              :         measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
      75              : 
      76              :     /// Count of errors in database queries, broken down by error type and operation.
      77              :     pub(crate) storage_controller_database_query_error:
      78              :         measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
      79              : 
      80              :     /// Latency of database queries, broken down by operation.
      81              :     #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
      82              :     pub(crate) storage_controller_database_query_latency:
      83              :         measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
      84              : }
      85              : 
      86              : impl StorageControllerMetrics {
      87            0 :     pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
      88            0 :         let mut encoder = self.encoder.lock().unwrap();
      89            0 :         neon_metrics
      90            0 :             .collect_group_into(&mut *encoder)
      91            0 :             .unwrap_or_else(|infallible| match infallible {});
      92            0 :         self.metrics_group
      93            0 :             .collect_group_into(&mut *encoder)
      94            0 :             .unwrap_or_else(|infallible| match infallible {});
      95            0 :         encoder.finish()
      96            0 :     }
      97              : }
      98              : 
      99              : impl Default for StorageControllerMetrics {
     100            6 :     fn default() -> Self {
     101            6 :         let mut metrics_group = StorageControllerMetricGroup::new();
     102            6 :         metrics_group
     103            6 :             .storage_controller_reconcile_complete
     104            6 :             .init_all_dense();
     105            6 : 
     106            6 :         Self {
     107            6 :             metrics_group,
     108            6 :             encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
     109            6 :         }
     110            6 :     }
     111              : }
     112              : 
     113           18 : #[derive(measured::LabelGroup)]
     114              : #[label(set = ReconcileCompleteLabelGroupSet)]
     115              : pub(crate) struct ReconcileCompleteLabelGroup {
     116              :     pub(crate) status: ReconcileOutcome,
     117              : }
     118              : 
     119           12 : #[derive(measured::LabelGroup)]
     120              : #[label(set = HttpRequestStatusLabelGroupSet)]
     121              : pub(crate) struct HttpRequestStatusLabelGroup<'a> {
     122              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     123              :     pub(crate) path: &'a str,
     124              :     pub(crate) method: Method,
     125              :     pub(crate) status: StatusCode,
     126              : }
     127              : 
     128           12 : #[derive(measured::LabelGroup)]
     129              : #[label(set = HttpRequestLatencyLabelGroupSet)]
     130              : pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
     131              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     132              :     pub(crate) path: &'a str,
     133              :     pub(crate) method: Method,
     134              : }
     135              : 
     136           48 : #[derive(measured::LabelGroup, Clone)]
     137              : #[label(set = PageserverRequestLabelGroupSet)]
     138              : pub(crate) struct PageserverRequestLabelGroup<'a> {
     139              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     140              :     pub(crate) pageserver_id: &'a str,
     141              :     #[label(dynamic_with = lasso::ThreadedRodeo, default)]
     142              :     pub(crate) path: &'a str,
     143              :     pub(crate) method: Method,
     144              : }
     145              : 
     146           24 : #[derive(measured::LabelGroup)]
     147              : #[label(set = DatabaseQueryErrorLabelGroupSet)]
     148              : pub(crate) struct DatabaseQueryErrorLabelGroup {
     149              :     pub(crate) error_type: DatabaseErrorLabel,
     150              :     pub(crate) operation: DatabaseOperation,
     151              : }
     152              : 
     153           18 : #[derive(measured::LabelGroup)]
     154              : #[label(set = DatabaseQueryLatencyLabelGroupSet)]
     155              : pub(crate) struct DatabaseQueryLatencyLabelGroup {
     156              :     pub(crate) operation: DatabaseOperation,
     157              : }
     158              : 
     159              : #[derive(FixedCardinalityLabel, Clone, Copy)]
     160              : pub(crate) enum ReconcileOutcome {
     161              :     #[label(rename = "ok")]
     162              :     Success,
     163              :     Error,
     164              :     Cancel,
     165              : }
     166              : 
     167              : #[derive(FixedCardinalityLabel, Copy, Clone)]
     168              : pub(crate) enum Method {
     169              :     Get,
     170              :     Put,
     171              :     Post,
     172              :     Delete,
     173              :     Other,
     174              : }
     175              : 
     176              : impl From<hyper::Method> for Method {
     177            0 :     fn from(value: hyper::Method) -> Self {
     178            0 :         if value == hyper::Method::GET {
     179            0 :             Method::Get
     180            0 :         } else if value == hyper::Method::PUT {
     181            0 :             Method::Put
     182            0 :         } else if value == hyper::Method::POST {
     183            0 :             Method::Post
     184            0 :         } else if value == hyper::Method::DELETE {
     185            0 :             Method::Delete
     186              :         } else {
     187            0 :             Method::Other
     188              :         }
     189            0 :     }
     190              : }
     191              : 
     192              : #[derive(Clone, Copy)]
     193              : pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
     194              : 
     195              : impl LabelValue for StatusCode {
     196            0 :     fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
     197            0 :         v.write_int(self.0.as_u16() as i64)
     198            0 :     }
     199              : }
     200              : 
     201              : impl FixedCardinalityLabel for StatusCode {
     202            0 :     fn cardinality() -> usize {
     203            0 :         (100..1000).len()
     204            0 :     }
     205              : 
     206            0 :     fn encode(&self) -> usize {
     207            0 :         self.0.as_u16() as usize
     208            0 :     }
     209              : 
     210            0 :     fn decode(value: usize) -> Self {
     211            0 :         Self(hyper::http::StatusCode::from_u16(u16::try_from(value).unwrap()).unwrap())
     212            0 :     }
     213              : }
     214              : 
     215              : #[derive(FixedCardinalityLabel, Clone, Copy)]
     216              : pub(crate) enum DatabaseErrorLabel {
     217              :     Query,
     218              :     Connection,
     219              :     ConnectionPool,
     220              :     Logical,
     221              : }
     222              : 
     223              : impl DatabaseError {
     224            0 :     pub(crate) fn error_label(&self) -> DatabaseErrorLabel {
     225            0 :         match self {
     226            0 :             Self::Query(_) => DatabaseErrorLabel::Query,
     227            0 :             Self::Connection(_) => DatabaseErrorLabel::Connection,
     228            0 :             Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool,
     229            0 :             Self::Logical(_) => DatabaseErrorLabel::Logical,
     230              :         }
     231            0 :     }
     232              : }
        

Generated by: LCOV version 2.1-beta