LCOV - code coverage report
Current view: top level - compute_tools/src - metrics.rs (source / functions) Coverage Total Hit
Test: 472031e0b71f3195f7f21b1f2b20de09fd07bb56.info Lines: 0.0 % 93 0
Test Date: 2025-05-26 10:37:33 Functions: 0.0 % 12 0

            Line data    Source code
       1              : use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};
       2              : use metrics::proto::MetricFamily;
       3              : use metrics::{
       4              :     IntCounter, IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
       5              :     register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,
       6              : };
       7              : use once_cell::sync::Lazy;
       8              : 
       9            0 : pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
      10            0 :     register_uint_gauge_vec!(
      11            0 :         "compute_installed_extensions",
      12            0 :         "Number of databases where the version of extension is installed",
      13            0 :         &["extension_name", "version", "owned_by_superuser"]
      14            0 :     )
      15            0 :     .expect("failed to define a metric")
      16            0 : });
      17              : 
      18              : // Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,
      19              : // but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.
      20              : // And it's fair to call it a 'RPC' (Remote Procedure Call).
      21              : pub enum CPlaneRequestRPC {
      22              :     GetConfig,
      23              : }
      24              : 
      25              : impl CPlaneRequestRPC {
      26            0 :     pub fn as_str(&self) -> &str {
      27            0 :         match self {
      28            0 :             CPlaneRequestRPC::GetConfig => "GetConfig",
      29            0 :         }
      30            0 :     }
      31              : }
      32              : 
      33              : pub const UNKNOWN_HTTP_STATUS: &str = "unknown";
      34              : 
      35            0 : pub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
      36            0 :     register_int_counter_vec!(
      37            0 :         "compute_ctl_cplane_requests_total",
      38            0 :         "Total number of control plane requests made by compute_ctl by status",
      39            0 :         &["rpc", "http_status"]
      40            0 :     )
      41            0 :     .expect("failed to define a metric")
      42            0 : });
      43              : 
      44              : /// Total number of failed database migrations. Per-compute, this is actually a boolean metric,
      45              : /// either empty or with a single value (1, migration_id) because we stop at the first failure.
      46              : /// Yet, the sum over the fleet will provide the total number of failures.
      47            0 : pub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {
      48            0 :     register_int_counter_vec!(
      49            0 :         "compute_ctl_db_migration_failed_total",
      50            0 :         "Total number of failed database migrations",
      51            0 :         &["migration_id"]
      52            0 :     )
      53            0 :     .expect("failed to define a metric")
      54            0 : });
      55              : 
      56            0 : pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
      57            0 :     register_int_counter_vec!(
      58            0 :         "compute_ctl_remote_ext_requests_total",
      59            0 :         "Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
      60            0 :         &["http_status", "filename"]
      61            0 :     )
      62            0 :     .expect("failed to define a metric")
      63            0 : });
      64              : 
      65              : // Size of audit log directory in bytes
      66            0 : pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
      67            0 :     register_gauge!(
      68            0 :         "compute_audit_log_dir_size",
      69            0 :         "Size of audit log directory in bytes",
      70            0 :     )
      71            0 :     .expect("failed to define a metric")
      72            0 : });
      73              : 
      74              : // Report that `compute_ctl` is up and what's the current compute status.
      75            0 : pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
      76            0 :     register_int_gauge_vec!(
      77            0 :         "compute_ctl_up",
      78            0 :         "Whether compute_ctl is running",
      79            0 :         &["build_tag", "status"]
      80            0 :     )
      81            0 :     .expect("failed to define a metric")
      82            0 : });
      83              : 
      84            0 : pub(crate) static PG_CURR_DOWNTIME_MS: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
      85            0 :     register_gauge!(
      86            0 :         "compute_pg_current_downtime_ms",
      87            0 :         "Non-cumulative duration of Postgres downtime in ms; resets after successful check",
      88            0 :     )
      89            0 :     .expect("failed to define a metric")
      90            0 : });
      91              : 
      92            0 : pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::new(|| {
      93            0 :     register_int_counter!(
      94            0 :         "compute_pg_downtime_ms_total",
      95            0 :         "Cumulative duration of Postgres downtime in ms",
      96            0 :     )
      97            0 :     .expect("failed to define a metric")
      98            0 : });
      99              : 
     100              : /// Needed as neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
     101              : /// On the other hand, LFC_PREWARMED_PAGES is excessive as we can GET /lfc/prewarm
     102            0 : pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
     103            0 :     register_int_counter!(
     104            0 :         "compute_ctl_lfc_prewarm_requests_total",
     105            0 :         "Total number of LFC prewarm requests made by compute_ctl",
     106            0 :     )
     107            0 :     .expect("failed to define a metric")
     108            0 : });
     109              : 
     110            0 : pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
     111            0 :     register_int_counter!(
     112            0 :         "compute_ctl_lfc_offload_requests_total",
     113            0 :         "Total number of LFC offload requests made by compute_ctl",
     114            0 :     )
     115            0 :     .expect("failed to define a metric")
     116            0 : });
     117              : 
     118            0 : pub fn collect() -> Vec<MetricFamily> {
     119            0 :     let mut metrics = COMPUTE_CTL_UP.collect();
     120            0 :     metrics.extend(INSTALLED_EXTENSIONS.collect());
     121            0 :     metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
     122            0 :     metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
     123            0 :     metrics.extend(DB_MIGRATION_FAILED.collect());
     124            0 :     metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
     125            0 :     metrics.extend(PG_CURR_DOWNTIME_MS.collect());
     126            0 :     metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
     127            0 :     metrics.extend(LFC_PREWARM_REQUESTS.collect());
     128            0 :     metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
     129            0 :     metrics
     130            0 : }
        

Generated by: LCOV version 2.1-beta