Line data Source code
1 : use metrics::core::{AtomicF64, Collector, GenericGauge};
2 : use metrics::proto::MetricFamily;
3 : use metrics::{
4 : IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter_vec,
5 : register_int_gauge_vec, register_uint_gauge_vec,
6 : };
7 : use once_cell::sync::Lazy;
8 :
9 0 : pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
10 0 : register_uint_gauge_vec!(
11 0 : "compute_installed_extensions",
12 0 : "Number of databases where the version of extension is installed",
13 0 : &["extension_name", "version", "owned_by_superuser"]
14 0 : )
15 0 : .expect("failed to define a metric")
16 0 : });
17 :
/// Identifies which control plane API operation a request belongs to.
///
/// Normally, any HTTP API request is described by METHOD (e.g. GET, POST, etc.) + PATH,
/// but for all our APIs we defined a 'slug'/method/operationId in the OpenAPI spec.
/// And it's fair to call it an 'RPC' (Remote Procedure Call).
pub enum CPlaneRequestRPC {
    GetSpec,
}

impl CPlaneRequestRPC {
    /// Returns the textual name of this RPC.
    ///
    /// The result is a string literal, so it is handed out as `&'static str`
    /// and can outlive the enum value it was obtained from.
    pub fn as_str(&self) -> &'static str {
        match self {
            CPlaneRequestRPC::GetSpec => "GetSpec",
        }
    }
}
32 :
/// The literal string `"unknown"`.
///
/// NOTE(review): presumably used as the `http_status` label value when no
/// real HTTP status is available; confirm against the callers.
pub const UNKNOWN_HTTP_STATUS: &str = "unknown";
34 :
35 0 : pub(crate) static CPLANE_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
36 0 : register_int_counter_vec!(
37 0 : "compute_ctl_cplane_requests_total",
38 0 : "Total number of control plane requests made by compute_ctl by status",
39 0 : &["rpc", "http_status"]
40 0 : )
41 0 : .expect("failed to define a metric")
42 0 : });
43 :
44 : /// Total number of failed database migrations. Per-compute, this is actually a boolean metric,
45 : /// either empty or with a single value (1, migration_id) because we stop at the first failure.
46 : /// Yet, the sum over the fleet will provide the total number of failures.
47 0 : pub(crate) static DB_MIGRATION_FAILED: Lazy<IntCounterVec> = Lazy::new(|| {
48 0 : register_int_counter_vec!(
49 0 : "compute_ctl_db_migration_failed_total",
50 0 : "Total number of failed database migrations",
51 0 : &["migration_id"]
52 0 : )
53 0 : .expect("failed to define a metric")
54 0 : });
55 :
56 0 : pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy<IntCounterVec> = Lazy::new(|| {
57 0 : register_int_counter_vec!(
58 0 : "compute_ctl_remote_ext_requests_total",
59 0 : "Total number of requests made by compute_ctl to download extensions from S3 proxy by status",
60 0 : &["http_status", "filename"]
61 0 : )
62 0 : .expect("failed to define a metric")
63 0 : });
64 :
65 : // Size of audit log directory in bytes
66 0 : pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy<GenericGauge<AtomicF64>> = Lazy::new(|| {
67 0 : register_gauge!(
68 0 : "compute_audit_log_dir_size",
69 0 : "Size of audit log directory in bytes",
70 0 : )
71 0 : .expect("failed to define a metric")
72 0 : });
73 :
74 : // Report that `compute_ctl` is up and what's the current compute status.
75 0 : pub(crate) static COMPUTE_CTL_UP: Lazy<IntGaugeVec> = Lazy::new(|| {
76 0 : register_int_gauge_vec!(
77 0 : "compute_ctl_up",
78 0 : "Whether compute_ctl is running",
79 0 : &["build_tag", "status"]
80 0 : )
81 0 : .expect("failed to define a metric")
82 0 : });
83 :
84 0 : pub fn collect() -> Vec<MetricFamily> {
85 0 : let mut metrics = COMPUTE_CTL_UP.collect();
86 0 : metrics.extend(INSTALLED_EXTENSIONS.collect());
87 0 : metrics.extend(CPLANE_REQUESTS_TOTAL.collect());
88 0 : metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect());
89 0 : metrics.extend(DB_MIGRATION_FAILED.collect());
90 0 : metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
91 0 : metrics
92 0 : }
|