Line data Source code
1 : use ::metrics::{
2 : exponential_buckets, register_histogram, register_histogram_vec, register_hll_vec,
3 : register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge,
4 : register_int_gauge_vec, Histogram, HistogramVec, HyperLogLogVec, IntCounterPairVec,
5 : IntCounterVec, IntGauge, IntGaugeVec,
6 : };
7 : use metrics::{
8 : register_hll, register_int_counter, register_int_counter_pair, HyperLogLog, IntCounter,
9 : IntCounterPair,
10 : };
11 :
12 : use once_cell::sync::Lazy;
13 : use tokio::time::{self, Instant};
14 :
15 : use crate::console::messages::ColdStartInfo;
16 :
17 0 : pub static NUM_DB_CONNECTIONS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
18 0 : register_int_counter_pair_vec!(
19 0 : "proxy_opened_db_connections_total",
20 0 : "Number of opened connections to a database.",
21 0 : "proxy_closed_db_connections_total",
22 0 : "Number of closed connections to a database.",
23 0 : &["protocol"],
24 0 : )
25 0 : .unwrap()
26 0 : });
27 :
28 0 : pub static NUM_CLIENT_CONNECTION_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
29 0 : register_int_counter_pair_vec!(
30 0 : "proxy_opened_client_connections_total",
31 0 : "Number of opened connections from a client.",
32 0 : "proxy_closed_client_connections_total",
33 0 : "Number of closed connections from a client.",
34 0 : &["protocol"],
35 0 : )
36 0 : .unwrap()
37 0 : });
38 :
39 0 : pub static NUM_CONNECTION_REQUESTS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
40 0 : register_int_counter_pair_vec!(
41 0 : "proxy_accepted_connections_total",
42 0 : "Number of client connections accepted.",
43 0 : "proxy_closed_connections_total",
44 0 : "Number of client connections closed.",
45 0 : &["protocol"],
46 0 : )
47 0 : .unwrap()
48 0 : });
49 :
50 68 : pub static COMPUTE_CONNECTION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
51 68 : register_histogram_vec!(
52 68 : "proxy_compute_connection_latency_seconds",
53 68 : "Time it took for proxy to establish a connection to the compute endpoint",
54 68 : // http/ws/tcp, true/false, true/false, success/failure, client/client_and_cplane
55 68 : // 3 * 6 * 2 * 2 = 72 counters
56 68 : &["protocol", "cold_start_info", "outcome", "excluded"],
57 68 : // largest bucket = 2^16 * 0.5ms = 32s
58 68 : exponential_buckets(0.0005, 2.0, 16).unwrap(),
59 68 : )
60 68 : .unwrap()
61 68 : });
62 :
63 0 : pub static CONSOLE_REQUEST_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
64 0 : register_histogram_vec!(
65 0 : "proxy_console_request_latency",
66 0 : "Time it took for proxy to establish a connection to the compute endpoint",
67 0 : // proxy_wake_compute/proxy_get_role_info
68 0 : &["request"],
69 0 : // largest bucket = 2^16 * 0.2ms = 13s
70 0 : exponential_buckets(0.0002, 2.0, 16).unwrap(),
71 0 : )
72 0 : .unwrap()
73 0 : });
74 :
75 0 : pub static ALLOWED_IPS_BY_CACHE_OUTCOME: Lazy<IntCounterVec> = Lazy::new(|| {
76 0 : register_int_counter_vec!(
77 0 : "proxy_allowed_ips_cache_misses",
78 0 : "Number of cache hits/misses for allowed ips",
79 0 : // hit/miss
80 0 : &["outcome"],
81 0 : )
82 0 : .unwrap()
83 0 : });
84 :
85 2 : pub static RATE_LIMITER_ACQUIRE_LATENCY: Lazy<Histogram> = Lazy::new(|| {
86 2 : register_histogram!(
87 2 : "proxy_control_plane_token_acquire_seconds",
88 2 : "Time it took for proxy to establish a connection to the compute endpoint",
89 2 : // largest bucket = 3^16 * 0.05ms = 2.15s
90 2 : exponential_buckets(0.00005, 3.0, 16).unwrap(),
91 2 : )
92 2 : .unwrap()
93 2 : });
94 :
95 16 : pub static RATE_LIMITER_LIMIT: Lazy<IntGaugeVec> = Lazy::new(|| {
96 16 : register_int_gauge_vec!(
97 16 : "semaphore_control_plane_limit",
98 16 : "Current limit of the semaphore control plane",
99 16 : &["limit"], // 2 counters
100 16 : )
101 16 : .unwrap()
102 16 : });
103 :
104 20 : pub static NUM_CONNECTION_ACCEPTED_BY_SNI: Lazy<IntCounterVec> = Lazy::new(|| {
105 20 : register_int_counter_vec!(
106 20 : "proxy_accepted_connections_by_sni",
107 20 : "Number of connections (per sni).",
108 20 : &["kind"],
109 20 : )
110 20 : .unwrap()
111 20 : });
112 :
113 0 : pub static ALLOWED_IPS_NUMBER: Lazy<Histogram> = Lazy::new(|| {
114 0 : register_histogram!(
115 0 : "proxy_allowed_ips_number",
116 0 : "Number of allowed ips",
117 0 : vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0],
118 0 : )
119 0 : .unwrap()
120 0 : });
121 :
122 0 : pub static HTTP_CONTENT_LENGTH: Lazy<HistogramVec> = Lazy::new(|| {
123 0 : register_histogram_vec!(
124 0 : "proxy_http_conn_content_length_bytes",
125 0 : "Number of bytes the HTTP response content consumes",
126 0 : // request/response
127 0 : &["direction"],
128 0 : // smallest bucket = 16 bytes
129 0 : // largest bucket = 4^12 * 16 bytes = 256MB
130 0 : exponential_buckets(16.0, 4.0, 12).unwrap()
131 0 : )
132 0 : .unwrap()
133 0 : });
134 :
135 2 : pub static GC_LATENCY: Lazy<Histogram> = Lazy::new(|| {
136 2 : register_histogram!(
137 2 : "proxy_http_pool_reclaimation_lag_seconds",
138 2 : "Time it takes to reclaim unused connection pools",
139 2 : // 1us -> 65ms
140 2 : exponential_buckets(1e-6, 2.0, 16).unwrap(),
141 2 : )
142 2 : .unwrap()
143 2 : });
144 :
145 2 : pub static ENDPOINT_POOLS: Lazy<IntCounterPair> = Lazy::new(|| {
146 4 : register_int_counter_pair!(
147 4 : "proxy_http_pool_endpoints_registered_total",
148 4 : "Number of endpoints we have registered pools for",
149 4 : "proxy_http_pool_endpoints_unregistered_total",
150 4 : "Number of endpoints we have unregistered pools for",
151 4 : )
152 2 : .unwrap()
153 2 : });
154 :
155 2 : pub static NUM_OPEN_CLIENTS_IN_HTTP_POOL: Lazy<IntGauge> = Lazy::new(|| {
156 2 : register_int_gauge!(
157 2 : "proxy_http_pool_opened_connections",
158 2 : "Number of opened connections to a database.",
159 2 : )
160 2 : .unwrap()
161 2 : });
162 :
163 2 : pub static NUM_CANCELLATION_REQUESTS: Lazy<IntCounterVec> = Lazy::new(|| {
164 2 : register_int_counter_vec!(
165 2 : "proxy_cancellation_requests_total",
166 2 : "Number of cancellation requests (per found/not_found).",
167 2 : &["source", "kind"],
168 2 : )
169 2 : .unwrap()
170 2 : });
171 :
/// Label value `from_client`; by its name, a `source` value for
/// `NUM_CANCELLATION_REQUESTS` (requests that came from a client).
pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_CLIENT: &str = "from_client";
/// Label value `from_redis`; by its name, a `source` value for
/// `NUM_CANCELLATION_REQUESTS` (requests that came via redis).
pub const NUM_CANCELLATION_REQUESTS_SOURCE_FROM_REDIS: &str = "from_redis";
174 :
/// What a paused [`LatencyTimer`] is waiting for.
///
/// The variant selects which accumulated duration the pause is charged to when the
/// pause guard is dropped.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Waiting {
    /// Waiting on the control plane.
    Cplane,
    /// Waiting on the client.
    Client,
    /// Waiting on compute.
    Compute,
}
180 :
/// Total time a timer has spent paused, split by what it was waiting for.
///
/// Every field starts at zero via `Default`.
#[derive(Default)]
struct Accumulated {
    /// Time charged to `Waiting::Cplane` pauses.
    cplane: time::Duration,
    /// Time charged to `Waiting::Client` pauses.
    client: time::Duration,
    /// Time charged to `Waiting::Compute` pauses.
    compute: time::Duration,
}
187 :
/// Result of the operation being timed; used as the `outcome` metric label.
enum Outcome {
    /// The operation completed successfully.
    Success,
    /// The operation did not complete successfully.
    Failed,
}

impl Outcome {
    /// Returns the label value for this outcome: `"success"` or `"failed"`.
    fn as_str(&self) -> &'static str {
        match *self {
            Self::Success => "success",
            Self::Failed => "failed",
        }
    }
}
201 :
202 : pub struct LatencyTimer {
203 : // time since the stopwatch was started
204 : start: time::Instant,
205 : // time since the stopwatch was stopped
206 : stop: Option<time::Instant>,
207 : // accumulated time on the stopwatch
208 : accumulated: Accumulated,
209 : // label data
210 : protocol: &'static str,
211 : cold_start_info: ColdStartInfo,
212 : outcome: Outcome,
213 : }
214 :
215 : pub struct LatencyTimerPause<'a> {
216 : timer: &'a mut LatencyTimer,
217 : start: time::Instant,
218 : waiting_for: Waiting,
219 : }
220 :
221 : impl LatencyTimer {
222 70 : pub fn new(protocol: &'static str) -> Self {
223 70 : Self {
224 70 : start: time::Instant::now(),
225 70 : stop: None,
226 70 : accumulated: Accumulated::default(),
227 70 : protocol,
228 70 : cold_start_info: ColdStartInfo::Unknown,
229 70 : // assume failed unless otherwise specified
230 70 : outcome: Outcome::Failed,
231 70 : }
232 70 : }
233 :
234 30 : pub fn pause(&mut self, waiting_for: Waiting) -> LatencyTimerPause<'_> {
235 30 : LatencyTimerPause {
236 30 : timer: self,
237 30 : start: Instant::now(),
238 30 : waiting_for,
239 30 : }
240 30 : }
241 :
242 0 : pub fn cold_start_info(&mut self, cold_start_info: ColdStartInfo) {
243 0 : self.cold_start_info = cold_start_info;
244 0 : }
245 :
246 8 : pub fn success(&mut self) {
247 8 : // stop the stopwatch and record the time that we have accumulated
248 8 : self.stop = Some(time::Instant::now());
249 8 :
250 8 : // success
251 8 : self.outcome = Outcome::Success;
252 8 : }
253 : }
254 :
255 : impl Drop for LatencyTimerPause<'_> {
256 30 : fn drop(&mut self) {
257 30 : let dur = self.start.elapsed();
258 30 : match self.waiting_for {
259 0 : Waiting::Cplane => self.timer.accumulated.cplane += dur,
260 30 : Waiting::Client => self.timer.accumulated.client += dur,
261 0 : Waiting::Compute => self.timer.accumulated.compute += dur,
262 : }
263 30 : }
264 : }
265 :
266 : impl Drop for LatencyTimer {
267 70 : fn drop(&mut self) {
268 70 : let duration = self
269 70 : .stop
270 70 : .unwrap_or_else(time::Instant::now)
271 70 : .duration_since(self.start);
272 70 : // Excluding cplane communication from the accumulated time.
273 70 : COMPUTE_CONNECTION_LATENCY
274 70 : .with_label_values(&[
275 70 : self.protocol,
276 70 : self.cold_start_info.as_str(),
277 70 : self.outcome.as_str(),
278 70 : "client",
279 70 : ])
280 70 : .observe((duration.saturating_sub(self.accumulated.client)).as_secs_f64());
281 70 : // Exclude client and cplane communication from the accumulated time.
282 70 : let accumulated_total = self.accumulated.client + self.accumulated.cplane;
283 70 : COMPUTE_CONNECTION_LATENCY
284 70 : .with_label_values(&[
285 70 : self.protocol,
286 70 : self.cold_start_info.as_str(),
287 70 : self.outcome.as_str(),
288 70 : "client_and_cplane",
289 70 : ])
290 70 : .observe((duration.saturating_sub(accumulated_total)).as_secs_f64());
291 70 : }
292 : }
293 :
294 8 : pub static NUM_CONNECTION_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
295 8 : register_int_counter_vec!(
296 8 : "proxy_connection_failures_total",
297 8 : "Number of connection failures (per kind).",
298 8 : &["kind"],
299 8 : )
300 8 : .unwrap()
301 8 : });
302 :
303 4 : pub static NUM_WAKEUP_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
304 4 : register_int_counter_vec!(
305 4 : "proxy_connection_failures_breakdown",
306 4 : "Number of wake-up failures (per kind).",
307 4 : &["retry", "kind"],
308 4 : )
309 4 : .unwrap()
310 4 : });
311 :
312 0 : pub static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
313 0 : register_int_counter_vec!(
314 0 : "proxy_io_bytes",
315 0 : "Number of bytes sent/received between all clients and backends.",
316 0 : &["direction"],
317 0 : )
318 0 : .unwrap()
319 0 : });
320 :
/// Maps a boolean to the static string `"true"` or `"false"`.
pub const fn bool_to_str(x: bool) -> &'static str {
    match x {
        true => "true",
        false => "false",
    }
}
328 :
329 14 : pub static CONNECTING_ENDPOINTS: Lazy<HyperLogLogVec<32>> = Lazy::new(|| {
330 14 : register_hll_vec!(
331 14 : 32,
332 14 : "proxy_connecting_endpoints",
333 14 : "HLL approximate cardinality of endpoints that are connecting",
334 14 : &["protocol"],
335 14 : )
336 14 : .unwrap()
337 14 : });
338 :
339 0 : pub static ERROR_BY_KIND: Lazy<IntCounterVec> = Lazy::new(|| {
340 0 : register_int_counter_vec!(
341 0 : "proxy_errors_total",
342 0 : "Number of errors by a given classification",
343 0 : &["type"],
344 0 : )
345 0 : .unwrap()
346 0 : });
347 :
348 0 : pub static ENDPOINT_ERRORS_BY_KIND: Lazy<HyperLogLogVec<32>> = Lazy::new(|| {
349 0 : register_hll_vec!(
350 0 : 32,
351 0 : "proxy_endpoints_affected_by_errors",
352 0 : "Number of endpoints affected by errors of a given classification",
353 0 : &["type"],
354 0 : )
355 0 : .unwrap()
356 0 : });
357 :
358 0 : pub static REDIS_BROKEN_MESSAGES: Lazy<IntCounterVec> = Lazy::new(|| {
359 0 : register_int_counter_vec!(
360 0 : "proxy_redis_errors_total",
361 0 : "Number of errors by a given classification",
362 0 : &["channel"],
363 0 : )
364 0 : .unwrap()
365 0 : });
366 :
367 0 : pub static TLS_HANDSHAKE_FAILURES: Lazy<IntCounter> = Lazy::new(|| {
368 0 : register_int_counter!(
369 0 : "proxy_tls_handshake_failures",
370 0 : "Number of TLS handshake failures",
371 0 : )
372 0 : .unwrap()
373 0 : });
374 :
375 0 : pub static ENDPOINTS_AUTH_RATE_LIMITED: Lazy<HyperLogLog<32>> = Lazy::new(|| {
376 0 : register_hll!(
377 0 : 32,
378 0 : "proxy_endpoints_auth_rate_limits",
379 0 : "Number of endpoints affected by authentication rate limits",
380 0 : )
381 0 : .unwrap()
382 0 : });
383 :
384 0 : pub static AUTH_RATE_LIMIT_HITS: Lazy<IntCounter> = Lazy::new(|| {
385 0 : register_int_counter!(
386 0 : "proxy_requests_auth_rate_limits_total",
387 0 : "Number of connection requests affected by authentication rate limits",
388 0 : )
389 0 : .unwrap()
390 0 : });
|