Line data Source code
1 : use std::time::Duration;
2 :
3 : use pageserver_api::models::detach_ancestor::AncestorDetached;
4 : use pageserver_api::models::{
5 : DetachBehavior, LocationConfig, LocationConfigListResponse, LsnLease, PageserverUtilization,
6 : SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest,
7 : TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest,
8 : TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
9 : };
10 : use pageserver_api::shard::TenantShardId;
11 : use pageserver_client::BlockUnblock;
12 : use pageserver_client::mgmt_api::{Client, Result};
13 : use reqwest::StatusCode;
14 : use utils::id::{NodeId, TenantId, TimelineId};
15 : use utils::lsn::Lsn;
16 :
17 : use crate::hadron_utils::TenantShardSizeMap;
18 :
19 : /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
20 : /// controller to collect metrics in a non-intrusive manner.
21 : #[derive(Debug, Clone)]
22 : pub(crate) struct PageserverClient {
23 : inner: Client,
24 : node_id_label: String,
25 : }
26 :
/// Evaluates `$invoke` (a `Result`-valued expression) while recording pageserver
/// request metrics, and yields the result unchanged.
///
/// * `$name`    - literal used as the `path` label.
/// * `$method`  - value used as the `method` label.
/// * `$node_id` - value used as the `pageserver_id` label.
/// * `$invoke`  - the expression to measure; it is evaluated exactly once.
///
/// A latency timer is started before `$invoke` is evaluated and its guard is held
/// until the end of the expansion. If the result is an error, the request error
/// counter is incremented with the same labels.
macro_rules! measured_request {
    ($name:literal, $method:expr, $node_id:expr, $invoke:expr) => {{
        let request_labels = crate::metrics::PageserverRequestLabelGroup {
            pageserver_id: $node_id,
            path: $name,
            method: $method,
        };

        let metrics_group = &crate::metrics::METRICS_REGISTRY.metrics_group;

        // The guard must stay alive across `$invoke`; it is dropped when this
        // block ends, after the outcome has been inspected.
        let _latency_guard = metrics_group
            .storage_controller_pageserver_request_latency
            .start_timer(request_labels.clone());

        let outcome = $invoke;

        if outcome.is_err() {
            metrics_group
                .storage_controller_pageserver_request_error
                .inc(request_labels);
        }

        outcome
    }};
}
52 :
53 : impl PageserverClient {
54 0 : pub(crate) fn new(
55 0 : node_id: NodeId,
56 0 : raw_client: reqwest::Client,
57 0 : mgmt_api_endpoint: String,
58 0 : jwt: Option<&str>,
59 0 : ) -> Self {
60 0 : Self {
61 0 : inner: Client::new(raw_client, mgmt_api_endpoint, jwt),
62 0 : node_id_label: node_id.0.to_string(),
63 0 : }
64 0 : }
65 :
66 0 : pub(crate) async fn tenant_delete(&self, tenant_shard_id: TenantShardId) -> Result<StatusCode> {
67 0 : measured_request!(
68 0 : "tenant",
69 0 : crate::metrics::Method::Delete,
70 0 : &self.node_id_label,
71 0 : self.inner.tenant_delete(tenant_shard_id).await
72 : )
73 0 : }
74 :
75 0 : pub(crate) async fn tenant_time_travel_remote_storage(
76 0 : &self,
77 0 : tenant_shard_id: TenantShardId,
78 0 : timestamp: &str,
79 0 : done_if_after: &str,
80 0 : ) -> Result<()> {
81 0 : measured_request!(
82 0 : "tenant_time_travel_remote_storage",
83 0 : crate::metrics::Method::Put,
84 0 : &self.node_id_label,
85 0 : self.inner
86 0 : .tenant_time_travel_remote_storage(tenant_shard_id, timestamp, done_if_after)
87 0 : .await
88 : )
89 0 : }
90 :
91 : #[expect(dead_code)]
92 0 : pub(crate) async fn tenant_timeline_compact(
93 0 : &self,
94 0 : tenant_shard_id: TenantShardId,
95 0 : timeline_id: TimelineId,
96 0 : force_image_layer_creation: bool,
97 0 : wait_until_done: bool,
98 0 : ) -> Result<()> {
99 0 : measured_request!(
100 0 : "tenant_timeline_compact",
101 0 : crate::metrics::Method::Put,
102 0 : &self.node_id_label,
103 0 : self.inner
104 0 : .tenant_timeline_compact(
105 0 : tenant_shard_id,
106 0 : timeline_id,
107 0 : force_image_layer_creation,
108 0 : true,
109 0 : false,
110 0 : wait_until_done,
111 0 : )
112 0 : .await
113 : )
114 0 : }
115 :
116 : /* BEGIN_HADRON */
117 0 : pub(crate) async fn tenant_timeline_describe(
118 0 : &self,
119 0 : tenant_shard_id: &TenantShardId,
120 0 : timeline_id: &TimelineId,
121 0 : ) -> Result<TimelineInfo> {
122 0 : measured_request!(
123 0 : "tenant_timeline_describe",
124 0 : crate::metrics::Method::Get,
125 0 : &self.node_id_label,
126 0 : self.inner
127 0 : .tenant_timeline_describe(tenant_shard_id, timeline_id,)
128 0 : .await
129 : )
130 0 : }
131 :
132 : #[expect(dead_code)]
133 0 : pub(crate) async fn list_tenant_visible_size(&self) -> Result<TenantShardSizeMap> {
134 0 : measured_request!(
135 0 : "list_tenant_visible_size",
136 0 : crate::metrics::Method::Get,
137 0 : &self.node_id_label,
138 0 : self.inner.list_tenant_visible_size().await
139 : )
140 0 : .map(TenantShardSizeMap::new)
141 0 : }
142 : /* END_HADRON */
143 :
144 0 : pub(crate) async fn tenant_scan_remote_storage(
145 0 : &self,
146 0 : tenant_id: TenantId,
147 0 : ) -> Result<TenantScanRemoteStorageResponse> {
148 0 : measured_request!(
149 0 : "tenant_scan_remote_storage",
150 0 : crate::metrics::Method::Get,
151 0 : &self.node_id_label,
152 0 : self.inner.tenant_scan_remote_storage(tenant_id).await
153 : )
154 0 : }
155 :
156 0 : pub(crate) async fn tenant_secondary_download(
157 0 : &self,
158 0 : tenant_id: TenantShardId,
159 0 : wait: Option<std::time::Duration>,
160 0 : ) -> Result<(StatusCode, SecondaryProgress)> {
161 0 : measured_request!(
162 0 : "tenant_secondary_download",
163 0 : crate::metrics::Method::Post,
164 0 : &self.node_id_label,
165 0 : self.inner.tenant_secondary_download(tenant_id, wait).await
166 : )
167 0 : }
168 :
169 0 : pub(crate) async fn tenant_secondary_status(
170 0 : &self,
171 0 : tenant_shard_id: TenantShardId,
172 0 : ) -> Result<SecondaryProgress> {
173 0 : measured_request!(
174 0 : "tenant_secondary_status",
175 0 : crate::metrics::Method::Get,
176 0 : &self.node_id_label,
177 0 : self.inner.tenant_secondary_status(tenant_shard_id).await
178 : )
179 0 : }
180 :
181 0 : pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
182 0 : measured_request!(
183 0 : "tenant_heatmap_upload",
184 0 : crate::metrics::Method::Post,
185 0 : &self.node_id_label,
186 0 : self.inner.tenant_heatmap_upload(tenant_id).await
187 : )
188 0 : }
189 :
190 0 : pub(crate) async fn location_config(
191 0 : &self,
192 0 : tenant_shard_id: TenantShardId,
193 0 : config: LocationConfig,
194 0 : flush_ms: Option<std::time::Duration>,
195 0 : lazy: bool,
196 0 : ) -> Result<()> {
197 0 : measured_request!(
198 0 : "location_config",
199 0 : crate::metrics::Method::Put,
200 0 : &self.node_id_label,
201 0 : self.inner
202 0 : .location_config(tenant_shard_id, config, flush_ms, lazy)
203 0 : .await
204 : )
205 0 : }
206 :
207 0 : pub(crate) async fn list_location_config(&self) -> Result<LocationConfigListResponse> {
208 0 : measured_request!(
209 0 : "location_configs",
210 0 : crate::metrics::Method::Get,
211 0 : &self.node_id_label,
212 0 : self.inner.list_location_config().await
213 : )
214 0 : }
215 :
216 0 : pub(crate) async fn get_location_config(
217 0 : &self,
218 0 : tenant_shard_id: TenantShardId,
219 0 : ) -> Result<Option<LocationConfig>> {
220 0 : measured_request!(
221 0 : "location_config",
222 0 : crate::metrics::Method::Get,
223 0 : &self.node_id_label,
224 0 : self.inner.get_location_config(tenant_shard_id).await
225 : )
226 0 : }
227 :
228 0 : pub(crate) async fn timeline_create(
229 0 : &self,
230 0 : tenant_shard_id: TenantShardId,
231 0 : req: &TimelineCreateRequest,
232 0 : ) -> Result<TimelineInfo> {
233 0 : measured_request!(
234 0 : "timeline",
235 0 : crate::metrics::Method::Post,
236 0 : &self.node_id_label,
237 0 : self.inner.timeline_create(tenant_shard_id, req).await
238 : )
239 0 : }
240 :
241 0 : pub(crate) async fn timeline_delete(
242 0 : &self,
243 0 : tenant_shard_id: TenantShardId,
244 0 : timeline_id: TimelineId,
245 0 : ) -> Result<StatusCode> {
246 0 : measured_request!(
247 0 : "timeline",
248 0 : crate::metrics::Method::Delete,
249 0 : &self.node_id_label,
250 0 : self.inner
251 0 : .timeline_delete(tenant_shard_id, timeline_id)
252 0 : .await
253 : )
254 0 : }
255 :
256 0 : pub(crate) async fn timeline_lease_lsn(
257 0 : &self,
258 0 : tenant_shard_id: TenantShardId,
259 0 : timeline_id: TimelineId,
260 0 : lsn: Lsn,
261 0 : ) -> Result<LsnLease> {
262 0 : measured_request!(
263 0 : "timeline_lease_lsn",
264 0 : crate::metrics::Method::Post,
265 0 : &self.node_id_label,
266 0 : self.inner
267 0 : .timeline_init_lsn_lease(tenant_shard_id, timeline_id, lsn)
268 0 : .await
269 : )
270 0 : }
271 :
272 : #[allow(unused)]
273 0 : pub(crate) async fn timeline_detail(
274 0 : &self,
275 0 : tenant_shard_id: TenantShardId,
276 0 : timeline_id: TimelineId,
277 0 : ) -> Result<TimelineInfo> {
278 0 : measured_request!(
279 0 : "timeline_detail",
280 0 : crate::metrics::Method::Get,
281 0 : &self.node_id_label,
282 0 : self.inner
283 0 : .timeline_detail(tenant_shard_id, timeline_id)
284 0 : .await
285 : )
286 0 : }
287 :
288 0 : pub(crate) async fn tenant_shard_split(
289 0 : &self,
290 0 : tenant_shard_id: TenantShardId,
291 0 : req: TenantShardSplitRequest,
292 0 : ) -> Result<TenantShardSplitResponse> {
293 0 : measured_request!(
294 0 : "tenant_shard_split",
295 0 : crate::metrics::Method::Put,
296 0 : &self.node_id_label,
297 0 : self.inner.tenant_shard_split(tenant_shard_id, req).await
298 : )
299 0 : }
300 :
301 0 : pub(crate) async fn timeline_list(
302 0 : &self,
303 0 : tenant_shard_id: &TenantShardId,
304 0 : ) -> Result<Vec<TimelineInfo>> {
305 0 : measured_request!(
306 0 : "timelines",
307 0 : crate::metrics::Method::Get,
308 0 : &self.node_id_label,
309 0 : self.inner.timeline_list(tenant_shard_id).await
310 : )
311 0 : }
312 :
313 0 : pub(crate) async fn timeline_archival_config(
314 0 : &self,
315 0 : tenant_shard_id: TenantShardId,
316 0 : timeline_id: TimelineId,
317 0 : req: &TimelineArchivalConfigRequest,
318 0 : ) -> Result<()> {
319 0 : measured_request!(
320 0 : "timeline_archival_config",
321 0 : crate::metrics::Method::Put,
322 0 : &self.node_id_label,
323 0 : self.inner
324 0 : .timeline_archival_config(tenant_shard_id, timeline_id, req)
325 0 : .await
326 : )
327 0 : }
328 :
329 0 : pub(crate) async fn timeline_detach_ancestor(
330 0 : &self,
331 0 : tenant_shard_id: TenantShardId,
332 0 : timeline_id: TimelineId,
333 0 : behavior: Option<DetachBehavior>,
334 0 : ) -> Result<AncestorDetached> {
335 0 : measured_request!(
336 0 : "timeline_detach_ancestor",
337 0 : crate::metrics::Method::Put,
338 0 : &self.node_id_label,
339 0 : self.inner
340 0 : .timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior)
341 0 : .await
342 : )
343 0 : }
344 :
345 0 : pub(crate) async fn timeline_block_unblock_gc(
346 0 : &self,
347 0 : tenant_shard_id: TenantShardId,
348 0 : timeline_id: TimelineId,
349 0 : dir: BlockUnblock,
350 0 : ) -> Result<()> {
351 : // measuring these makes no sense because we synchronize with the gc loop and remote
352 : // storage on block_gc so there should be huge outliers
353 0 : measured_request!(
354 0 : "timeline_block_unblock_gc",
355 0 : crate::metrics::Method::Post,
356 0 : &self.node_id_label,
357 0 : self.inner
358 0 : .timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
359 0 : .await
360 : )
361 0 : }
362 :
363 0 : pub(crate) async fn timeline_download_heatmap_layers(
364 0 : &self,
365 0 : tenant_shard_id: TenantShardId,
366 0 : timeline_id: TimelineId,
367 0 : concurrency: Option<usize>,
368 0 : recurse: bool,
369 0 : ) -> Result<()> {
370 0 : measured_request!(
371 0 : "download_heatmap_layers",
372 0 : crate::metrics::Method::Post,
373 0 : &self.node_id_label,
374 0 : self.inner
375 0 : .timeline_download_heatmap_layers(
376 0 : tenant_shard_id,
377 0 : timeline_id,
378 0 : concurrency,
379 0 : recurse
380 0 : )
381 0 : .await
382 : )
383 0 : }
384 :
385 0 : pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
386 0 : measured_request!(
387 0 : "utilization",
388 0 : crate::metrics::Method::Get,
389 0 : &self.node_id_label,
390 0 : self.inner.get_utilization().await
391 : )
392 0 : }
393 :
394 0 : pub(crate) async fn top_tenant_shards(
395 0 : &self,
396 0 : request: TopTenantShardsRequest,
397 0 : ) -> Result<TopTenantShardsResponse> {
398 0 : measured_request!(
399 0 : "top_tenants",
400 0 : crate::metrics::Method::Post,
401 0 : &self.node_id_label,
402 0 : self.inner.top_tenant_shards(request).await
403 : )
404 0 : }
405 :
406 : #[expect(dead_code)]
407 0 : pub(crate) async fn reset_alert_gauges(&self) -> Result<()> {
408 0 : measured_request!(
409 0 : "reset_alert_gauges",
410 0 : crate::metrics::Method::Post,
411 0 : &self.node_id_label,
412 0 : self.inner.reset_alert_gauges().await
413 : )
414 0 : }
415 :
416 0 : pub(crate) async fn wait_lsn(
417 0 : &self,
418 0 : tenant_shard_id: TenantShardId,
419 0 : request: TenantWaitLsnRequest,
420 0 : ) -> Result<StatusCode> {
421 0 : measured_request!(
422 0 : "wait_lsn",
423 0 : crate::metrics::Method::Post,
424 0 : &self.node_id_label,
425 0 : self.inner.wait_lsn(tenant_shard_id, request).await
426 : )
427 0 : }
428 :
429 0 : pub(crate) async fn activate_post_import(
430 0 : &self,
431 0 : tenant_shard_id: TenantShardId,
432 0 : timeline_id: TimelineId,
433 0 : timeline_activate_timeout: Duration,
434 0 : ) -> Result<TimelineInfo> {
435 0 : measured_request!(
436 0 : "activate_post_import",
437 0 : crate::metrics::Method::Put,
438 0 : &self.node_id_label,
439 0 : self.inner
440 0 : .activate_post_import(tenant_shard_id, timeline_id, timeline_activate_timeout)
441 0 : .await
442 : )
443 0 : }
444 :
445 0 : pub(crate) async fn update_feature_flag_spec(&self, spec: String) -> Result<()> {
446 0 : measured_request!(
447 0 : "update_feature_flag_spec",
448 0 : crate::metrics::Method::Post,
449 0 : &self.node_id_label,
450 0 : self.inner.update_feature_flag_spec(spec).await
451 : )
452 0 : }
453 : }
|