|             Line data    Source code 
       1              : use pageserver_api::models::detach_ancestor::AncestorDetached;
       2              : use pageserver_api::models::{
       3              :     LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
       4              :     TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
       5              :     TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo,
       6              :     TopTenantShardsRequest, TopTenantShardsResponse,
       7              : };
       8              : use pageserver_api::shard::TenantShardId;
       9              : use pageserver_client::BlockUnblock;
      10              : use pageserver_client::mgmt_api::{Client, Result};
      11              : use reqwest::{Certificate, StatusCode};
      12              : use utils::id::{NodeId, TenantId, TimelineId};
      13              : 
      14              : /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
      15              : /// controller to collect metrics in a non-intrusive manner.
                        ///
                        /// Two pieces of state are kept: the wrapped client and a pre-rendered node id string
                        /// that is handed to the metrics code as a label.
      16              : #[derive(Debug, Clone)]
      17              : pub(crate) struct PageserverClient {
                            /// The wrapped management API client that requests are forwarded to.
      18              :     inner: Client,
                            /// Node id rendered as a string (`node_id.0.to_string()` in the constructors), passed
                            /// as the `pageserver_id` label when a request is measured.
      19              :     node_id_label: String,
      20              : }
      21              : 
      22              : macro_rules! measured_request {
                            // Wraps one pageserver request expression with metrics bookkeeping.
                            //
                            // Arguments, in order:
                            // - `$name`: string literal used as the `path` label.
                            // - `$method`: expression used as the `method` label.
                            // - `$node_id`: expression used as the `pageserver_id` label.
                            // - `$invoke`: the request expression itself. Its value must offer `is_err()`, and it
                            //   is the value the whole macro invocation evaluates to.
      23              :     ($name:literal, $method:expr, $node_id: expr, $invoke:expr) => {{
      24              :         let labels = crate::metrics::PageserverRequestLabelGroup {
      25              :             pageserver_id: $node_id,
      26              :             path: $name,
      27              :             method: $method,
      28              :         };
      29              : 
      30              :         let latency = &crate::metrics::METRICS_REGISTRY
      31              :             .metrics_group
      32              :             .storage_controller_pageserver_request_latency;
                                // The guard is bound to a named local so it lives until the end of this block and
                                // therefore spans `$invoke`. Presumably the elapsed time is recorded when the guard
                                // is dropped -- TODO confirm against the metrics crate.
                                // `labels` is cloned here because the original is moved into `inc` on the error path.
      33              :         let _timer_guard = latency.start_timer(labels.clone());
      34              : 
                                // The request expression is evaluated exactly once, after the timer was started.
      35              :         let res = $invoke;
      36              : 
                                // Only a failed request touches the error counter.
      37              :         if res.is_err() {
      38              :             let error_counters = &crate::metrics::METRICS_REGISTRY
      39              :                 .metrics_group
      40              :                 .storage_controller_pageserver_request_error;
      41              :             error_counters.inc(labels)
      42              :         }
      43              : 
                                // Hand the untouched result back to the caller.
      44              :         res
      45              :     }};
      46              : }
      47              : 
      48              : impl PageserverClient {
                            // Apart from the two constructors, every method below forwards its arguments to the
                            // same-named method on `self.inner` inside `measured_request!`, passing a path label,
                            // a `crate::metrics::Method` variant and `&self.node_id_label`, and returns the inner
                            // result unchanged.

                            /// Creates a client for the pageserver identified by `node_id`.
                            ///
                            /// The inner client is built with `Client::new(mgmt_api_endpoint, jwt, ssl_ca_cert)`.
                            /// `node_id.0` is turned into a string here and stored as the metrics label.
                            ///
                            /// # Errors
                            ///
                            /// Returns the error from `Client::new` unchanged.
      49            0 :     pub(crate) fn new(
      50            0 :         node_id: NodeId,
      51            0 :         mgmt_api_endpoint: String,
      52            0 :         jwt: Option<&str>,
      53            0 :         ssl_ca_cert: Option<Certificate>,
      54            0 :     ) -> Result<Self> {
      55            0 :         Ok(Self {
      56            0 :             inner: Client::new(mgmt_api_endpoint, jwt, ssl_ca_cert)?,
      57            0 :             node_id_label: node_id.0.to_string(),
      58              :         })
      59            0 :     }
      60              : 
                            /// Creates a client on top of an existing `reqwest::Client`.
                            ///
                            /// The inner client is built with `Client::from_client(raw_client, mgmt_api_endpoint, jwt)`;
                            /// unlike `new`, this constructor returns `Self` directly rather than a `Result`.
      61            0 :     pub(crate) fn from_client(
      62            0 :         node_id: NodeId,
      63            0 :         raw_client: reqwest::Client,
      64            0 :         mgmt_api_endpoint: String,
      65            0 :         jwt: Option<&str>,
      66            0 :     ) -> Self {
      67            0 :         Self {
      68            0 :             inner: Client::from_client(raw_client, mgmt_api_endpoint, jwt),
      69            0 :             node_id_label: node_id.0.to_string(),
      70            0 :         }
      71            0 :     }
      72              : 
                            /// Forwards to `self.inner.tenant_delete(tenant_shard_id)`.
                            /// Measured with path label `"tenant"` and method `Delete`.
      73            0 :     pub(crate) async fn tenant_delete(&self, tenant_shard_id: TenantShardId) -> Result<StatusCode> {
      74            0 :         measured_request!(
      75            0 :             "tenant",
      76            0 :             crate::metrics::Method::Delete,
      77            0 :             &self.node_id_label,
      78            0 :             self.inner.tenant_delete(tenant_shard_id).await
      79            0 :         )
      80            0 :     }
      81              : 
                            /// Forwards to `self.inner.tenant_time_travel_remote_storage(tenant_shard_id, timestamp,
                            /// done_if_after)`. Both time arguments are passed through as the strings they arrive as.
                            /// Measured with path label `"tenant_time_travel_remote_storage"` and method `Put`.
      82            0 :     pub(crate) async fn tenant_time_travel_remote_storage(
      83            0 :         &self,
      84            0 :         tenant_shard_id: TenantShardId,
      85            0 :         timestamp: &str,
      86            0 :         done_if_after: &str,
      87            0 :     ) -> Result<()> {
      88            0 :         measured_request!(
      89            0 :             "tenant_time_travel_remote_storage",
      90            0 :             crate::metrics::Method::Put,
      91            0 :             &self.node_id_label,
      92            0 :             self.inner
      93            0 :                 .tenant_time_travel_remote_storage(tenant_shard_id, timestamp, done_if_after)
      94            0 :                 .await
      95            0 :         )
      96            0 :     }
      97              : 
                            /// Forwards to `self.inner.tenant_scan_remote_storage(tenant_id)`. Note that this takes
                            /// a `TenantId`, not a `TenantShardId`.
                            /// Measured with path label `"tenant_scan_remote_storage"` and method `Get`.
      98            0 :     pub(crate) async fn tenant_scan_remote_storage(
      99            0 :         &self,
     100            0 :         tenant_id: TenantId,
     101            0 :     ) -> Result<TenantScanRemoteStorageResponse> {
     102            0 :         measured_request!(
     103            0 :             "tenant_scan_remote_storage",
     104            0 :             crate::metrics::Method::Get,
     105            0 :             &self.node_id_label,
     106            0 :             self.inner.tenant_scan_remote_storage(tenant_id).await
     107            0 :         )
     108            0 :     }
     109              : 
                            /// Forwards to `self.inner.tenant_secondary_download(tenant_id, wait)`. Despite its name,
                            /// the `tenant_id` parameter is a `TenantShardId`.
                            /// Measured with path label `"tenant_secondary_download"` and method `Post`.
     110            0 :     pub(crate) async fn tenant_secondary_download(
     111            0 :         &self,
     112            0 :         tenant_id: TenantShardId,
     113            0 :         wait: Option<std::time::Duration>,
     114            0 :     ) -> Result<(StatusCode, SecondaryProgress)> {
     115            0 :         measured_request!(
     116            0 :             "tenant_secondary_download",
     117            0 :             crate::metrics::Method::Post,
     118            0 :             &self.node_id_label,
     119            0 :             self.inner.tenant_secondary_download(tenant_id, wait).await
     120            0 :         )
     121            0 :     }
     122              : 
                            /// Forwards to `self.inner.tenant_secondary_status(tenant_shard_id)`.
                            /// Measured with path label `"tenant_secondary_status"` and method `Get`.
     123            0 :     pub(crate) async fn tenant_secondary_status(
     124            0 :         &self,
     125            0 :         tenant_shard_id: TenantShardId,
     126            0 :     ) -> Result<SecondaryProgress> {
     127            0 :         measured_request!(
     128            0 :             "tenant_secondary_status",
     129            0 :             crate::metrics::Method::Get,
     130            0 :             &self.node_id_label,
     131            0 :             self.inner.tenant_secondary_status(tenant_shard_id).await
     132            0 :         )
     133            0 :     }
     134              : 
                            /// Forwards to `self.inner.tenant_heatmap_upload(tenant_id)`. Despite its name, the
                            /// `tenant_id` parameter is a `TenantShardId`.
                            /// Measured with path label `"tenant_heatmap_upload"` and method `Post`.
     135            0 :     pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
     136            0 :         measured_request!(
     137            0 :             "tenant_heatmap_upload",
     138            0 :             crate::metrics::Method::Post,
     139            0 :             &self.node_id_label,
     140            0 :             self.inner.tenant_heatmap_upload(tenant_id).await
     141            0 :         )
     142            0 :     }
     143              : 
                            /// Forwards to `self.inner.location_config(tenant_shard_id, config, flush_ms, lazy)`.
                            /// Measured with path label `"location_config"` and method `Put`; `get_location_config`
                            /// uses the same path label with method `Get`.
     144            0 :     pub(crate) async fn location_config(
     145            0 :         &self,
     146            0 :         tenant_shard_id: TenantShardId,
     147            0 :         config: LocationConfig,
     148            0 :         flush_ms: Option<std::time::Duration>,
     149            0 :         lazy: bool,
     150            0 :     ) -> Result<()> {
     151            0 :         measured_request!(
     152            0 :             "location_config",
     153            0 :             crate::metrics::Method::Put,
     154            0 :             &self.node_id_label,
     155            0 :             self.inner
     156            0 :                 .location_config(tenant_shard_id, config, flush_ms, lazy)
     157            0 :                 .await
     158            0 :         )
     159            0 :     }
     160              : 
                            /// Forwards to `self.inner.list_location_config()`.
                            /// Measured with path label `"location_configs"` (plural) and method `Get`.
     161            0 :     pub(crate) async fn list_location_config(&self) -> Result<LocationConfigListResponse> {
     162            0 :         measured_request!(
     163            0 :             "location_configs",
     164            0 :             crate::metrics::Method::Get,
     165            0 :             &self.node_id_label,
     166            0 :             self.inner.list_location_config().await
     167            0 :         )
     168            0 :     }
     169              : 
                            /// Forwards to `self.inner.get_location_config(tenant_shard_id)`; the successful value
                            /// is an `Option<LocationConfig>`.
                            /// Measured with path label `"location_config"` and method `Get`.
     170            0 :     pub(crate) async fn get_location_config(
     171            0 :         &self,
     172            0 :         tenant_shard_id: TenantShardId,
     173            0 :     ) -> Result<Option<LocationConfig>> {
     174            0 :         measured_request!(
     175            0 :             "location_config",
     176            0 :             crate::metrics::Method::Get,
     177            0 :             &self.node_id_label,
     178            0 :             self.inner.get_location_config(tenant_shard_id).await
     179            0 :         )
     180            0 :     }
     181              : 
                            /// Forwards to `self.inner.timeline_create(tenant_shard_id, req)`.
                            /// Measured with path label `"timeline"` and method `Post`; `timeline_delete` uses the
                            /// same path label with method `Delete`.
     182            0 :     pub(crate) async fn timeline_create(
     183            0 :         &self,
     184            0 :         tenant_shard_id: TenantShardId,
     185            0 :         req: &TimelineCreateRequest,
     186            0 :     ) -> Result<TimelineInfo> {
     187            0 :         measured_request!(
     188            0 :             "timeline",
     189            0 :             crate::metrics::Method::Post,
     190            0 :             &self.node_id_label,
     191            0 :             self.inner.timeline_create(tenant_shard_id, req).await
     192            0 :         )
     193            0 :     }
     194              : 
                            /// Forwards to `self.inner.timeline_delete(tenant_shard_id, timeline_id)`.
                            /// Measured with path label `"timeline"` and method `Delete`.
     195            0 :     pub(crate) async fn timeline_delete(
     196            0 :         &self,
     197            0 :         tenant_shard_id: TenantShardId,
     198            0 :         timeline_id: TimelineId,
     199            0 :     ) -> Result<StatusCode> {
     200            0 :         measured_request!(
     201            0 :             "timeline",
     202            0 :             crate::metrics::Method::Delete,
     203            0 :             &self.node_id_label,
     204            0 :             self.inner
     205            0 :                 .timeline_delete(tenant_shard_id, timeline_id)
     206            0 :                 .await
     207            0 :         )
     208            0 :     }
     209              : 
                            /// Forwards to `self.inner.tenant_shard_split(tenant_shard_id, req)`.
                            /// Measured with path label `"tenant_shard_split"` and method `Put`.
     210            0 :     pub(crate) async fn tenant_shard_split(
     211            0 :         &self,
     212            0 :         tenant_shard_id: TenantShardId,
     213            0 :         req: TenantShardSplitRequest,
     214            0 :     ) -> Result<TenantShardSplitResponse> {
     215            0 :         measured_request!(
     216            0 :             "tenant_shard_split",
     217            0 :             crate::metrics::Method::Put,
     218            0 :             &self.node_id_label,
     219            0 :             self.inner.tenant_shard_split(tenant_shard_id, req).await
     220            0 :         )
     221            0 :     }
     222              : 
                            /// Forwards to `self.inner.timeline_list(tenant_shard_id)`. Unlike most methods here,
                            /// the shard id is taken by reference.
                            /// Measured with path label `"timelines"` and method `Get`.
     223            0 :     pub(crate) async fn timeline_list(
     224            0 :         &self,
     225            0 :         tenant_shard_id: &TenantShardId,
     226            0 :     ) -> Result<Vec<TimelineInfo>> {
     227            0 :         measured_request!(
     228            0 :             "timelines",
     229            0 :             crate::metrics::Method::Get,
     230            0 :             &self.node_id_label,
     231            0 :             self.inner.timeline_list(tenant_shard_id).await
     232            0 :         )
     233            0 :     }
     234              : 
                            /// Forwards to `self.inner.timeline_archival_config(tenant_shard_id, timeline_id, req)`.
                            /// Measured with path label `"timeline_archival_config"` and method `Put`.
     235            0 :     pub(crate) async fn timeline_archival_config(
     236            0 :         &self,
     237            0 :         tenant_shard_id: TenantShardId,
     238            0 :         timeline_id: TimelineId,
     239            0 :         req: &TimelineArchivalConfigRequest,
     240            0 :     ) -> Result<()> {
     241            0 :         measured_request!(
     242            0 :             "timeline_archival_config",
     243            0 :             crate::metrics::Method::Put,
     244            0 :             &self.node_id_label,
     245            0 :             self.inner
     246            0 :                 .timeline_archival_config(tenant_shard_id, timeline_id, req)
     247            0 :                 .await
     248            0 :         )
     249            0 :     }
     250              : 
                            /// Forwards to `self.inner.timeline_detach_ancestor(tenant_shard_id, timeline_id)`.
                            /// Measured with path label `"timeline_detach_ancestor"` and method `Put`.
     251            0 :     pub(crate) async fn timeline_detach_ancestor(
     252            0 :         &self,
     253            0 :         tenant_shard_id: TenantShardId,
     254            0 :         timeline_id: TimelineId,
     255            0 :     ) -> Result<AncestorDetached> {
     256            0 :         measured_request!(
     257            0 :             "timeline_detach_ancestor",
     258            0 :             crate::metrics::Method::Put,
     259            0 :             &self.node_id_label,
     260            0 :             self.inner
     261            0 :                 .timeline_detach_ancestor(tenant_shard_id, timeline_id)
     262            0 :                 .await
     263            0 :         )
     264            0 :     }
     265              : 
                            /// Forwards to `self.inner.timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)`;
                            /// `dir` is handed to the inner client as-is.
                            /// Measured with path label `"timeline_block_unblock_gc"` and method `Post`.
     266            0 :     pub(crate) async fn timeline_block_unblock_gc(
     267            0 :         &self,
     268            0 :         tenant_shard_id: TenantShardId,
     269            0 :         timeline_id: TimelineId,
     270            0 :         dir: BlockUnblock,
     271            0 :     ) -> Result<()> {
     272            0 :         // measuring these makes no sense because we synchronize with the gc loop and remote
     273            0 :         // storage on block_gc so there should be huge outliers
                                // NOTE(review): despite the remark above, the call below is still wrapped in
                                // `measured_request!`, so it does feed the latency and error metrics. Either the
                                // comment or the measurement looks stale -- confirm which one is intended.
     274            0 :         measured_request!(
     275            0 :             "timeline_block_unblock_gc",
     276            0 :             crate::metrics::Method::Post,
     277            0 :             &self.node_id_label,
     278            0 :             self.inner
     279            0 :                 .timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir)
     280            0 :                 .await
     281            0 :         )
     282            0 :     }
     283              : 
                            /// Forwards to `self.inner.timeline_download_heatmap_layers(tenant_shard_id, timeline_id,
                            /// concurrency, recurse)`.
                            /// Measured with path label `"download_heatmap_layers"` and method `Post`.
     284            0 :     pub(crate) async fn timeline_download_heatmap_layers(
     285            0 :         &self,
     286            0 :         tenant_shard_id: TenantShardId,
     287            0 :         timeline_id: TimelineId,
     288            0 :         concurrency: Option<usize>,
     289            0 :         recurse: bool,
     290            0 :     ) -> Result<()> {
     291            0 :         measured_request!(
     292            0 :             "download_heatmap_layers",
     293            0 :             crate::metrics::Method::Post,
     294            0 :             &self.node_id_label,
     295            0 :             self.inner
     296            0 :                 .timeline_download_heatmap_layers(
     297            0 :                     tenant_shard_id,
     298            0 :                     timeline_id,
     299            0 :                     concurrency,
     300            0 :                     recurse
     301            0 :                 )
     302            0 :                 .await
     303            0 :         )
     304            0 :     }
     305              : 
                            /// Forwards to `self.inner.get_utilization()`.
                            /// Measured with path label `"utilization"` and method `Get`.
     306            0 :     pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
     307            0 :         measured_request!(
     308            0 :             "utilization",
     309            0 :             crate::metrics::Method::Get,
     310            0 :             &self.node_id_label,
     311            0 :             self.inner.get_utilization().await
     312            0 :         )
     313            0 :     }
     314              : 
                            /// Forwards to `self.inner.top_tenant_shards(request)`.
                            /// Measured with path label `"top_tenants"` and method `Post`.
     315            0 :     pub(crate) async fn top_tenant_shards(
     316            0 :         &self,
     317            0 :         request: TopTenantShardsRequest,
     318            0 :     ) -> Result<TopTenantShardsResponse> {
     319            0 :         measured_request!(
     320            0 :             "top_tenants",
     321            0 :             crate::metrics::Method::Post,
     322            0 :             &self.node_id_label,
     323            0 :             self.inner.top_tenant_shards(request).await
     324            0 :         )
     325            0 :     }
     326              : 
                            /// Forwards to `self.inner.wait_lsn(tenant_shard_id, request)`.
                            /// Measured with path label `"wait_lsn"` and method `Post`.
     327            0 :     pub(crate) async fn wait_lsn(
     328            0 :         &self,
     329            0 :         tenant_shard_id: TenantShardId,
     330            0 :         request: TenantWaitLsnRequest,
     331            0 :     ) -> Result<StatusCode> {
     332            0 :         measured_request!(
     333            0 :             "wait_lsn",
     334            0 :             crate::metrics::Method::Post,
     335            0 :             &self.node_id_label,
     336            0 :             self.inner.wait_lsn(tenant_shard_id, request).await
     337            0 :         )
     338            0 :     }
     339              : }
         |