LCOV - b4ae4c4857f9ef3e144e982a35ee23bc84c71983.info

LCOV - code coverage report

Current view:	top level - storage_controller/src - http.rs (source / functions)		Coverage	Total	Hit
Test:	b4ae4c4857f9ef3e144e982a35ee23bc84c71983.info	Lines:	0.0 %	1399	0
Test Date:	2024-10-22 22:13:45	Functions:	0.0 %	382	0

            Line data    Source code

       1              : use crate::http;
       2              : use crate::metrics::{
       3              :     HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup,
       4              :     METRICS_REGISTRY,
       5              : };
       6              : use crate::persistence::SafekeeperPersistence;
       7              : use crate::reconciler::ReconcileError;
       8              : use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT};
       9              : use anyhow::Context;
      10              : use futures::Future;
      11              : use hyper::header::CONTENT_TYPE;
      12              : use hyper::{Body, Request, Response};
      13              : use hyper::{StatusCode, Uri};
      14              : use metrics::{BuildInfo, NeonMetrics};
      15              : use pageserver_api::controller_api::{
      16              :     MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse,
      17              :     MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
      18              :     ShardsPreferredAzsRequest, TenantCreateRequest,
      19              : };
      20              : use pageserver_api::models::{
      21              :     TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
      22              :     TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest,
      23              : };
      24              : use pageserver_api::shard::TenantShardId;
      25              : use pageserver_client::{mgmt_api, BlockUnblock};
      26              : use std::str::FromStr;
      27              : use std::sync::Arc;
      28              : use std::time::{Duration, Instant};
      29              : use tokio_util::sync::CancellationToken;
      30              : use utils::auth::{Scope, SwappableJwtAuth};
      31              : use utils::failpoint_support::failpoints_handler;
      32              : use utils::http::endpoint::{auth_middleware, check_permission_with, request_span};
      33              : use utils::http::request::{must_get_query_param, parse_query_param, parse_request_param};
      34              : use utils::id::{TenantId, TimelineId};
      35              : 
      36              : use utils::{
      37              :     http::{
      38              :         endpoint::{self},
      39              :         error::ApiError,
      40              :         json::{json_request, json_response},
      41              :         RequestExt, RouterBuilder,
      42              :     },
      43              :     id::NodeId,
      44              : };
      45              : 
      46              : use pageserver_api::controller_api::{
      47              :     NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest,
      48              :     TenantShardMigrateRequest,
      49              : };
      50              : use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
      51              : 
      52              : use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
      53              : 
      54              : use routerify::Middleware;
      55              : 
      56              : /// State available to HTTP request handlers
      57              : pub struct HttpState {
      58              :     service: Arc<crate::service::Service>,
      59              :     auth: Option<Arc<SwappableJwtAuth>>,
      60              :     neon_metrics: NeonMetrics,
      61              :     allowlist_routes: Vec<Uri>,
      62              : }
      63              : 
      64              : impl HttpState {
      65            0 :     pub fn new(
      66            0 :         service: Arc<crate::service::Service>,
      67            0 :         auth: Option<Arc<SwappableJwtAuth>>,
      68            0 :         build_info: BuildInfo,
      69            0 :     ) -> Self {
      70            0 :         let allowlist_routes = ["/status", "/ready", "/metrics"]
      71            0 :             .iter()
      72            0 :             .map(|v| v.parse().unwrap())
      73            0 :             .collect::<Vec<_>>();
      74            0 :         Self {
      75            0 :             service,
      76            0 :             auth,
      77            0 :             neon_metrics: NeonMetrics::new(build_info),
      78            0 :             allowlist_routes,
      79            0 :         }
      80            0 :     }
      81              : }
      82              : 
      83              : #[inline(always)]
      84            0 : fn get_state(request: &Request<Body>) -> &HttpState {
      85            0 :     request
      86            0 :         .data::<Arc<HttpState>>()
      87            0 :         .expect("unknown state type")
      88            0 :         .as_ref()
      89            0 : }
      90              : 
      91              : /// Pageserver calls into this on startup, to learn which tenants it should attach
      92            0 : async fn handle_re_attach(req: Request<Body>) -> Result<Response<Body>, ApiError> {
      93            0 :     check_permissions(&req, Scope::GenerationsApi)?;
      94              : 
      95            0 :     let mut req = match maybe_forward(req).await {
      96            0 :         ForwardOutcome::Forwarded(res) => {
      97            0 :             return res;
      98              :         }
      99            0 :         ForwardOutcome::NotForwarded(req) => req,
     100              :     };
     101              : 
     102            0 :     let reattach_req = json_request::<ReAttachRequest>(&mut req).await?;
     103            0 :     let state = get_state(&req);
     104            0 :     json_response(StatusCode::OK, state.service.re_attach(reattach_req).await?)
     105            0 : }
     106              : 
     107              : /// Pageserver calls into this before doing deletions, to confirm that it still
     108              : /// holds the latest generation for the tenants with deletions enqueued
     109            0 : async fn handle_validate(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     110            0 :     check_permissions(&req, Scope::GenerationsApi)?;
     111              : 
     112            0 :     let mut req = match maybe_forward(req).await {
     113            0 :         ForwardOutcome::Forwarded(res) => {
     114            0 :             return res;
     115              :         }
     116            0 :         ForwardOutcome::NotForwarded(req) => req,
     117              :     };
     118              : 
     119            0 :     let validate_req = json_request::<ValidateRequest>(&mut req).await?;
     120            0 :     let state = get_state(&req);
     121            0 :     json_response(StatusCode::OK, state.service.validate(validate_req).await?)
     122            0 : }
     123              : 
     124              : /// Call into this before attaching a tenant to a pageserver, to acquire a generation number
     125              : /// (in the real control plane this is unnecessary, because the same program is managing
     126              : ///  generation numbers and doing attachments).
     127            0 : async fn handle_attach_hook(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     128            0 :     check_permissions(&req, Scope::Admin)?;
     129              : 
     130            0 :     let mut req = match maybe_forward(req).await {
     131            0 :         ForwardOutcome::Forwarded(res) => {
     132            0 :             return res;
     133              :         }
     134            0 :         ForwardOutcome::NotForwarded(req) => req,
     135              :     };
     136              : 
     137            0 :     let attach_req = json_request::<AttachHookRequest>(&mut req).await?;
     138            0 :     let state = get_state(&req);
     139            0 : 
     140            0 :     json_response(
     141            0 :         StatusCode::OK,
     142            0 :         state
     143            0 :             .service
     144            0 :             .attach_hook(attach_req)
     145            0 :             .await
     146            0 :             .map_err(ApiError::InternalServerError)?,
     147              :     )
     148            0 : }
     149              : 
     150            0 : async fn handle_inspect(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     151            0 :     check_permissions(&req, Scope::Admin)?;
     152              : 
     153            0 :     let mut req = match maybe_forward(req).await {
     154            0 :         ForwardOutcome::Forwarded(res) => {
     155            0 :             return res;
     156              :         }
     157            0 :         ForwardOutcome::NotForwarded(req) => req,
     158              :     };
     159              : 
     160            0 :     let inspect_req = json_request::<InspectRequest>(&mut req).await?;
     161              : 
     162            0 :     let state = get_state(&req);
     163            0 : 
     164            0 :     json_response(StatusCode::OK, state.service.inspect(inspect_req))
     165            0 : }
     166              : 
     167            0 : async fn handle_tenant_create(
     168            0 :     service: Arc<Service>,
     169            0 :     req: Request<Body>,
     170            0 : ) -> Result<Response<Body>, ApiError> {
     171            0 :     check_permissions(&req, Scope::PageServerApi)?;
     172              : 
     173            0 :     let mut req = match maybe_forward(req).await {
     174            0 :         ForwardOutcome::Forwarded(res) => {
     175            0 :             return res;
     176              :         }
     177            0 :         ForwardOutcome::NotForwarded(req) => req,
     178              :     };
     179              : 
     180            0 :     let create_req = json_request::<TenantCreateRequest>(&mut req).await?;
     181              : 
     182              :     json_response(
     183              :         StatusCode::CREATED,
     184            0 :         service.tenant_create(create_req).await?,
     185              :     )
     186            0 : }
     187              : 
     188            0 : async fn handle_tenant_location_config(
     189            0 :     service: Arc<Service>,
     190            0 :     req: Request<Body>,
     191            0 : ) -> Result<Response<Body>, ApiError> {
     192            0 :     let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
     193            0 :     check_permissions(&req, Scope::PageServerApi)?;
     194              : 
     195            0 :     let mut req = match maybe_forward(req).await {
     196            0 :         ForwardOutcome::Forwarded(res) => {
     197            0 :             return res;
     198              :         }
     199            0 :         ForwardOutcome::NotForwarded(req) => req,
     200              :     };
     201              : 
     202            0 :     let config_req = json_request::<TenantLocationConfigRequest>(&mut req).await?;
     203              :     json_response(
     204              :         StatusCode::OK,
     205            0 :         service
     206            0 :             .tenant_location_config(tenant_shard_id, config_req)
     207            0 :             .await?,
     208              :     )
     209            0 : }
     210              : 
     211            0 : async fn handle_tenant_config_set(
     212            0 :     service: Arc<Service>,
     213            0 :     req: Request<Body>,
     214            0 : ) -> Result<Response<Body>, ApiError> {
     215            0 :     check_permissions(&req, Scope::PageServerApi)?;
     216              : 
     217            0 :     let mut req = match maybe_forward(req).await {
     218            0 :         ForwardOutcome::Forwarded(res) => {
     219            0 :             return res;
     220              :         }
     221            0 :         ForwardOutcome::NotForwarded(req) => req,
     222              :     };
     223              : 
     224            0 :     let config_req = json_request::<TenantConfigRequest>(&mut req).await?;
     225              : 
     226            0 :     json_response(StatusCode::OK, service.tenant_config_set(config_req).await?)
     227            0 : }
     228              : 
     229            0 : async fn handle_tenant_config_get(
     230            0 :     service: Arc<Service>,
     231            0 :     req: Request<Body>,
     232            0 : ) -> Result<Response<Body>, ApiError> {
     233            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     234            0 :     check_permissions(&req, Scope::PageServerApi)?;
     235              : 
     236            0 :     match maybe_forward(req).await {
     237            0 :         ForwardOutcome::Forwarded(res) => {
     238            0 :             return res;
     239              :         }
     240            0 :         ForwardOutcome::NotForwarded(_req) => {}
     241            0 :     };
     242            0 : 
     243            0 :     json_response(StatusCode::OK, service.tenant_config_get(tenant_id)?)
     244            0 : }
     245              : 
     246            0 : async fn handle_tenant_time_travel_remote_storage(
     247            0 :     service: Arc<Service>,
     248            0 :     req: Request<Body>,
     249            0 : ) -> Result<Response<Body>, ApiError> {
     250            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     251            0 :     check_permissions(&req, Scope::PageServerApi)?;
     252              : 
     253            0 :     let mut req = match maybe_forward(req).await {
     254            0 :         ForwardOutcome::Forwarded(res) => {
     255            0 :             return res;
     256              :         }
     257            0 :         ForwardOutcome::NotForwarded(req) => req,
     258              :     };
     259              : 
     260            0 :     let time_travel_req = json_request::<TenantTimeTravelRequest>(&mut req).await?;
     261              : 
     262            0 :     let timestamp_raw = must_get_query_param(&req, "travel_to")?;
     263            0 :     let _timestamp = humantime::parse_rfc3339(&timestamp_raw).map_err(|_e| {
     264            0 :         ApiError::BadRequest(anyhow::anyhow!(
     265            0 :             "Invalid time for travel_to: {timestamp_raw:?}"
     266            0 :         ))
     267            0 :     })?;
     268              : 
     269            0 :     let done_if_after_raw = must_get_query_param(&req, "done_if_after")?;
     270            0 :     let _done_if_after = humantime::parse_rfc3339(&done_if_after_raw).map_err(|_e| {
     271            0 :         ApiError::BadRequest(anyhow::anyhow!(
     272            0 :             "Invalid time for done_if_after: {done_if_after_raw:?}"
     273            0 :         ))
     274            0 :     })?;
     275              : 
     276            0 :     service
     277            0 :         .tenant_time_travel_remote_storage(
     278            0 :             &time_travel_req,
     279            0 :             tenant_id,
     280            0 :             timestamp_raw,
     281            0 :             done_if_after_raw,
     282            0 :         )
     283            0 :         .await?;
     284            0 :     json_response(StatusCode::OK, ())
     285            0 : }
     286              : 
     287            0 : fn map_reqwest_hyper_status(status: reqwest::StatusCode) -> Result<hyper::StatusCode, ApiError> {
     288            0 :     hyper::StatusCode::from_u16(status.as_u16())
     289            0 :         .context("invalid status code")
     290            0 :         .map_err(ApiError::InternalServerError)
     291            0 : }
     292              : 
     293            0 : async fn handle_tenant_secondary_download(
     294            0 :     service: Arc<Service>,
     295            0 :     req: Request<Body>,
     296            0 : ) -> Result<Response<Body>, ApiError> {
     297            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     298            0 :     let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
     299            0 : 
     300            0 :     match maybe_forward(req).await {
     301            0 :         ForwardOutcome::Forwarded(res) => {
     302            0 :             return res;
     303              :         }
     304            0 :         ForwardOutcome::NotForwarded(_req) => {}
     305              :     };
     306              : 
     307            0 :     let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
     308            0 :     json_response(map_reqwest_hyper_status(status)?, progress)
     309            0 : }
     310              : 
     311            0 : async fn handle_tenant_delete(
     312            0 :     service: Arc<Service>,
     313            0 :     req: Request<Body>,
     314            0 : ) -> Result<Response<Body>, ApiError> {
     315            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     316            0 :     check_permissions(&req, Scope::PageServerApi)?;
     317              : 
     318            0 :     match maybe_forward(req).await {
     319            0 :         ForwardOutcome::Forwarded(res) => {
     320            0 :             return res;
     321              :         }
     322            0 :         ForwardOutcome::NotForwarded(_req) => {}
     323              :     };
     324              : 
     325            0 :     let status_code = service
     326            0 :         .tenant_delete(tenant_id)
     327            0 :         .await
     328            0 :         .and_then(map_reqwest_hyper_status)?;
     329              : 
     330            0 :     if status_code == StatusCode::NOT_FOUND {
     331              :         // The pageserver uses 404 for successful deletion, but we use 200
     332            0 :         json_response(StatusCode::OK, ())
     333              :     } else {
     334            0 :         json_response(status_code, ())
     335              :     }
     336            0 : }
     337              : 
     338            0 : async fn handle_tenant_timeline_create(
     339            0 :     service: Arc<Service>,
     340            0 :     req: Request<Body>,
     341            0 : ) -> Result<Response<Body>, ApiError> {
     342            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     343            0 :     check_permissions(&req, Scope::PageServerApi)?;
     344              : 
     345            0 :     let mut req = match maybe_forward(req).await {
     346            0 :         ForwardOutcome::Forwarded(res) => {
     347            0 :             return res;
     348              :         }
     349            0 :         ForwardOutcome::NotForwarded(req) => req,
     350              :     };
     351              : 
     352            0 :     let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
     353              :     json_response(
     354              :         StatusCode::CREATED,
     355            0 :         service
     356            0 :             .tenant_timeline_create(tenant_id, create_req)
     357            0 :             .await?,
     358              :     )
     359            0 : }
     360              : 
     361            0 : async fn handle_tenant_timeline_delete(
     362            0 :     service: Arc<Service>,
     363            0 :     req: Request<Body>,
     364            0 : ) -> Result<Response<Body>, ApiError> {
     365            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     366            0 :     let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
     367              : 
     368            0 :     check_permissions(&req, Scope::PageServerApi)?;
     369              : 
     370            0 :     match maybe_forward(req).await {
     371            0 :         ForwardOutcome::Forwarded(res) => {
     372            0 :             return res;
     373              :         }
     374            0 :         ForwardOutcome::NotForwarded(_req) => {}
     375              :     };
     376              : 
     377              :     // For timeline deletions, which both implement an "initially return 202, then 404 once
     378              :     // we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
     379            0 :     async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
     380            0 :     where
     381            0 :         R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
     382            0 :         F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
     383            0 :     {
     384              :         // On subsequent retries, wait longer.
     385              :         // Enable callers with a 25 second request timeout to reliably get a response
     386              :         const MAX_WAIT: Duration = Duration::from_secs(25);
     387              :         const MAX_RETRY_PERIOD: Duration = Duration::from_secs(5);
     388              : 
     389            0 :         let started_at = Instant::now();
     390            0 : 
     391            0 :         // To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
     392            0 :         // completed.
     393            0 :         let mut retry_period = Duration::from_secs(1);
     394              : 
     395              :         loop {
     396            0 :             let status = f(service.clone()).await?;
     397            0 :             match status {
     398              :                 StatusCode::ACCEPTED => {
     399            0 :                     tracing::info!("Deletion accepted, waiting to try again...");
     400            0 :                     tokio::time::sleep(retry_period).await;
     401            0 :                     retry_period = MAX_RETRY_PERIOD;
     402              :                 }
     403              :                 StatusCode::CONFLICT => {
     404            0 :                     tracing::info!("Deletion already in progress, waiting to try again...");
     405            0 :                     tokio::time::sleep(retry_period).await;
     406              :                 }
     407              :                 StatusCode::NOT_FOUND => {
     408            0 :                     tracing::info!("Deletion complete");
     409            0 :                     return json_response(StatusCode::OK, ());
     410              :                 }
     411              :                 _ => {
     412            0 :                     tracing::warn!("Unexpected status {status}");
     413            0 :                     return json_response(status, ());
     414              :                 }
     415              :             }
     416              : 
     417            0 :             let now = Instant::now();
     418            0 :             if now + retry_period > started_at + MAX_WAIT {
     419            0 :                 tracing::info!("Deletion timed out waiting for 404");
     420              :                 // REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
     421              :                 // the pageserver's swagger definition for this endpoint, and has the same desired
     422              :                 // effect of causing the control plane to retry later.
     423            0 :                 return json_response(StatusCode::CONFLICT, ());
     424            0 :             }
     425              :         }
     426            0 :     }
     427              : 
     428            0 :     deletion_wrapper(service, move |service| async move {
     429            0 :         service
     430            0 :             .tenant_timeline_delete(tenant_id, timeline_id)
     431            0 :             .await
     432            0 :             .and_then(map_reqwest_hyper_status)
     433            0 :     })
     434            0 :     .await
     435            0 : }
     436              : 
     437            0 : async fn handle_tenant_timeline_archival_config(
     438            0 :     service: Arc<Service>,
     439            0 :     req: Request<Body>,
     440            0 : ) -> Result<Response<Body>, ApiError> {
     441            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     442            0 :     let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
     443              : 
     444            0 :     check_permissions(&req, Scope::PageServerApi)?;
     445              : 
     446            0 :     let mut req = match maybe_forward(req).await {
     447            0 :         ForwardOutcome::Forwarded(res) => {
     448            0 :             return res;
     449              :         }
     450            0 :         ForwardOutcome::NotForwarded(req) => req,
     451              :     };
     452              : 
     453            0 :     let create_req = json_request::<TimelineArchivalConfigRequest>(&mut req).await?;
     454              : 
     455            0 :     service
     456            0 :         .tenant_timeline_archival_config(tenant_id, timeline_id, create_req)
     457            0 :         .await?;
     458              : 
     459            0 :     json_response(StatusCode::OK, ())
     460            0 : }
     461              : 
     462            0 : async fn handle_tenant_timeline_detach_ancestor(
     463            0 :     service: Arc<Service>,
     464            0 :     req: Request<Body>,
     465            0 : ) -> Result<Response<Body>, ApiError> {
     466            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     467            0 :     let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
     468              : 
     469            0 :     check_permissions(&req, Scope::PageServerApi)?;
     470              : 
     471            0 :     match maybe_forward(req).await {
     472            0 :         ForwardOutcome::Forwarded(res) => {
     473            0 :             return res;
     474              :         }
     475            0 :         ForwardOutcome::NotForwarded(_req) => {}
     476              :     };
     477              : 
     478            0 :     let res = service
     479            0 :         .tenant_timeline_detach_ancestor(tenant_id, timeline_id)
     480            0 :         .await?;
     481              : 
     482            0 :     json_response(StatusCode::OK, res)
     483            0 : }
     484              : 
     485            0 : async fn handle_tenant_timeline_block_unblock_gc(
     486            0 :     service: Arc<Service>,
     487            0 :     req: Request<Body>,
     488            0 :     dir: BlockUnblock,
     489            0 : ) -> Result<Response<Body>, ApiError> {
     490            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     491            0 :     check_permissions(&req, Scope::PageServerApi)?;
     492              : 
     493            0 :     let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
     494              : 
     495            0 :     service
     496            0 :         .tenant_timeline_block_unblock_gc(tenant_id, timeline_id, dir)
     497            0 :         .await?;
     498              : 
     499            0 :     json_response(StatusCode::OK, ())
     500            0 : }
     501              : 
     502            0 : async fn handle_tenant_timeline_passthrough(
     503            0 :     service: Arc<Service>,
     504            0 :     req: Request<Body>,
     505            0 : ) -> Result<Response<Body>, ApiError> {
     506            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     507            0 :     check_permissions(&req, Scope::PageServerApi)?;
     508              : 
     509            0 :     let req = match maybe_forward(req).await {
     510            0 :         ForwardOutcome::Forwarded(res) => {
     511            0 :             return res;
     512              :         }
     513            0 :         ForwardOutcome::NotForwarded(req) => req,
     514              :     };
     515              : 
     516            0 :     let Some(path) = req.uri().path_and_query() else {
     517              :         // This should never happen, our request router only calls us if there is a path
     518            0 :         return Err(ApiError::BadRequest(anyhow::anyhow!("Missing path")));
     519              :     };
     520              : 
     521            0 :     tracing::info!("Proxying request for tenant {} ({})", tenant_id, path);
     522              : 
     523              :     // Find the node that holds shard zero
     524            0 :     let (node, tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?;
     525              : 
     526              :     // Callers will always pass an unsharded tenant ID.  Before proxying, we must
     527              :     // rewrite this to a shard-aware shard zero ID.
     528            0 :     let path = format!("{}", path);
     529            0 :     let tenant_str = tenant_id.to_string();
     530            0 :     let tenant_shard_str = format!("{}", tenant_shard_id);
     531            0 :     let path = path.replace(&tenant_str, &tenant_shard_str);
     532            0 : 
     533            0 :     let latency = &METRICS_REGISTRY
     534            0 :         .metrics_group
     535            0 :         .storage_controller_passthrough_request_latency;
     536            0 : 
     537            0 :     // This is a bit awkward. We remove the param from the request
     538            0 :     // and join the words by '_' to get a label for the request.
     539            0 :     let just_path = path.replace(&tenant_shard_str, "");
     540            0 :     let path_label = just_path
     541            0 :         .split('/')
     542            0 :         .filter(|token| !token.is_empty())
     543            0 :         .collect::<Vec<_>>()
     544            0 :         .join("_");
     545            0 :     let labels = PageserverRequestLabelGroup {
     546            0 :         pageserver_id: &node.get_id().to_string(),
     547            0 :         path: &path_label,
     548            0 :         method: crate::metrics::Method::Get,
     549            0 :     };
     550            0 : 
     551            0 :     let _timer = latency.start_timer(labels.clone());
     552            0 : 
     553            0 :     let client = mgmt_api::Client::new(node.base_url(), service.get_config().jwt_token.as_deref());
     554            0 :     let resp = client.get_raw(path).await.map_err(|e|
     555              :         // We return 503 here because if we can't successfully send a request to the pageserver,
     556              :         // either we aren't available or the pageserver is unavailable.
     557            0 :         ApiError::ResourceUnavailable(format!("Error sending pageserver API request to {node}: {e}").into()))?;
     558              : 
     559            0 :     if !resp.status().is_success() {
     560            0 :         let error_counter = &METRICS_REGISTRY
     561            0 :             .metrics_group
     562            0 :             .storage_controller_passthrough_request_error;
     563            0 :         error_counter.inc(labels);
     564            0 :     }
     565              : 
     566              :     // Transform 404 into 503 if we raced with a migration
     567            0 :     if resp.status() == reqwest::StatusCode::NOT_FOUND {
     568              :         // Look up node again: if we migrated it will be different
     569            0 :         let (new_node, _tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?;
     570            0 :         if new_node.get_id() != node.get_id() {
     571              :             // Rather than retry here, send the client a 503 to prompt a retry: this matches
     572              :             // the pageserver's use of 503, and all clients calling this API should retry on 503.
     573            0 :             return Err(ApiError::ResourceUnavailable(
     574            0 :                 format!("Pageserver {node} returned 404, was migrated to {new_node}").into(),
     575            0 :             ));
     576            0 :         }
     577            0 :     }
     578              : 
     579              :     // We have a reqwest::Response, would like an http::Response
     580            0 :     let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?);
     581            0 :     for (k, v) in resp.headers() {
     582            0 :         builder = builder.header(k.as_str(), v.as_bytes());
     583            0 :     }
     584              : 
     585            0 :     let response = builder
     586            0 :         .body(Body::wrap_stream(resp.bytes_stream()))
     587            0 :         .map_err(|e| ApiError::InternalServerError(e.into()))?;
     588              : 
     589            0 :     Ok(response)
     590            0 : }
     591              : 
     592            0 : async fn handle_tenant_locate(
     593            0 :     service: Arc<Service>,
     594            0 :     req: Request<Body>,
     595            0 : ) -> Result<Response<Body>, ApiError> {
     596            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     597              : 
     598            0 :     check_permissions(&req, Scope::Admin)?;
     599              : 
     600            0 :     match maybe_forward(req).await {
     601            0 :         ForwardOutcome::Forwarded(res) => {
     602            0 :             return res;
     603              :         }
     604            0 :         ForwardOutcome::NotForwarded(_req) => {}
     605            0 :     };
     606            0 : 
     607            0 :     json_response(StatusCode::OK, service.tenant_locate(tenant_id)?)
     608            0 : }
     609              : 
     610            0 : async fn handle_tenant_describe(
     611            0 :     service: Arc<Service>,
     612            0 :     req: Request<Body>,
     613            0 : ) -> Result<Response<Body>, ApiError> {
     614            0 :     check_permissions(&req, Scope::Scrubber)?;
     615              : 
     616            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     617              : 
     618            0 :     match maybe_forward(req).await {
     619            0 :         ForwardOutcome::Forwarded(res) => {
     620            0 :             return res;
     621              :         }
     622            0 :         ForwardOutcome::NotForwarded(_req) => {}
     623            0 :     };
     624            0 : 
     625            0 :     json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
     626            0 : }
     627              : 
     628            0 : async fn handle_tenant_list(
     629            0 :     service: Arc<Service>,
     630            0 :     req: Request<Body>,
     631            0 : ) -> Result<Response<Body>, ApiError> {
     632            0 :     check_permissions(&req, Scope::Admin)?;
     633              : 
     634            0 :     match maybe_forward(req).await {
     635            0 :         ForwardOutcome::Forwarded(res) => {
     636            0 :             return res;
     637              :         }
     638            0 :         ForwardOutcome::NotForwarded(_req) => {}
     639            0 :     };
     640            0 : 
     641            0 :     json_response(StatusCode::OK, service.tenant_list())
     642            0 : }
     643              : 
     644            0 : async fn handle_node_register(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     645            0 :     check_permissions(&req, Scope::Infra)?;
     646              : 
     647            0 :     let mut req = match maybe_forward(req).await {
     648            0 :         ForwardOutcome::Forwarded(res) => {
     649            0 :             return res;
     650              :         }
     651            0 :         ForwardOutcome::NotForwarded(req) => req,
     652              :     };
     653              : 
     654            0 :     let register_req = json_request::<NodeRegisterRequest>(&mut req).await?;
     655            0 :     let state = get_state(&req);
     656            0 :     state.service.node_register(register_req).await?;
     657            0 :     json_response(StatusCode::OK, ())
     658            0 : }
     659              : 
     660            0 : async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     661            0 :     check_permissions(&req, Scope::Admin)?;
     662              : 
     663            0 :     let req = match maybe_forward(req).await {
     664            0 :         ForwardOutcome::Forwarded(res) => {
     665            0 :             return res;
     666              :         }
     667            0 :         ForwardOutcome::NotForwarded(req) => req,
     668            0 :     };
     669            0 : 
     670            0 :     let state = get_state(&req);
     671            0 :     let nodes = state.service.node_list().await?;
     672            0 :     let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
     673            0 : 
     674            0 :     json_response(StatusCode::OK, api_nodes)
     675            0 : }
     676              : 
     677            0 : async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     678            0 :     check_permissions(&req, Scope::Admin)?;
     679              : 
     680            0 :     let req = match maybe_forward(req).await {
     681            0 :         ForwardOutcome::Forwarded(res) => {
     682            0 :             return res;
     683              :         }
     684            0 :         ForwardOutcome::NotForwarded(req) => req,
     685            0 :     };
     686            0 : 
     687            0 :     let state = get_state(&req);
     688            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     689            0 :     json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
     690            0 : }
     691              : 
     692            0 : async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     693            0 :     check_permissions(&req, Scope::Admin)?;
     694              : 
     695            0 :     let req = match maybe_forward(req).await {
     696            0 :         ForwardOutcome::Forwarded(res) => {
     697            0 :             return res;
     698              :         }
     699            0 :         ForwardOutcome::NotForwarded(req) => req,
     700            0 :     };
     701            0 : 
     702            0 :     let state = get_state(&req);
     703            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     704            0 :     json_response(StatusCode::OK, state.service.node_delete(node_id).await?)
     705            0 : }
     706              : 
     707            0 : async fn handle_node_configure(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     708            0 :     check_permissions(&req, Scope::Admin)?;
     709              : 
     710            0 :     let mut req = match maybe_forward(req).await {
     711            0 :         ForwardOutcome::Forwarded(res) => {
     712            0 :             return res;
     713              :         }
     714            0 :         ForwardOutcome::NotForwarded(req) => req,
     715              :     };
     716              : 
     717            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     718            0 :     let config_req = json_request::<NodeConfigureRequest>(&mut req).await?;
     719            0 :     if node_id != config_req.node_id {
     720            0 :         return Err(ApiError::BadRequest(anyhow::anyhow!(
     721            0 :             "Path and body node_id differ"
     722            0 :         )));
     723            0 :     }
     724            0 :     let state = get_state(&req);
     725            0 : 
     726            0 :     json_response(
     727            0 :         StatusCode::OK,
     728            0 :         state
     729            0 :             .service
     730            0 :             .external_node_configure(
     731            0 :                 config_req.node_id,
     732            0 :                 config_req.availability.map(NodeAvailability::from),
     733            0 :                 config_req.scheduling,
     734            0 :             )
     735            0 :             .await?,
     736              :     )
     737            0 : }
     738              : 
     739            0 : async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     740            0 :     check_permissions(&req, Scope::Admin)?;
     741              : 
     742            0 :     let req = match maybe_forward(req).await {
     743            0 :         ForwardOutcome::Forwarded(res) => {
     744            0 :             return res;
     745              :         }
     746            0 :         ForwardOutcome::NotForwarded(req) => req,
     747            0 :     };
     748            0 : 
     749            0 :     let state = get_state(&req);
     750            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     751              : 
     752            0 :     let node_status = state.service.get_node(node_id).await?;
     753              : 
     754            0 :     json_response(StatusCode::OK, node_status)
     755            0 : }
     756              : 
     757            0 : async fn handle_node_shards(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     758            0 :     check_permissions(&req, Scope::Admin)?;
     759              : 
     760            0 :     let state = get_state(&req);
     761            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     762              : 
     763            0 :     let node_status = state.service.get_node_shards(node_id).await?;
     764              : 
     765            0 :     json_response(StatusCode::OK, node_status)
     766            0 : }
     767              : 
     768            0 : async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     769            0 :     check_permissions(&req, Scope::Admin)?;
     770              : 
     771            0 :     let req = match maybe_forward(req).await {
     772            0 :         ForwardOutcome::Forwarded(res) => {
     773            0 :             return res;
     774              :         }
     775            0 :         ForwardOutcome::NotForwarded(req) => req,
     776            0 :     };
     777            0 : 
     778            0 :     let state = get_state(&req);
     779            0 :     let leader = state.service.get_leader().await.map_err(|err| {
     780            0 :         ApiError::InternalServerError(anyhow::anyhow!(
     781            0 :             "Failed to read leader from database: {err}"
     782            0 :         ))
     783            0 :     })?;
     784              : 
     785            0 :     json_response(StatusCode::OK, leader)
     786            0 : }
     787              : 
     788            0 : async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     789            0 :     check_permissions(&req, Scope::Admin)?;
     790              : 
     791            0 :     let req = match maybe_forward(req).await {
     792            0 :         ForwardOutcome::Forwarded(res) => {
     793            0 :             return res;
     794              :         }
     795            0 :         ForwardOutcome::NotForwarded(req) => req,
     796            0 :     };
     797            0 : 
     798            0 :     let state = get_state(&req);
     799            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     800              : 
     801            0 :     state.service.start_node_drain(node_id).await?;
     802              : 
     803            0 :     json_response(StatusCode::ACCEPTED, ())
     804            0 : }
     805              : 
     806            0 : async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     807            0 :     check_permissions(&req, Scope::Admin)?;
     808              : 
     809            0 :     let req = match maybe_forward(req).await {
     810            0 :         ForwardOutcome::Forwarded(res) => {
     811            0 :             return res;
     812              :         }
     813            0 :         ForwardOutcome::NotForwarded(req) => req,
     814            0 :     };
     815            0 : 
     816            0 :     let state = get_state(&req);
     817            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     818              : 
     819            0 :     state.service.cancel_node_drain(node_id).await?;
     820              : 
     821            0 :     json_response(StatusCode::ACCEPTED, ())
     822            0 : }
     823              : 
     824            0 : async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     825            0 :     check_permissions(&req, Scope::Admin)?;
     826              : 
     827            0 :     let req = match maybe_forward(req).await {
     828            0 :         ForwardOutcome::Forwarded(res) => {
     829            0 :             return res;
     830              :         }
     831            0 :         ForwardOutcome::NotForwarded(req) => req,
     832            0 :     };
     833            0 : 
     834            0 :     let state = get_state(&req);
     835            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     836              : 
     837            0 :     state.service.start_node_fill(node_id).await?;
     838              : 
     839            0 :     json_response(StatusCode::ACCEPTED, ())
     840            0 : }
     841              : 
     842            0 : async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     843            0 :     check_permissions(&req, Scope::Admin)?;
     844              : 
     845            0 :     let req = match maybe_forward(req).await {
     846            0 :         ForwardOutcome::Forwarded(res) => {
     847            0 :             return res;
     848              :         }
     849            0 :         ForwardOutcome::NotForwarded(req) => req,
     850            0 :     };
     851            0 : 
     852            0 :     let state = get_state(&req);
     853            0 :     let node_id: NodeId = parse_request_param(&req, "node_id")?;
     854              : 
     855            0 :     state.service.cancel_node_fill(node_id).await?;
     856              : 
     857            0 :     json_response(StatusCode::ACCEPTED, ())
     858            0 : }
     859              : 
     860            0 : async fn handle_metadata_health_update(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     861            0 :     check_permissions(&req, Scope::Scrubber)?;
     862              : 
     863            0 :     let mut req = match maybe_forward(req).await {
     864            0 :         ForwardOutcome::Forwarded(res) => {
     865            0 :             return res;
     866              :         }
     867            0 :         ForwardOutcome::NotForwarded(req) => req,
     868              :     };
     869              : 
     870            0 :     let update_req = json_request::<MetadataHealthUpdateRequest>(&mut req).await?;
     871            0 :     let state = get_state(&req);
     872            0 : 
     873            0 :     state.service.metadata_health_update(update_req).await?;
     874              : 
     875            0 :     json_response(StatusCode::OK, MetadataHealthUpdateResponse {})
     876            0 : }
     877              : 
     878            0 : async fn handle_metadata_health_list_unhealthy(
     879            0 :     req: Request<Body>,
     880            0 : ) -> Result<Response<Body>, ApiError> {
     881            0 :     check_permissions(&req, Scope::Admin)?;
     882              : 
     883            0 :     let req = match maybe_forward(req).await {
     884            0 :         ForwardOutcome::Forwarded(res) => {
     885            0 :             return res;
     886              :         }
     887            0 :         ForwardOutcome::NotForwarded(req) => req,
     888            0 :     };
     889            0 : 
     890            0 :     let state = get_state(&req);
     891            0 :     let unhealthy_tenant_shards = state.service.metadata_health_list_unhealthy().await?;
     892              : 
     893            0 :     json_response(
     894            0 :         StatusCode::OK,
     895            0 :         MetadataHealthListUnhealthyResponse {
     896            0 :             unhealthy_tenant_shards,
     897            0 :         },
     898            0 :     )
     899            0 : }
     900              : 
     901            0 : async fn handle_metadata_health_list_outdated(
     902            0 :     req: Request<Body>,
     903            0 : ) -> Result<Response<Body>, ApiError> {
     904            0 :     check_permissions(&req, Scope::Admin)?;
     905              : 
     906            0 :     let mut req = match maybe_forward(req).await {
     907            0 :         ForwardOutcome::Forwarded(res) => {
     908            0 :             return res;
     909              :         }
     910            0 :         ForwardOutcome::NotForwarded(req) => req,
     911              :     };
     912              : 
     913            0 :     let list_outdated_req = json_request::<MetadataHealthListOutdatedRequest>(&mut req).await?;
     914            0 :     let state = get_state(&req);
     915            0 :     let health_records = state
     916            0 :         .service
     917            0 :         .metadata_health_list_outdated(list_outdated_req.not_scrubbed_for)
     918            0 :         .await?;
     919              : 
     920            0 :     json_response(
     921            0 :         StatusCode::OK,
     922            0 :         MetadataHealthListOutdatedResponse { health_records },
     923            0 :     )
     924            0 : }
     925              : 
     926            0 : async fn handle_tenant_shard_split(
     927            0 :     service: Arc<Service>,
     928            0 :     req: Request<Body>,
     929            0 : ) -> Result<Response<Body>, ApiError> {
     930            0 :     check_permissions(&req, Scope::Admin)?;
     931              : 
     932            0 :     let mut req = match maybe_forward(req).await {
     933            0 :         ForwardOutcome::Forwarded(res) => {
     934            0 :             return res;
     935              :         }
     936            0 :         ForwardOutcome::NotForwarded(req) => req,
     937              :     };
     938              : 
     939            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     940            0 :     let split_req = json_request::<TenantShardSplitRequest>(&mut req).await?;
     941              : 
     942              :     json_response(
     943              :         StatusCode::OK,
     944            0 :         service.tenant_shard_split(tenant_id, split_req).await?,
     945              :     )
     946            0 : }
     947              : 
     948            0 : async fn handle_tenant_shard_migrate(
     949            0 :     service: Arc<Service>,
     950            0 :     req: Request<Body>,
     951            0 : ) -> Result<Response<Body>, ApiError> {
     952            0 :     check_permissions(&req, Scope::Admin)?;
     953              : 
     954            0 :     let mut req = match maybe_forward(req).await {
     955            0 :         ForwardOutcome::Forwarded(res) => {
     956            0 :             return res;
     957              :         }
     958            0 :         ForwardOutcome::NotForwarded(req) => req,
     959              :     };
     960              : 
     961            0 :     let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
     962            0 :     let migrate_req = json_request::<TenantShardMigrateRequest>(&mut req).await?;
     963              :     json_response(
     964              :         StatusCode::OK,
     965            0 :         service
     966            0 :             .tenant_shard_migrate(tenant_shard_id, migrate_req)
     967            0 :             .await?,
     968              :     )
     969            0 : }
     970              : 
     971            0 : async fn handle_tenant_update_policy(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     972            0 :     check_permissions(&req, Scope::Admin)?;
     973              : 
     974            0 :     let mut req = match maybe_forward(req).await {
     975            0 :         ForwardOutcome::Forwarded(res) => {
     976            0 :             return res;
     977              :         }
     978            0 :         ForwardOutcome::NotForwarded(req) => req,
     979              :     };
     980              : 
     981            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
     982            0 :     let update_req = json_request::<TenantPolicyRequest>(&mut req).await?;
     983            0 :     let state = get_state(&req);
     984            0 : 
     985            0 :     json_response(
     986            0 :         StatusCode::OK,
     987            0 :         state
     988            0 :             .service
     989            0 :             .tenant_update_policy(tenant_id, update_req)
     990            0 :             .await?,
     991              :     )
     992            0 : }
     993              : 
     994            0 : async fn handle_update_preferred_azs(req: Request<Body>) -> Result<Response<Body>, ApiError> {
     995            0 :     check_permissions(&req, Scope::Admin)?;
     996              : 
     997            0 :     let mut req = match maybe_forward(req).await {
     998            0 :         ForwardOutcome::Forwarded(res) => {
     999            0 :             return res;
    1000              :         }
    1001            0 :         ForwardOutcome::NotForwarded(req) => req,
    1002              :     };
    1003              : 
    1004            0 :     let azs_req = json_request::<ShardsPreferredAzsRequest>(&mut req).await?;
    1005            0 :     let state = get_state(&req);
    1006            0 : 
    1007            0 :     json_response(
    1008            0 :         StatusCode::OK,
    1009            0 :         state.service.update_shards_preferred_azs(azs_req).await?,
    1010              :     )
    1011            0 : }
    1012              : 
    1013            0 : async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1014            0 :     check_permissions(&req, Scope::Admin)?;
    1015              : 
    1016            0 :     let req = match maybe_forward(req).await {
    1017            0 :         ForwardOutcome::Forwarded(res) => {
    1018            0 :             return res;
    1019              :         }
    1020            0 :         ForwardOutcome::NotForwarded(req) => req,
    1021            0 :     };
    1022            0 : 
    1023            0 :     let state = get_state(&req);
    1024            0 :     json_response(StatusCode::OK, state.service.step_down().await)
    1025            0 : }
    1026              : 
    1027            0 : async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1028            0 :     check_permissions(&req, Scope::PageServerApi)?;
    1029              : 
    1030            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    1031              : 
    1032            0 :     let req = match maybe_forward(req).await {
    1033            0 :         ForwardOutcome::Forwarded(res) => {
    1034            0 :             return res;
    1035              :         }
    1036            0 :         ForwardOutcome::NotForwarded(req) => req,
    1037            0 :     };
    1038            0 : 
    1039            0 :     let state = get_state(&req);
    1040            0 : 
    1041            0 :     json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
    1042            0 : }
    1043              : 
    1044            0 : async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1045            0 :     check_permissions(&req, Scope::PageServerApi)?;
    1046              : 
    1047            0 :     let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    1048              : 
    1049            0 :     let req = match maybe_forward(req).await {
    1050            0 :         ForwardOutcome::Forwarded(res) => {
    1051            0 :             return res;
    1052              :         }
    1053            0 :         ForwardOutcome::NotForwarded(req) => req,
    1054            0 :     };
    1055            0 : 
    1056            0 :     let state = get_state(&req);
    1057            0 : 
    1058            0 :     json_response(
    1059            0 :         StatusCode::OK,
    1060            0 :         state.service.tenant_import(tenant_id).await?,
    1061              :     )
    1062            0 : }
    1063              : 
    1064            0 : async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1065            0 :     check_permissions(&req, Scope::Admin)?;
    1066              : 
    1067            0 :     let req = match maybe_forward(req).await {
    1068            0 :         ForwardOutcome::Forwarded(res) => {
    1069            0 :             return res;
    1070              :         }
    1071            0 :         ForwardOutcome::NotForwarded(req) => req,
    1072            0 :     };
    1073            0 : 
    1074            0 :     let state = get_state(&req);
    1075            0 :     state.service.tenants_dump()
    1076            0 : }
    1077              : 
    1078            0 : async fn handle_scheduler_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1079            0 :     check_permissions(&req, Scope::Admin)?;
    1080              : 
    1081            0 :     let req = match maybe_forward(req).await {
    1082            0 :         ForwardOutcome::Forwarded(res) => {
    1083            0 :             return res;
    1084              :         }
    1085            0 :         ForwardOutcome::NotForwarded(req) => req,
    1086            0 :     };
    1087            0 : 
    1088            0 :     let state = get_state(&req);
    1089            0 :     state.service.scheduler_dump()
    1090            0 : }
    1091              : 
    1092            0 : async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1093            0 :     check_permissions(&req, Scope::Admin)?;
    1094              : 
    1095            0 :     let req = match maybe_forward(req).await {
    1096            0 :         ForwardOutcome::Forwarded(res) => {
    1097            0 :             return res;
    1098              :         }
    1099            0 :         ForwardOutcome::NotForwarded(req) => req,
    1100            0 :     };
    1101            0 : 
    1102            0 :     let state = get_state(&req);
    1103            0 : 
    1104            0 :     json_response(StatusCode::OK, state.service.consistency_check().await?)
    1105            0 : }
    1106              : 
    1107            0 : async fn handle_reconcile_all(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1108            0 :     check_permissions(&req, Scope::Admin)?;
    1109              : 
    1110            0 :     let req = match maybe_forward(req).await {
    1111            0 :         ForwardOutcome::Forwarded(res) => {
    1112            0 :             return res;
    1113              :         }
    1114            0 :         ForwardOutcome::NotForwarded(req) => req,
    1115            0 :     };
    1116            0 : 
    1117            0 :     let state = get_state(&req);
    1118            0 : 
    1119            0 :     json_response(StatusCode::OK, state.service.reconcile_all_now().await?)
    1120            0 : }
    1121              : 
    1122              : /// Status endpoint is just used for checking that our HTTP listener is up
    1123            0 : async fn handle_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1124            0 :     match maybe_forward(req).await {
    1125            0 :         ForwardOutcome::Forwarded(res) => {
    1126            0 :             return res;
    1127              :         }
    1128            0 :         ForwardOutcome::NotForwarded(_req) => {}
    1129            0 :     };
    1130            0 : 
    1131            0 :     json_response(StatusCode::OK, ())
    1132            0 : }
    1133              : 
    1134              : /// Readiness endpoint indicates when we're done doing startup I/O (e.g. reconciling
    1135              : /// with remote pageserver nodes).  This is intended for use as a Kubernetes readiness probe.
    1136            0 : async fn handle_ready(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1137            0 :     let req = match maybe_forward(req).await {
    1138            0 :         ForwardOutcome::Forwarded(res) => {
    1139            0 :             return res;
    1140              :         }
    1141            0 :         ForwardOutcome::NotForwarded(req) => req,
    1142            0 :     };
    1143            0 : 
    1144            0 :     let state = get_state(&req);
    1145            0 :     if state.service.startup_complete.is_ready() {
    1146            0 :         json_response(StatusCode::OK, ())
    1147              :     } else {
    1148            0 :         json_response(StatusCode::SERVICE_UNAVAILABLE, ())
    1149              :     }
    1150            0 : }
    1151              : 
    1152              : impl From<ReconcileError> for ApiError {
    1153            0 :     fn from(value: ReconcileError) -> Self {
    1154            0 :         ApiError::Conflict(format!("Reconciliation error: {}", value))
    1155            0 :     }
    1156              : }
    1157              : 
    1158              : /// Return the safekeeper record by instance id, or 404.
    1159              : ///
    1160              : /// Not used by anything except manual testing.
    1161            0 : async fn handle_get_safekeeper(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1162            0 :     check_permissions(&req, Scope::Admin)?;
    1163              : 
    1164            0 :     let id = parse_request_param::<i64>(&req, "id")?;
    1165              : 
    1166            0 :     let req = match maybe_forward(req).await {
    1167            0 :         ForwardOutcome::Forwarded(res) => {
    1168            0 :             return res;
    1169              :         }
    1170            0 :         ForwardOutcome::NotForwarded(req) => req,
    1171            0 :     };
    1172            0 : 
    1173            0 :     let state = get_state(&req);
    1174              : 
    1175            0 :     let res = state.service.get_safekeeper(id).await;
    1176              : 
    1177            0 :     match res {
    1178            0 :         Ok(b) => json_response(StatusCode::OK, b),
    1179              :         Err(crate::persistence::DatabaseError::Query(diesel::result::Error::NotFound)) => {
    1180            0 :             Err(ApiError::NotFound("unknown instance_id".into()))
    1181              :         }
    1182            0 :         Err(other) => Err(other.into()),
    1183              :     }
    1184            0 : }
    1185              : 
    1186              : /// Used as part of deployment scripts.
    1187              : ///
    1188              : /// Assumes information is only relayed to storage controller after first selecting an unique id on
    1189              : /// control plane database, which means we have an id field in the request and payload.
    1190            0 : async fn handle_upsert_safekeeper(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1191            0 :     check_permissions(&req, Scope::Infra)?;
    1192              : 
    1193            0 :     let body = json_request::<SafekeeperPersistence>(&mut req).await?;
    1194            0 :     let id = parse_request_param::<i64>(&req, "id")?;
    1195              : 
    1196            0 :     if id != body.id {
    1197              :         // it should be repeated
    1198            0 :         return Err(ApiError::BadRequest(anyhow::anyhow!(
    1199            0 :             "id mismatch: url={id:?}, body={:?}",
    1200            0 :             body.id
    1201            0 :         )));
    1202            0 :     }
    1203              : 
    1204            0 :     let req = match maybe_forward(req).await {
    1205            0 :         ForwardOutcome::Forwarded(res) => {
    1206            0 :             return res;
    1207              :         }
    1208            0 :         ForwardOutcome::NotForwarded(req) => req,
    1209            0 :     };
    1210            0 : 
    1211            0 :     let state = get_state(&req);
    1212            0 : 
    1213            0 :     state.service.upsert_safekeeper(body).await?;
    1214              : 
    1215            0 :     Ok(Response::builder()
    1216            0 :         .status(StatusCode::NO_CONTENT)
    1217            0 :         .body(Body::empty())
    1218            0 :         .unwrap())
    1219            0 : }
    1220              : 
    1221              : /// Common wrapper for request handlers that call into Service and will operate on tenants: they must only
    1222              : /// be allowed to run if Service has finished its initial reconciliation.
    1223            0 : async fn tenant_service_handler<R, H>(
    1224            0 :     request: Request<Body>,
    1225            0 :     handler: H,
    1226            0 :     request_name: RequestName,
    1227            0 : ) -> R::Output
    1228            0 : where
    1229            0 :     R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
    1230            0 :     H: FnOnce(Arc<Service>, Request<Body>) -> R + Send + Sync + 'static,
    1231            0 : {
    1232            0 :     let state = get_state(&request);
    1233            0 :     let service = state.service.clone();
    1234            0 : 
    1235            0 :     let startup_complete = service.startup_complete.clone();
    1236            0 :     if tokio::time::timeout(STARTUP_RECONCILE_TIMEOUT, startup_complete.wait())
    1237            0 :         .await
    1238            0 :         .is_err()
    1239              :     {
    1240              :         // This shouldn't happen: it is the responsibilty of [`Service::startup_reconcile`] to use appropriate
    1241              :         // timeouts around its remote calls, to bound its runtime.
    1242            0 :         return Err(ApiError::Timeout(
    1243            0 :             "Timed out waiting for service readiness".into(),
    1244            0 :         ));
    1245            0 :     }
    1246            0 : 
    1247            0 :     named_request_span(
    1248            0 :         request,
    1249            0 :         |request| async move { handler(service, request).await },
    1250            0 :         request_name,
    1251            0 :     )
    1252            0 :     .await
    1253            0 : }
    1254              : 
    1255              : /// Check if the required scope is held in the request's token, or if the request has
    1256              : /// a token with 'admin' scope then always permit it.
    1257            0 : fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> {
    1258            0 :     check_permission_with(request, |claims| {
    1259            0 :         match crate::auth::check_permission(claims, required_scope) {
    1260            0 :             Err(e) => match crate::auth::check_permission(claims, Scope::Admin) {
    1261            0 :                 Ok(()) => Ok(()),
    1262            0 :                 Err(_) => Err(e),
    1263              :             },
    1264            0 :             Ok(()) => Ok(()),
    1265              :         }
    1266            0 :     })
    1267            0 : }
    1268              : 
/// Per-request bookkeeping for the HTTP metrics middlewares.
///
/// Stored in the request context by `prologue_metrics_middleware` and read back by
/// `epilogue_metrics_middleware` to label and time the request.
#[derive(Clone, Debug)]
struct RequestMeta {
    /// HTTP method of the request; becomes the `method` metric label.
    method: hyper::http::Method,
    /// Instant captured when the prologue middleware ran; request latency is measured from here.
    at: Instant,
}
    1274              : 
    1275            0 : pub fn prologue_leadership_status_check_middleware<
    1276            0 :     B: hyper::body::HttpBody + Send + Sync + 'static,
    1277            0 : >() -> Middleware<B, ApiError> {
    1278            0 :     Middleware::pre(move |req| async move {
    1279            0 :         let state = get_state(&req);
    1280            0 :         let leadership_status = state.service.get_leadership_status();
    1281              : 
    1282              :         enum AllowedRoutes<'a> {
    1283              :             All,
    1284              :             Some(Vec<&'a str>),
    1285              :         }
    1286              : 
    1287            0 :         let allowed_routes = match leadership_status {
    1288            0 :             LeadershipStatus::Leader => AllowedRoutes::All,
    1289            0 :             LeadershipStatus::SteppedDown => AllowedRoutes::All,
    1290              :             LeadershipStatus::Candidate => {
    1291            0 :                 AllowedRoutes::Some(["/ready", "/status", "/metrics"].to_vec())
    1292              :             }
    1293              :         };
    1294              : 
    1295            0 :         let uri = req.uri().to_string();
    1296            0 :         match allowed_routes {
    1297            0 :             AllowedRoutes::All => Ok(req),
    1298            0 :             AllowedRoutes::Some(allowed) if allowed.contains(&uri.as_str()) => Ok(req),
    1299              :             _ => {
    1300            0 :                 tracing::info!(
    1301            0 :                     "Request {} not allowed due to current leadership state",
    1302            0 :                     req.uri()
    1303              :                 );
    1304              : 
    1305            0 :                 Err(ApiError::ResourceUnavailable(
    1306            0 :                     format!("Current leadership status is {leadership_status}").into(),
    1307            0 :                 ))
    1308              :             }
    1309              :         }
    1310            0 :     })
    1311            0 : }
    1312              : 
    1313            0 : fn prologue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
    1314            0 : ) -> Middleware<B, ApiError> {
    1315            0 :     Middleware::pre(move |req| async move {
    1316            0 :         let meta = RequestMeta {
    1317            0 :             method: req.method().clone(),
    1318            0 :             at: Instant::now(),
    1319            0 :         };
    1320            0 : 
    1321            0 :         req.set_context(meta);
    1322            0 : 
    1323            0 :         Ok(req)
    1324            0 :     })
    1325            0 : }
    1326              : 
    1327            0 : fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
    1328            0 : ) -> Middleware<B, ApiError> {
    1329            0 :     Middleware::post_with_info(move |resp, req_info| async move {
    1330            0 :         let request_name = match req_info.context::<RequestName>() {
    1331            0 :             Some(name) => name,
    1332              :             None => {
    1333            0 :                 return Ok(resp);
    1334              :             }
    1335              :         };
    1336              : 
    1337            0 :         if let Some(meta) = req_info.context::<RequestMeta>() {
    1338            0 :             let status = &crate::metrics::METRICS_REGISTRY
    1339            0 :                 .metrics_group
    1340            0 :                 .storage_controller_http_request_status;
    1341            0 :             let latency = &crate::metrics::METRICS_REGISTRY
    1342            0 :                 .metrics_group
    1343            0 :                 .storage_controller_http_request_latency;
    1344            0 : 
    1345            0 :             status.inc(HttpRequestStatusLabelGroup {
    1346            0 :                 path: request_name.0,
    1347            0 :                 method: meta.method.clone().into(),
    1348            0 :                 status: crate::metrics::StatusCode(resp.status()),
    1349            0 :             });
    1350            0 : 
    1351            0 :             latency.observe(
    1352            0 :                 HttpRequestLatencyLabelGroup {
    1353            0 :                     path: request_name.0,
    1354            0 :                     method: meta.method.into(),
    1355            0 :                 },
    1356            0 :                 meta.at.elapsed().as_secs_f64(),
    1357            0 :             );
    1358            0 :         }
    1359            0 :         Ok(resp)
    1360            0 :     })
    1361            0 : }
    1362              : 
    1363            0 : pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    1364              :     pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";
    1365              : 
    1366            0 :     let req = match maybe_forward(req).await {
    1367            0 :         ForwardOutcome::Forwarded(res) => {
    1368            0 :             return res;
    1369              :         }
    1370            0 :         ForwardOutcome::NotForwarded(req) => req,
    1371            0 :     };
    1372            0 : 
    1373            0 :     let state = get_state(&req);
    1374            0 :     let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
    1375            0 :     let response = Response::builder()
    1376            0 :         .status(200)
    1377            0 :         .header(CONTENT_TYPE, TEXT_FORMAT)
    1378            0 :         .body(payload.into())
    1379            0 :         .unwrap();
    1380            0 : 
    1381            0 :     Ok(response)
    1382            0 : }
    1383              : 
/// Static name identifying an endpoint.
///
/// `named_request_span` stores it in the request context, and
/// `epilogue_metrics_middleware` uses the wrapped string as the `path` label of the
/// HTTP request metrics.
#[derive(Clone)]
struct RequestName(&'static str);
    1386              : 
    1387            0 : async fn named_request_span<R, H>(
    1388            0 :     request: Request<Body>,
    1389            0 :     handler: H,
    1390            0 :     name: RequestName,
    1391            0 : ) -> R::Output
    1392            0 : where
    1393            0 :     R: Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
    1394            0 :     H: FnOnce(Request<Body>) -> R + Send + Sync + 'static,
    1395            0 : {
    1396            0 :     request.set_context(name);
    1397            0 :     request_span(request, handler).await
    1398            0 : }
    1399              : 
/// Result of offering a request to `maybe_forward`.
enum ForwardOutcome {
    /// The forwarding path took over the request: the payload is the final outcome the
    /// handler should return (either the converted response or an error).
    Forwarded(Result<Response<Body>, ApiError>),
    /// The request was not forwarded and is handed back for local handling.
    NotForwarded(Request<Body>),
}
    1404              : 
    1405              : /// Potentially forward the request to the current storage controler leader.
    1406              : /// More specifically we forward when:
    1407              : /// 1. Request is not one of ["/control/v1/step_down", "/status", "/ready", "/metrics"]
    1408              : /// 2. Current instance is in [`LeadershipStatus::SteppedDown`] state
    1409              : /// 3. There is a leader in the database to forward to
    1410              : /// 4. Leader from step (3) is not the current instance
    1411              : ///
    1412              : /// Why forward?
    1413              : /// It turns out that we can't rely on external orchestration to promptly route trafic to the
    1414              : /// new leader. This is downtime inducing. Forwarding provides a safe way out.
    1415              : ///
    1416              : /// Why is it safe?
    1417              : /// If a storcon instance is persisted in the database, then we know that it is the current leader.
    1418              : /// There's one exception: time between handling step-down request and the new leader updating the
    1419              : /// database.
    1420              : ///
    1421              : /// Let's treat the happy case first. The stepped down node does not produce any side effects,
    1422              : /// since all request handling happens on the leader.
    1423              : ///
    1424              : /// As for the edge case, we are guaranteed to always have a maximum of two running instances.
    1425              : /// Hence, if we are in the edge case scenario the leader persisted in the database is the
    1426              : /// stepped down instance that received the request. Condition (4) above covers this scenario.
    1427            0 : async fn maybe_forward(req: Request<Body>) -> ForwardOutcome {
    1428              :     const NOT_FOR_FORWARD: [&str; 4] = ["/control/v1/step_down", "/status", "/ready", "/metrics"];
    1429              : 
    1430            0 :     let uri = req.uri().to_string();
    1431            0 :     let uri_for_forward = !NOT_FOR_FORWARD.contains(&uri.as_str());
    1432            0 : 
    1433            0 :     let state = get_state(&req);
    1434            0 :     let leadership_status = state.service.get_leadership_status();
    1435            0 : 
    1436            0 :     if leadership_status != LeadershipStatus::SteppedDown || !uri_for_forward {
    1437            0 :         return ForwardOutcome::NotForwarded(req);
    1438            0 :     }
    1439              : 
    1440            0 :     let leader = state.service.get_leader().await;
    1441            0 :     let leader = {
    1442            0 :         match leader {
    1443            0 :             Ok(Some(leader)) => leader,
    1444              :             Ok(None) => {
    1445            0 :                 return ForwardOutcome::Forwarded(Err(ApiError::ResourceUnavailable(
    1446            0 :                     "No leader to forward to while in stepped down state".into(),
    1447            0 :                 )));
    1448              :             }
    1449            0 :             Err(err) => {
    1450            0 :                 return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(
    1451            0 :                     anyhow::anyhow!(
    1452            0 :                         "Failed to get leader for forwarding while in stepped down state: {err}"
    1453            0 :                     ),
    1454            0 :                 )));
    1455              :             }
    1456              :         }
    1457              :     };
    1458              : 
    1459            0 :     let cfg = state.service.get_config();
    1460            0 :     if let Some(ref self_addr) = cfg.address_for_peers {
    1461            0 :         let leader_addr = match Uri::from_str(leader.address.as_str()) {
    1462            0 :             Ok(uri) => uri,
    1463            0 :             Err(err) => {
    1464            0 :                 return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(
    1465            0 :                     anyhow::anyhow!(
    1466            0 :                     "Failed to parse leader uri for forwarding while in stepped down state: {err}"
    1467            0 :                 ),
    1468            0 :                 )));
    1469              :             }
    1470              :         };
    1471              : 
    1472            0 :         if *self_addr == leader_addr {
    1473            0 :             return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
    1474            0 :                 "Leader is stepped down instance"
    1475            0 :             ))));
    1476            0 :         }
    1477            0 :     }
    1478              : 
    1479            0 :     tracing::info!("Forwarding {} to leader at {}", uri, leader.address);
    1480              : 
    1481              :     // Use [`RECONCILE_TIMEOUT`] as the max amount of time a request should block for and
    1482              :     // include some leeway to get the timeout for proxied requests.
    1483              :     const PROXIED_REQUEST_TIMEOUT: Duration = Duration::from_secs(RECONCILE_TIMEOUT.as_secs() + 10);
    1484            0 :     let client = reqwest::ClientBuilder::new()
    1485            0 :         .timeout(PROXIED_REQUEST_TIMEOUT)
    1486            0 :         .build();
    1487            0 :     let client = match client {
    1488            0 :         Ok(client) => client,
    1489            0 :         Err(err) => {
    1490            0 :             return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
    1491            0 :                 "Failed to build leader client for forwarding while in stepped down state: {err}"
    1492            0 :             ))));
    1493              :         }
    1494              :     };
    1495              : 
    1496            0 :     let request: reqwest::Request = match convert_request(req, &client, leader.address).await {
    1497            0 :         Ok(r) => r,
    1498            0 :         Err(err) => {
    1499            0 :             return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
    1500            0 :                 "Failed to convert request for forwarding while in stepped down state: {err}"
    1501            0 :             ))));
    1502              :         }
    1503              :     };
    1504              : 
    1505            0 :     let response = match client.execute(request).await {
    1506            0 :         Ok(r) => r,
    1507            0 :         Err(err) => {
    1508            0 :             return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
    1509            0 :                 "Failed to forward while in stepped down state: {err}"
    1510            0 :             ))));
    1511              :         }
    1512              :     };
    1513              : 
    1514            0 :     ForwardOutcome::Forwarded(convert_response(response).await)
    1515            0 : }
    1516              : 
    1517              : /// Convert a [`reqwest::Response`] to a [hyper::Response`] by passing through
    1518              : /// a stable representation (string, bytes or integer)
    1519              : ///
    1520              : /// Ideally, we would not have to do this since both types use the http crate
    1521              : /// under the hood. However, they use different versions of the crate and keeping
    1522              : /// second order dependencies in sync is difficult.
    1523            0 : async fn convert_response(resp: reqwest::Response) -> Result<hyper::Response<Body>, ApiError> {
    1524              :     use std::str::FromStr;
    1525              : 
    1526            0 :     let mut builder = hyper::Response::builder().status(resp.status().as_u16());
    1527            0 :     for (key, value) in resp.headers().into_iter() {
    1528            0 :         let key = hyper::header::HeaderName::from_str(key.as_str()).map_err(|err| {
    1529            0 :             ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
    1530            0 :         })?;
    1531              : 
    1532            0 :         let value = hyper::header::HeaderValue::from_bytes(value.as_bytes()).map_err(|err| {
    1533            0 :             ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
    1534            0 :         })?;
    1535              : 
    1536            0 :         builder = builder.header(key, value);
    1537              :     }
    1538              : 
    1539            0 :     let body = http::Body::wrap_stream(resp.bytes_stream());
    1540            0 : 
    1541            0 :     builder.body(body).map_err(|err| {
    1542            0 :         ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
    1543            0 :     })
    1544            0 : }
    1545              : 
    1546              : /// Convert a [`reqwest::Request`] to a [hyper::Request`] by passing through
    1547              : /// a stable representation (string, bytes or integer)
    1548              : ///
    1549              : /// See [`convert_response`] for why we are doing it this way.
    1550            0 : async fn convert_request(
    1551            0 :     req: hyper::Request<Body>,
    1552            0 :     client: &reqwest::Client,
    1553            0 :     to_address: String,
    1554            0 : ) -> Result<reqwest::Request, ApiError> {
    1555              :     use std::str::FromStr;
    1556              : 
    1557            0 :     let (parts, body) = req.into_parts();
    1558            0 :     let method = reqwest::Method::from_str(parts.method.as_str()).map_err(|err| {
    1559            0 :         ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1560            0 :     })?;
    1561              : 
    1562            0 :     let path_and_query = parts.uri.path_and_query().ok_or_else(|| {
    1563            0 :         ApiError::InternalServerError(anyhow::anyhow!(
    1564            0 :             "Request conversion failed: no path and query"
    1565            0 :         ))
    1566            0 :     })?;
    1567              : 
    1568            0 :     let uri = reqwest::Url::from_str(
    1569            0 :         format!(
    1570            0 :             "{}{}",
    1571            0 :             to_address.trim_end_matches("/"),
    1572            0 :             path_and_query.as_str()
    1573            0 :         )
    1574            0 :         .as_str(),
    1575            0 :     )
    1576            0 :     .map_err(|err| {
    1577            0 :         ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1578            0 :     })?;
    1579              : 
    1580            0 :     let mut headers = reqwest::header::HeaderMap::new();
    1581            0 :     for (key, value) in parts.headers.into_iter() {
    1582            0 :         let key = match key {
    1583            0 :             Some(k) => k,
    1584              :             None => {
    1585            0 :                 continue;
    1586              :             }
    1587              :         };
    1588              : 
    1589            0 :         let key = reqwest::header::HeaderName::from_str(key.as_str()).map_err(|err| {
    1590            0 :             ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1591            0 :         })?;
    1592              : 
    1593            0 :         let value = reqwest::header::HeaderValue::from_bytes(value.as_bytes()).map_err(|err| {
    1594            0 :             ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1595            0 :         })?;
    1596              : 
    1597            0 :         headers.insert(key, value);
    1598              :     }
    1599              : 
    1600            0 :     let body = hyper::body::to_bytes(body).await.map_err(|err| {
    1601            0 :         ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1602            0 :     })?;
    1603              : 
    1604            0 :     client
    1605            0 :         .request(method, uri)
    1606            0 :         .headers(headers)
    1607            0 :         .body(body)
    1608            0 :         .build()
    1609            0 :         .map_err(|err| {
    1610            0 :             ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
    1611            0 :         })
    1612            0 : }
    1613              : 
    1614            0 : pub fn make_router(
    1615            0 :     service: Arc<Service>,
    1616            0 :     auth: Option<Arc<SwappableJwtAuth>>,
    1617            0 :     build_info: BuildInfo,
    1618            0 : ) -> RouterBuilder<hyper::Body, ApiError> {
    1619            0 :     let mut router = endpoint::make_router()
    1620            0 :         .middleware(prologue_leadership_status_check_middleware())
    1621            0 :         .middleware(prologue_metrics_middleware())
    1622            0 :         .middleware(epilogue_metrics_middleware());
    1623            0 :     if auth.is_some() {
    1624            0 :         router = router.middleware(auth_middleware(|request| {
    1625            0 :             let state = get_state(request);
    1626            0 :             if state.allowlist_routes.contains(request.uri()) {
    1627            0 :                 None
    1628              :             } else {
    1629            0 :                 state.auth.as_deref()
    1630              :             }
    1631            0 :         }));
    1632            0 :     }
    1633              : 
    1634            0 :     router
    1635            0 :         .data(Arc::new(HttpState::new(service, auth, build_info)))
    1636            0 :         .get("/metrics", |r| {
    1637            0 :             named_request_span(r, measured_metrics_handler, RequestName("metrics"))
    1638            0 :         })
    1639            0 :         // Non-prefixed generic endpoints (status, metrics)
    1640            0 :         .get("/status", |r| {
    1641            0 :             named_request_span(r, handle_status, RequestName("status"))
    1642            0 :         })
    1643            0 :         .get("/ready", |r| {
    1644            0 :             named_request_span(r, handle_ready, RequestName("ready"))
    1645            0 :         })
    1646            0 :         // Upcalls for the pageserver: point the pageserver's `control_plane_api` config to this prefix
    1647            0 :         .post("/upcall/v1/re-attach", |r| {
    1648            0 :             named_request_span(r, handle_re_attach, RequestName("upcall_v1_reattach"))
    1649            0 :         })
    1650            0 :         .post("/upcall/v1/validate", |r| {
    1651            0 :             named_request_span(r, handle_validate, RequestName("upcall_v1_validate"))
    1652            0 :         })
    1653            0 :         // Test/dev/debug endpoints
    1654            0 :         .post("/debug/v1/attach-hook", |r| {
    1655            0 :             named_request_span(r, handle_attach_hook, RequestName("debug_v1_attach_hook"))
    1656            0 :         })
    1657            0 :         .post("/debug/v1/inspect", |r| {
    1658            0 :             named_request_span(r, handle_inspect, RequestName("debug_v1_inspect"))
    1659            0 :         })
    1660            0 :         .post("/debug/v1/tenant/:tenant_id/drop", |r| {
    1661            0 :             named_request_span(r, handle_tenant_drop, RequestName("debug_v1_tenant_drop"))
    1662            0 :         })
    1663            0 :         .post("/debug/v1/node/:node_id/drop", |r| {
    1664            0 :             named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
    1665            0 :         })
    1666            0 :         .post("/debug/v1/tenant/:tenant_id/import", |r| {
    1667            0 :             named_request_span(
    1668            0 :                 r,
    1669            0 :                 handle_tenant_import,
    1670            0 :                 RequestName("debug_v1_tenant_import"),
    1671            0 :             )
    1672            0 :         })
    1673            0 :         .get("/debug/v1/tenant", |r| {
    1674            0 :             named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
    1675            0 :         })
    1676            0 :         .get("/debug/v1/tenant/:tenant_id/locate", |r| {
    1677            0 :             tenant_service_handler(
    1678            0 :                 r,
    1679            0 :                 handle_tenant_locate,
    1680            0 :                 RequestName("debug_v1_tenant_locate"),
    1681            0 :             )
    1682            0 :         })
    1683            0 :         .get("/debug/v1/scheduler", |r| {
    1684            0 :             named_request_span(r, handle_scheduler_dump, RequestName("debug_v1_scheduler"))
    1685            0 :         })
    1686            0 :         .post("/debug/v1/consistency_check", |r| {
    1687            0 :             named_request_span(
    1688            0 :                 r,
    1689            0 :                 handle_consistency_check,
    1690            0 :                 RequestName("debug_v1_consistency_check"),
    1691            0 :             )
    1692            0 :         })
    1693            0 :         .post("/debug/v1/reconcile_all", |r| {
    1694            0 :             request_span(r, handle_reconcile_all)
    1695            0 :         })
    1696            0 :         .put("/debug/v1/failpoints", |r| {
    1697            0 :             request_span(r, |r| failpoints_handler(r, CancellationToken::new()))
    1698            0 :         })
    1699            0 :         // Node operations
    1700            0 :         .post("/control/v1/node", |r| {
    1701            0 :             named_request_span(r, handle_node_register, RequestName("control_v1_node"))
    1702            0 :         })
    1703            0 :         .delete("/control/v1/node/:node_id", |r| {
    1704            0 :             named_request_span(r, handle_node_delete, RequestName("control_v1_node_delete"))
    1705            0 :         })
    1706            0 :         .get("/control/v1/node", |r| {
    1707            0 :             named_request_span(r, handle_node_list, RequestName("control_v1_node"))
    1708            0 :         })
    1709            0 :         .put("/control/v1/node/:node_id/config", |r| {
    1710            0 :             named_request_span(
    1711            0 :                 r,
    1712            0 :                 handle_node_configure,
    1713            0 :                 RequestName("control_v1_node_config"),
    1714            0 :             )
    1715            0 :         })
    1716            0 :         .get("/control/v1/node/:node_id", |r| {
    1717            0 :             named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
    1718            0 :         })
    1719            0 :         .get("/control/v1/node/:node_id/shards", |r| {
    1720            0 :             named_request_span(
    1721            0 :                 r,
    1722            0 :                 handle_node_shards,
    1723            0 :                 RequestName("control_v1_node_describe"),
    1724            0 :             )
    1725            0 :         })
    1726            0 :         .get("/control/v1/leader", |r| {
    1727            0 :             named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
    1728            0 :         })
    1729            0 :         .put("/control/v1/node/:node_id/drain", |r| {
    1730            0 :             named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
    1731            0 :         })
    1732            0 :         .delete("/control/v1/node/:node_id/drain", |r| {
    1733            0 :             named_request_span(
    1734            0 :                 r,
    1735            0 :                 handle_cancel_node_drain,
    1736            0 :                 RequestName("control_v1_cancel_node_drain"),
    1737            0 :             )
    1738            0 :         })
    1739            0 :         .put("/control/v1/node/:node_id/fill", |r| {
    1740            0 :             named_request_span(r, handle_node_fill, RequestName("control_v1_node_fill"))
    1741            0 :         })
    1742            0 :         .delete("/control/v1/node/:node_id/fill", |r| {
    1743            0 :             named_request_span(
    1744            0 :                 r,
    1745            0 :                 handle_cancel_node_fill,
    1746            0 :                 RequestName("control_v1_cancel_node_fill"),
    1747            0 :             )
    1748            0 :         })
    1749            0 :         // Metadata health operations
    1750            0 :         .post("/control/v1/metadata_health/update", |r| {
    1751            0 :             named_request_span(
    1752            0 :                 r,
    1753            0 :                 handle_metadata_health_update,
    1754            0 :                 RequestName("control_v1_metadata_health_update"),
    1755            0 :             )
    1756            0 :         })
    1757            0 :         .get("/control/v1/metadata_health/unhealthy", |r| {
    1758            0 :             named_request_span(
    1759            0 :                 r,
    1760            0 :                 handle_metadata_health_list_unhealthy,
    1761            0 :                 RequestName("control_v1_metadata_health_list_unhealthy"),
    1762            0 :             )
    1763            0 :         })
    1764            0 :         .post("/control/v1/metadata_health/outdated", |r| {
    1765            0 :             named_request_span(
    1766            0 :                 r,
    1767            0 :                 handle_metadata_health_list_outdated,
    1768            0 :                 RequestName("control_v1_metadata_health_list_outdated"),
    1769            0 :             )
    1770            0 :         })
    1771            0 :         // Tenant Shard operations
    1772            0 :         .put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {
    1773            0 :             tenant_service_handler(
    1774            0 :                 r,
    1775            0 :                 handle_tenant_shard_migrate,
    1776            0 :                 RequestName("control_v1_tenant_migrate"),
    1777            0 :             )
    1778            0 :         })
    1779            0 :         .put("/control/v1/tenant/:tenant_id/shard_split", |r| {
    1780            0 :             tenant_service_handler(
    1781            0 :                 r,
    1782            0 :                 handle_tenant_shard_split,
    1783            0 :                 RequestName("control_v1_tenant_shard_split"),
    1784            0 :             )
    1785            0 :         })
    1786            0 :         .get("/control/v1/tenant/:tenant_id", |r| {
    1787            0 :             tenant_service_handler(
    1788            0 :                 r,
    1789            0 :                 handle_tenant_describe,
    1790            0 :                 RequestName("control_v1_tenant_describe"),
    1791            0 :             )
    1792            0 :         })
    1793            0 :         .get("/control/v1/tenant", |r| {
    1794            0 :             tenant_service_handler(r, handle_tenant_list, RequestName("control_v1_tenant_list"))
    1795            0 :         })
    1796            0 :         .put("/control/v1/tenant/:tenant_id/policy", |r| {
    1797            0 :             named_request_span(
    1798            0 :                 r,
    1799            0 :                 handle_tenant_update_policy,
    1800            0 :                 RequestName("control_v1_tenant_policy"),
    1801            0 :             )
    1802            0 :         })
    1803            0 :         .put("/control/v1/preferred_azs", |r| {
    1804            0 :             named_request_span(
    1805            0 :                 r,
    1806            0 :                 handle_update_preferred_azs,
    1807            0 :                 RequestName("control_v1_preferred_azs"),
    1808            0 :             )
    1809            0 :         })
    1810            0 :         .put("/control/v1/step_down", |r| {
    1811            0 :             named_request_span(r, handle_step_down, RequestName("control_v1_step_down"))
    1812            0 :         })
    1813            0 :         .get("/control/v1/safekeeper/:id", |r| {
    1814            0 :             named_request_span(r, handle_get_safekeeper, RequestName("v1_safekeeper"))
    1815            0 :         })
    1816            0 :         .post("/control/v1/safekeeper/:id", |r| {
    1817            0 :             // id is in the body
    1818            0 :             named_request_span(r, handle_upsert_safekeeper, RequestName("v1_safekeeper"))
    1819            0 :         })
    1820            0 :         // Tenant operations
    1821            0 :         // The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into
    1822            0 :         // this service to manage tenants that actually consist of many tenant shards, as if they are a single entity.
    1823            0 :         .post("/v1/tenant", |r| {
    1824            0 :             tenant_service_handler(r, handle_tenant_create, RequestName("v1_tenant"))
    1825            0 :         })
    1826            0 :         .delete("/v1/tenant/:tenant_id", |r| {
    1827            0 :             tenant_service_handler(r, handle_tenant_delete, RequestName("v1_tenant"))
    1828            0 :         })
    1829            0 :         .put("/v1/tenant/config", |r| {
    1830            0 :             tenant_service_handler(r, handle_tenant_config_set, RequestName("v1_tenant_config"))
    1831            0 :         })
    1832            0 :         .get("/v1/tenant/:tenant_id/config", |r| {
    1833            0 :             tenant_service_handler(r, handle_tenant_config_get, RequestName("v1_tenant_config"))
    1834            0 :         })
    1835            0 :         .put("/v1/tenant/:tenant_shard_id/location_config", |r| {
    1836            0 :             tenant_service_handler(
    1837            0 :                 r,
    1838            0 :                 handle_tenant_location_config,
    1839            0 :                 RequestName("v1_tenant_location_config"),
    1840            0 :             )
    1841            0 :         })
    1842            0 :         .put("/v1/tenant/:tenant_id/time_travel_remote_storage", |r| {
    1843            0 :             tenant_service_handler(
    1844            0 :                 r,
    1845            0 :                 handle_tenant_time_travel_remote_storage,
    1846            0 :                 RequestName("v1_tenant_time_travel_remote_storage"),
    1847            0 :             )
    1848            0 :         })
    1849            0 :         .post("/v1/tenant/:tenant_id/secondary/download", |r| {
    1850            0 :             tenant_service_handler(
    1851            0 :                 r,
    1852            0 :                 handle_tenant_secondary_download,
    1853            0 :                 RequestName("v1_tenant_secondary_download"),
    1854            0 :             )
    1855            0 :         })
    1856            0 :         // Timeline operations
    1857            0 :         .delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
    1858            0 :             tenant_service_handler(
    1859            0 :                 r,
    1860            0 :                 handle_tenant_timeline_delete,
    1861            0 :                 RequestName("v1_tenant_timeline"),
    1862            0 :             )
    1863            0 :         })
    1864            0 :         .post("/v1/tenant/:tenant_id/timeline", |r| {
    1865            0 :             tenant_service_handler(
    1866            0 :                 r,
    1867            0 :                 handle_tenant_timeline_create,
    1868            0 :                 RequestName("v1_tenant_timeline"),
    1869            0 :             )
    1870            0 :         })
    1871            0 :         .put(
    1872            0 :             "/v1/tenant/:tenant_id/timeline/:timeline_id/archival_config",
    1873            0 :             |r| {
    1874            0 :                 tenant_service_handler(
    1875            0 :                     r,
    1876            0 :                     handle_tenant_timeline_archival_config,
    1877            0 :                     RequestName("v1_tenant_timeline_archival_config"),
    1878            0 :                 )
    1879            0 :             },
    1880            0 :         )
    1881            0 :         .put(
    1882            0 :             "/v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor",
    1883            0 :             |r| {
    1884            0 :                 tenant_service_handler(
    1885            0 :                     r,
    1886            0 :                     handle_tenant_timeline_detach_ancestor,
    1887            0 :                     RequestName("v1_tenant_timeline_detach_ancestor"),
    1888            0 :                 )
    1889            0 :             },
    1890            0 :         )
    1891            0 :         .post(
    1892            0 :             "/v1/tenant/:tenant_id/timeline/:timeline_id/block_gc",
    1893            0 :             |r| {
    1894            0 :                 tenant_service_handler(
    1895            0 :                     r,
    1896            0 :                     |s, r| handle_tenant_timeline_block_unblock_gc(s, r, BlockUnblock::Block),
    1897            0 :                     RequestName("v1_tenant_timeline_block_unblock_gc"),
    1898            0 :                 )
    1899            0 :             },
    1900            0 :         )
    1901            0 :         .post(
    1902            0 :             "/v1/tenant/:tenant_id/timeline/:timeline_id/unblock_gc",
    1903            0 :             |r| {
    1904            0 :                 tenant_service_handler(
    1905            0 :                     r,
    1906            0 :                     |s, r| handle_tenant_timeline_block_unblock_gc(s, r, BlockUnblock::Unblock),
    1907            0 :                     RequestName("v1_tenant_timeline_block_unblock_gc"),
    1908            0 :                 )
    1909            0 :             },
    1910            0 :         )
    1911            0 :         // Tenant detail GET passthrough to shard zero:
    1912            0 :         .get("/v1/tenant/:tenant_id", |r| {
    1913            0 :             tenant_service_handler(
    1914            0 :                 r,
    1915            0 :                 handle_tenant_timeline_passthrough,
    1916            0 :                 RequestName("v1_tenant_passthrough"),
    1917            0 :             )
    1918            0 :         })
    1919            0 :         // The `*` in the URL is a wildcard: any tenant/timeline GET APIs on the pageserver
    1920            0 :         // are implicitly exposed here.  This must be last in the list to avoid
    1921            0 :         // taking precedence over other GET methods we might implement by hand.
    1922            0 :         .get("/v1/tenant/:tenant_id/*", |r| {
    1923            0 :             tenant_service_handler(
    1924            0 :                 r,
    1925            0 :                 handle_tenant_timeline_passthrough,
    1926            0 :                 RequestName("v1_tenant_passthrough"),
    1927            0 :             )
    1928            0 :         })
    1929            0 : }

Generated by: LCOV version 2.1-beta