LCOV - fc67f8dc6087a0b4f4f0bcd74f6e1dc25fab8cf3.info

LCOV - code coverage report

Current view:	top level - compute_tools/src/http - api.rs (source / functions)		Coverage	Total	Hit
Test:	fc67f8dc6087a0b4f4f0bcd74f6e1dc25fab8cf3.info	Lines:	0.0 %	311	0
Test Date:	2024-09-24 13:57:57	Functions:	0.0 %	24	0

            Line data    Source code

       1              : use std::convert::Infallible;
       2              : use std::net::IpAddr;
       3              : use std::net::Ipv6Addr;
       4              : use std::net::SocketAddr;
       5              : use std::sync::Arc;
       6              : use std::thread;
       7              : 
       8              : use crate::catalog::SchemaDumpError;
       9              : use crate::catalog::{get_database_schema, get_dbs_and_roles};
      10              : use crate::compute::forward_termination_signal;
      11              : use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
      12              : use compute_api::requests::ConfigurationRequest;
      13              : use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
      14              : 
      15              : use anyhow::Result;
      16              : use hyper::header::CONTENT_TYPE;
      17              : use hyper::service::{make_service_fn, service_fn};
      18              : use hyper::{Body, Method, Request, Response, Server, StatusCode};
      19              : use tokio::task;
      20              : use tracing::{debug, error, info, warn};
      21              : use tracing_utils::http::OtelName;
      22              : use utils::http::request::must_get_query_param;
      23              : 
      24            0 : fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
      25            0 :     ComputeStatusResponse {
      26            0 :         start_time: state.start_time,
      27            0 :         tenant: state
      28            0 :             .pspec
      29            0 :             .as_ref()
      30            0 :             .map(|pspec| pspec.tenant_id.to_string()),
      31            0 :         timeline: state
      32            0 :             .pspec
      33            0 :             .as_ref()
      34            0 :             .map(|pspec| pspec.timeline_id.to_string()),
      35            0 :         status: state.status,
      36            0 :         last_active: state.last_active,
      37            0 :         error: state.error.clone(),
      38            0 :     }
      39            0 : }
      40              : 
      41              : // Service function to handle all available routes.
      42            0 : async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
      43            0 :     //
      44            0 :     // NOTE: The URI path is currently included in traces. That's OK because
      45            0 :     // it doesn't contain any variable parts or sensitive information. But
      46            0 :     // please keep that in mind if you change the routing here.
      47            0 :     //
      48            0 :     match (req.method(), req.uri().path()) {
      49              :         // Serialized compute state.
      50            0 :         (&Method::GET, "/status") => {
      51            0 :             debug!("serving /status GET request");
      52            0 :             let state = compute.state.lock().unwrap();
      53            0 :             let status_response = status_response_from_state(&state);
      54            0 :             Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
      55              :         }
      56              : 
      57              :         // Startup metrics in JSON format. Keep /metrics reserved for a possible
      58              :         // future use for Prometheus metrics format.
      59            0 :         (&Method::GET, "/metrics.json") => {
      60            0 :             info!("serving /metrics.json GET request");
      61            0 :             let metrics = compute.state.lock().unwrap().metrics.clone();
      62            0 :             Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
      63              :         }
      64              : 
      65              :         // Collect Postgres current usage insights
      66            0 :         (&Method::GET, "/insights") => {
      67            0 :             info!("serving /insights GET request");
      68            0 :             let status = compute.get_status();
      69            0 :             if status != ComputeStatus::Running {
      70            0 :                 let msg = format!("compute is not running, current status: {:?}", status);
      71            0 :                 error!(msg);
      72            0 :                 return Response::new(Body::from(msg));
      73            0 :             }
      74              : 
      75            0 :             let insights = compute.collect_insights().await;
      76            0 :             Response::new(Body::from(insights))
      77              :         }
      78              : 
      79            0 :         (&Method::POST, "/check_writability") => {
      80            0 :             info!("serving /check_writability POST request");
      81            0 :             let status = compute.get_status();
      82            0 :             if status != ComputeStatus::Running {
      83            0 :                 let msg = format!(
      84            0 :                     "invalid compute status for check_writability request: {:?}",
      85            0 :                     status
      86            0 :                 );
      87            0 :                 error!(msg);
      88            0 :                 return Response::new(Body::from(msg));
      89            0 :             }
      90              : 
      91            0 :             let res = crate::checker::check_writability(compute).await;
      92            0 :             match res {
      93            0 :                 Ok(_) => Response::new(Body::from("true")),
      94            0 :                 Err(e) => {
      95            0 :                     error!("check_writability failed: {}", e);
      96            0 :                     Response::new(Body::from(e.to_string()))
      97              :                 }
      98              :             }
      99              :         }
     100              : 
     101            0 :         (&Method::GET, "/info") => {
     102            0 :             let num_cpus = num_cpus::get_physical();
     103            0 :             info!("serving /info GET request. num_cpus: {}", num_cpus);
     104            0 :             Response::new(Body::from(
     105            0 :                 serde_json::json!({
     106            0 :                     "num_cpus": num_cpus,
     107            0 :                 })
     108            0 :                 .to_string(),
     109            0 :             ))
     110              :         }
     111              : 
     112              :         // Accept spec in JSON format and request compute configuration. If
     113              :         // anything goes wrong after we set the compute status to `ConfigurationPending`
     114              :         // and update compute state with new spec, we basically leave compute
     115              :         // in the potentially wrong state. That said, it's control-plane's
     116              :         // responsibility to watch compute state after reconfiguration request
     117              :         // and to clean restart in case of errors.
     118            0 :         (&Method::POST, "/configure") => {
     119            0 :             info!("serving /configure POST request");
     120            0 :             match handle_configure_request(req, compute).await {
     121            0 :                 Ok(msg) => Response::new(Body::from(msg)),
     122            0 :                 Err((msg, code)) => {
     123            0 :                     error!("error handling /configure request: {msg}");
     124            0 :                     render_json_error(&msg, code)
     125              :                 }
     126              :             }
     127              :         }
     128              : 
     129            0 :         (&Method::POST, "/terminate") => {
     130            0 :             info!("serving /terminate POST request");
     131            0 :             match handle_terminate_request(compute).await {
     132            0 :                 Ok(()) => Response::new(Body::empty()),
     133            0 :                 Err((msg, code)) => {
     134            0 :                     error!("error handling /terminate request: {msg}");
     135            0 :                     render_json_error(&msg, code)
     136              :                 }
     137              :             }
     138              :         }
     139              : 
     140            0 :         (&Method::GET, "/dbs_and_roles") => {
     141            0 :             info!("serving /dbs_and_roles GET request",);
     142            0 :             match get_dbs_and_roles(compute).await {
     143            0 :                 Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
     144              :                 Err(_) => {
     145            0 :                     render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
     146              :                 }
     147              :             }
     148              :         }
     149              : 
     150            0 :         (&Method::GET, "/database_schema") => {
     151            0 :             let database = match must_get_query_param(&req, "database") {
     152            0 :                 Err(e) => return e.into_response(),
     153            0 :                 Ok(database) => database,
     154            0 :             };
     155            0 :             info!("serving /database_schema GET request with database: {database}",);
     156            0 :             match get_database_schema(compute, &database).await {
     157            0 :                 Ok(res) => render_plain(Body::wrap_stream(res)),
     158              :                 Err(SchemaDumpError::DatabaseDoesNotExist) => {
     159            0 :                     render_json_error("database does not exist", StatusCode::NOT_FOUND)
     160              :                 }
     161            0 :                 Err(e) => {
     162            0 :                     error!("can't get schema dump: {}", e);
     163            0 :                     render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
     164              :                 }
     165              :             }
     166              :         }
     167              : 
     168              :         // download extension files from remote extension storage on demand
     169            0 :         (&Method::POST, route) if route.starts_with("/extension_server/") => {
     170            0 :             info!("serving {:?} POST request", route);
     171            0 :             info!("req.uri {:?}", req.uri());
     172              : 
     173              :             // don't even try to download extensions
     174              :             // if no remote storage is configured
     175            0 :             if compute.ext_remote_storage.is_none() {
     176            0 :                 info!("no extensions remote storage configured");
     177            0 :                 let mut resp = Response::new(Body::from("no remote storage configured"));
     178            0 :                 *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
     179            0 :                 return resp;
     180            0 :             }
     181            0 : 
     182            0 :             let mut is_library = false;
     183            0 :             if let Some(params) = req.uri().query() {
     184            0 :                 info!("serving {:?} POST request with params: {}", route, params);
     185            0 :                 if params == "is_library=true" {
     186            0 :                     is_library = true;
     187            0 :                 } else {
     188            0 :                     let mut resp = Response::new(Body::from("Wrong request parameters"));
     189            0 :                     *resp.status_mut() = StatusCode::BAD_REQUEST;
     190            0 :                     return resp;
     191              :                 }
     192            0 :             }
     193            0 :             let filename = route.split('/').last().unwrap().to_string();
     194            0 :             info!("serving /extension_server POST request, filename: {filename:?} is_library: {is_library}");
     195              : 
     196              :             // get ext_name and path from spec
     197              :             // don't lock compute_state for too long
     198            0 :             let ext = {
     199            0 :                 let compute_state = compute.state.lock().unwrap();
     200            0 :                 let pspec = compute_state.pspec.as_ref().expect("spec must be set");
     201            0 :                 let spec = &pspec.spec;
     202            0 : 
     203            0 :                 // debug only
     204            0 :                 info!("spec: {:?}", spec);
     205              : 
     206            0 :                 let remote_extensions = match spec.remote_extensions.as_ref() {
     207            0 :                     Some(r) => r,
     208              :                     None => {
     209            0 :                         info!("no remote extensions spec was provided");
     210            0 :                         let mut resp = Response::new(Body::from("no remote storage configured"));
     211            0 :                         *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
     212            0 :                         return resp;
     213              :                     }
     214              :                 };
     215              : 
     216            0 :                 remote_extensions.get_ext(
     217            0 :                     &filename,
     218            0 :                     is_library,
     219            0 :                     &compute.build_tag,
     220            0 :                     &compute.pgversion,
     221            0 :                 )
     222            0 :             };
     223            0 : 
     224            0 :             match ext {
     225            0 :                 Ok((ext_name, ext_path)) => {
     226            0 :                     match compute.download_extension(ext_name, ext_path).await {
     227            0 :                         Ok(_) => Response::new(Body::from("OK")),
     228            0 :                         Err(e) => {
     229            0 :                             error!("extension download failed: {}", e);
     230            0 :                             let mut resp = Response::new(Body::from(e.to_string()));
     231            0 :                             *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
     232            0 :                             resp
     233              :                         }
     234              :                     }
     235              :                 }
     236            0 :                 Err(e) => {
     237            0 :                     warn!("extension download failed to find extension: {}", e);
     238            0 :                     let mut resp = Response::new(Body::from("failed to find file"));
     239            0 :                     *resp.status_mut() = StatusCode::INTERNAL_SERVER_ERROR;
     240            0 :                     resp
     241              :                 }
     242              :             }
     243              :         }
     244              : 
     245              :         // Return the `404 Not Found` for any other routes.
     246              :         _ => {
     247            0 :             let mut not_found = Response::new(Body::from("404 Not Found"));
     248            0 :             *not_found.status_mut() = StatusCode::NOT_FOUND;
     249            0 :             not_found
     250              :         }
     251              :     }
     252            0 : }
     253              : 
     254            0 : async fn handle_configure_request(
     255            0 :     req: Request<Body>,
     256            0 :     compute: &Arc<ComputeNode>,
     257            0 : ) -> Result<String, (String, StatusCode)> {
     258            0 :     if !compute.live_config_allowed {
     259            0 :         return Err((
     260            0 :             "live configuration is not allowed for this compute node".to_string(),
     261            0 :             StatusCode::PRECONDITION_FAILED,
     262            0 :         ));
     263            0 :     }
     264              : 
     265            0 :     let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
     266            0 :     let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
     267            0 :     if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
     268            0 :         let spec = request.spec;
     269              : 
     270            0 :         let parsed_spec = match ParsedSpec::try_from(spec) {
     271            0 :             Ok(ps) => ps,
     272            0 :             Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
     273              :         };
     274              : 
     275              :         // XXX: wrap state update under lock in code blocks. Otherwise,
     276              :         // we will try to `Send` `mut state` into the spawned thread
     277              :         // below, which will cause the error:
     278              :         // ```
     279              :         // error: future cannot be sent between threads safely
     280              :         // ```
     281              :         {
     282            0 :             let mut state = compute.state.lock().unwrap();
     283            0 :             if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
     284            0 :                 let msg = format!(
     285            0 :                     "invalid compute status for configuration request: {:?}",
     286            0 :                     state.status.clone()
     287            0 :                 );
     288            0 :                 return Err((msg, StatusCode::PRECONDITION_FAILED));
     289            0 :             }
     290            0 :             state.pspec = Some(parsed_spec);
     291            0 :             state.status = ComputeStatus::ConfigurationPending;
     292            0 :             compute.state_changed.notify_all();
     293            0 :             drop(state);
     294            0 :             info!("set new spec and notified waiters");
     295              :         }
     296              : 
     297              :         // Spawn a blocking thread to wait for compute to become Running.
     298              :         // This is needed so that we do not block the main pool of workers and
     299              :         // can still serve other requests while some particular request
     300              :         // is waiting for compute to finish configuration.
     301            0 :         let c = compute.clone();
     302            0 :         task::spawn_blocking(move || {
     303            0 :             let mut state = c.state.lock().unwrap();
     304            0 :             while state.status != ComputeStatus::Running {
     305            0 :                 state = c.state_changed.wait(state).unwrap();
     306            0 :                 info!(
     307            0 :                     "waiting for compute to become Running, current status: {:?}",
     308            0 :                     state.status
     309              :                 );
     310              : 
     311            0 :                 if state.status == ComputeStatus::Failed {
     312            0 :                     let err = state.error.as_ref().map_or("unknown error", |x| x);
     313            0 :                     let msg = format!("compute configuration failed: {:?}", err);
     314            0 :                     return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
     315            0 :                 }
     316              :             }
     317              : 
     318            0 :             Ok(())
     319            0 :         })
     320            0 :         .await
     321            0 :         .unwrap()?;
     322              : 
     323              :         // Return current compute state if everything went well.
     324            0 :         let state = compute.state.lock().unwrap().clone();
     325            0 :         let status_response = status_response_from_state(&state);
     326            0 :         Ok(serde_json::to_string(&status_response).unwrap())
     327              :     } else {
     328            0 :         Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
     329              :     }
     330            0 : }
     331              : 
     332            0 : fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
     333            0 :     let error = GenericAPIError {
     334            0 :         error: e.to_string(),
     335            0 :     };
     336            0 :     Response::builder()
     337            0 :         .status(status)
     338            0 :         .header(CONTENT_TYPE, "application/json")
     339            0 :         .body(Body::from(serde_json::to_string(&error).unwrap()))
     340            0 :         .unwrap()
     341            0 : }
     342              : 
     343            0 : fn render_json(body: Body) -> Response<Body> {
     344            0 :     Response::builder()
     345            0 :         .header(CONTENT_TYPE, "application/json")
     346            0 :         .body(body)
     347            0 :         .unwrap()
     348            0 : }
     349              : 
     350            0 : fn render_plain(body: Body) -> Response<Body> {
     351            0 :     Response::builder()
     352            0 :         .header(CONTENT_TYPE, "text/plain")
     353            0 :         .body(body)
     354            0 :         .unwrap()
     355            0 : }
     356              : 
     357            0 : async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
     358            0 :     {
     359            0 :         let mut state = compute.state.lock().unwrap();
     360            0 :         if state.status == ComputeStatus::Terminated {
     361            0 :             return Ok(());
     362            0 :         }
     363            0 :         if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
     364            0 :             let msg = format!(
     365            0 :                 "invalid compute status for termination request: {:?}",
     366            0 :                 state.status.clone()
     367            0 :             );
     368            0 :             return Err((msg, StatusCode::PRECONDITION_FAILED));
     369            0 :         }
     370            0 :         state.status = ComputeStatus::TerminationPending;
     371            0 :         compute.state_changed.notify_all();
     372            0 :         drop(state);
     373            0 :     }
     374            0 :     forward_termination_signal();
     375            0 :     info!("sent signal and notified waiters");
     376              : 
     377              :     // Spawn a blocking thread to wait for compute to become Terminated.
     378              :     // This is needed so that we do not block the main pool of workers and
     379              :     // can still serve other requests while some particular request
     380              :     // is waiting for compute to finish termination.
     381            0 :     let c = compute.clone();
     382            0 :     task::spawn_blocking(move || {
     383            0 :         let mut state = c.state.lock().unwrap();
     384            0 :         while state.status != ComputeStatus::Terminated {
     385            0 :             state = c.state_changed.wait(state).unwrap();
     386            0 :             info!(
     387            0 :                 "waiting for compute to become Terminated, current status: {:?}",
     388            0 :                 state.status
     389              :             );
     390              :         }
     391              : 
     392            0 :         Ok(())
     393            0 :     })
     394            0 :     .await
     395            0 :     .unwrap()?;
     396            0 :     info!("terminated Postgres");
     397            0 :     Ok(())
     398            0 : }
     399              : 
     400              : // Main Hyper HTTP server function: runs the server and blocks waiting on it forever.
     401              : #[tokio::main]
     402            0 : async fn serve(port: u16, state: Arc<ComputeNode>) {
     403            0 :     // this usually binds to both IPv4 and IPv6 on linux
     404            0 :     // see e.g. https://github.com/rust-lang/rust/pull/34440
     405            0 :     let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
     406            0 : 
     407            0 :     let make_service = make_service_fn(move |_conn| {
     408            0 :         let state = state.clone();
     409            0 :         async move {
     410            0 :             Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
     411            0 :                 let state = state.clone();
     412            0 :                 async move {
     413            0 :                     Ok::<_, Infallible>(
     414            0 :                         // NOTE: We include the URI path in the string. It
     415            0 :                         // doesn't contain any variable parts or sensitive
     416            0 :                         // information in this API.
     417            0 :                         tracing_utils::http::tracing_handler(
     418            0 :                             req,
     419            0 :                             |req| routes(req, &state),
     420            0 :                             OtelName::UriPath,
     421            0 :                         )
     422            0 :                         .await,
     423            0 :                     )
     424            0 :                 }
     425            0 :             }))
     426            0 :         }
     427            0 :     });
     428            0 : 
     429            0 :     info!("starting HTTP server on {}", addr);
     430            0 : 
     431            0 :     let server = Server::bind(&addr).serve(make_service);
     432            0 : 
     433            0 :     // Run this server forever
     434            0 :     if let Err(e) = server.await {
     435            0 :         error!("server error: {}", e);
     436            0 :     }
     437            0 : }
     438              : 
     439              : /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
     440            0 : pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
     441            0 :     let state = Arc::clone(state);
     442            0 : 
     443            0 :     Ok(thread::Builder::new()
     444            0 :         .name("http-endpoint".into())
     445            0 :         .spawn(move || serve(port, state))?)
     446            0 : }

Generated by: LCOV version 2.1-beta