LCOV - 42f947419473a288706e86ecdf7c2863d760d5d7.info - compute_tools/src/bin/compute

LCOV - code coverage report

Current view:	top level - compute_tools/src/bin - compute_ctl.rs (source / functions)		Coverage	Total	Hit
Test:	42f947419473a288706e86ecdf7c2863d760d5d7.info	Lines:	18.1 %	531	96
Test Date:	2024-08-02 21:34:27	Functions:	9.1 %	22	2

            Line data    Source code

       1              : //!
       2              : //! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
       3              : //! `ExecStart` option. It will handle all the `Neon` specifics during compute node
       4              : //! initialization:
       5              : //! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
       6              : //! - Every start is a fresh start, so the data directory is removed and
       7              : //!   initialized again on each run.
       8              : //! - If remote_extension_config is provided, it will be used to fetch extensions list
       9              : //!   and download `shared_preload_libraries` from the remote storage.
      10              : //! - Next it will put configuration files into the `PGDATA` directory.
      11              : //! - Sync safekeepers and get commit LSN.
      12              : //! - Get `basebackup` from pageserver using the LSN returned by the previous step.
      13              : //! - Try to start `postgres` and wait until it is ready to accept connections.
      14              : //! - Check and alter/drop/create roles and databases.
      15              : //! - Hang waiting on the `postmaster` process to exit.
      16              : //!
      17              : //! Also `compute_ctl` spawns two separate service threads:
      18              : //! - `compute-monitor` checks the last Postgres activity timestamp and saves it
      19              : //!   into the shared `ComputeNode`;
      20              : //! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
      21              : //!   last activity requests.
      22              : //!
      23              : //! If `AUTOSCALING` environment variable is set, `compute_ctl` will start the
      24              : //! `vm-monitor` located in [`neon/libs/vm_monitor`]. For VM compute nodes,
      25              : //! `vm-monitor` communicates with the VM autoscaling system. It coordinates
      26              : //! downscaling and requests immediate upscaling under resource pressure.
      27              : //!
      28              : //! Usage example:
      29              : //! ```sh
      30              : //! compute_ctl -D /var/db/postgres/compute \
      31              : //!             -C 'postgresql://cloud_admin@localhost/postgres' \
      32              : //!             -S /var/db/postgres/specs/current.json \
      33              : //!             -b /usr/local/bin/postgres \
      34              : //!             -r http://pg-ext-s3-gateway
      35              : //! ```
      36              : use std::collections::HashMap;
      37              : use std::fs::File;
      38              : use std::path::Path;
      39              : use std::process::exit;
      40              : use std::sync::atomic::Ordering;
      41              : use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
      42              : use std::{thread, time::Duration};
      43              : 
      44              : use anyhow::{Context, Result};
      45              : use chrono::Utc;
      46              : use clap::Arg;
      47              : use signal_hook::consts::{SIGQUIT, SIGTERM};
      48              : use signal_hook::{consts::SIGINT, iterator::Signals};
      49              : use tracing::{error, info, warn};
      50              : use url::Url;
      51              : 
      52              : use compute_api::responses::ComputeStatus;
      53              : use compute_api::spec::ComputeSpec;
      54              : 
      55              : use compute_tools::compute::{
      56              :     forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
      57              : };
      58              : use compute_tools::configurator::launch_configurator;
      59              : use compute_tools::extension_server::get_pg_version;
      60              : use compute_tools::http::api::launch_http_server;
      61              : use compute_tools::logger::*;
      62              : use compute_tools::monitor::launch_monitor;
      63              : use compute_tools::params::*;
      64              : use compute_tools::spec::*;
      65              : use compute_tools::swap::resize_swap;
      66              : use rlimit::{setrlimit, Resource};
      67              : 
      68              : // this is an arbitrary build tag. Fine as a default / for testing purposes
      69              : // in-case of not-set environment var
      70              : const BUILD_TAG_DEFAULT: &str = "latest";
      71              : 
      72            0 : fn main() -> Result<()> {
      73            0 :     let (build_tag, clap_args) = init()?;
      74              : 
      75              :     // enable core dumping for all child processes
      76            0 :     setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
      77              : 
      78            0 :     let (pg_handle, start_pg_result) = {
      79              :         // Enter startup tracing context
      80            0 :         let _startup_context_guard = startup_context_from_env();
      81              : 
      82            0 :         let cli_args = process_cli(&clap_args)?;
      83              : 
      84            0 :         let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
      85              : 
      86            0 :         let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
      87              : 
      88            0 :         start_postgres(&clap_args, wait_spec_result)?
      89              : 
      90              :         // Startup is finished, exit the startup tracing span
      91              :     };
      92              : 
      93              :     // PostgreSQL is now running, if startup was successful. Wait until it exits.
      94            0 :     let wait_pg_result = wait_postgres(pg_handle)?;
      95              : 
      96            0 :     let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
      97              : 
      98            0 :     maybe_delay_exit(delay_exit);
      99            0 : 
     100            0 :     deinit_and_exit(wait_pg_result);
     101            0 : }
     102              : 
     103            0 : fn init() -> Result<(String, clap::ArgMatches)> {
     104            0 :     init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
     105              : 
     106            0 :     let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
     107            0 :     thread::spawn(move || {
     108            0 :         for sig in signals.forever() {
     109            0 :             handle_exit_signal(sig);
     110            0 :         }
     111            0 :     });
     112            0 : 
     113            0 :     let build_tag = option_env!("BUILD_TAG")
     114            0 :         .unwrap_or(BUILD_TAG_DEFAULT)
     115            0 :         .to_string();
     116            0 :     info!("build_tag: {build_tag}");
     117              : 
     118            0 :     Ok((build_tag, cli().get_matches()))
     119            0 : }
     120              : 
     121            0 : fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
     122            0 :     let pgbin_default = "postgres";
     123            0 :     let pgbin = matches
     124            0 :         .get_one::<String>("pgbin")
     125            0 :         .map(|s| s.as_str())
     126            0 :         .unwrap_or(pgbin_default);
     127            0 : 
     128            0 :     let ext_remote_storage = matches
     129            0 :         .get_one::<String>("remote-ext-config")
     130            0 :         // Compatibility hack: if the control plane specified any remote-ext-config
     131            0 :         // use the default value for extension storage proxy gateway.
     132            0 :         // Remove this once the control plane is updated to pass the gateway URL
     133            0 :         .map(|conf| {
     134            0 :             if conf.starts_with("http") {
     135            0 :                 conf.trim_end_matches('/')
     136              :             } else {
     137            0 :                 "http://pg-ext-s3-gateway"
     138              :             }
     139            0 :         });
     140            0 : 
     141            0 :     let http_port = *matches
     142            0 :         .get_one::<u16>("http-port")
     143            0 :         .expect("http-port is required");
     144            0 :     let pgdata = matches
     145            0 :         .get_one::<String>("pgdata")
     146            0 :         .expect("PGDATA path is required");
     147            0 :     let connstr = matches
     148            0 :         .get_one::<String>("connstr")
     149            0 :         .expect("Postgres connection string is required");
     150            0 :     let spec_json = matches.get_one::<String>("spec");
     151            0 :     let spec_path = matches.get_one::<String>("spec-path");
     152            0 :     let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
     153            0 : 
     154            0 :     Ok(ProcessCliResult {
     155            0 :         connstr,
     156            0 :         pgdata,
     157            0 :         pgbin,
     158            0 :         ext_remote_storage,
     159            0 :         http_port,
     160            0 :         spec_json,
     161            0 :         spec_path,
     162            0 :         resize_swap_on_bind,
     163            0 :     })
     164            0 : }
     165              : 
     166              : struct ProcessCliResult<'clap> {
     167              :     connstr: &'clap str,
     168              :     pgdata: &'clap str,
     169              :     pgbin: &'clap str,
     170              :     ext_remote_storage: Option<&'clap str>,
     171              :     http_port: u16,
     172              :     spec_json: Option<&'clap String>,
     173              :     spec_path: Option<&'clap String>,
     174              :     resize_swap_on_bind: bool,
     175              : }
     176              : 
     177            0 : fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
     178            0 :     // Extract OpenTelemetry context for the startup actions from the
     179            0 :     // TRACEPARENT and TRACESTATE env variables, and attach it to the current
     180            0 :     // tracing context.
     181            0 :     //
     182            0 :     // This is used to propagate the context for the 'start_compute' operation
     183            0 :     // from the neon control plane. This allows linking together the wider
     184            0 :     // 'start_compute' operation that creates the compute container, with the
     185            0 :     // startup actions here within the container.
     186            0 :     //
     187            0 :     // There is no standard for passing context in env variables, but a lot of
     188            0 :     // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
     189            0 :     // https://github.com/open-telemetry/opentelemetry-specification/issues/740
     190            0 :     //
     191            0 :     // Switch to the startup context here, and exit it once the startup has
     192            0 :     // completed and Postgres is up and running.
     193            0 :     //
     194            0 :     // If this pod is pre-created without binding it to any particular endpoint
     195            0 :     // yet, this isn't the right place to enter the startup context. In that
     196            0 :     // case, the control plane should pass the tracing context as part of the
     197            0 :     // /configure API call.
     198            0 :     //
     199            0 :     // NOTE: This is supposed to only cover the *startup* actions. Once
     200            0 :     // postgres is configured and up-and-running, we exit this span. Any other
     201            0 :     // actions that are performed on incoming HTTP requests, for example, are
     202            0 :     // performed in separate spans.
     203            0 :     //
     204            0 :     // XXX: If the pod is restarted, we perform the startup actions in the same
     205            0 :     // context as the original startup actions, which probably doesn't make
     206            0 :     // sense.
     207            0 :     let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
     208            0 :     if let Ok(val) = std::env::var("TRACEPARENT") {
     209            0 :         startup_tracing_carrier.insert("traceparent".to_string(), val);
     210            0 :     }
     211            0 :     if let Ok(val) = std::env::var("TRACESTATE") {
     212            0 :         startup_tracing_carrier.insert("tracestate".to_string(), val);
     213            0 :     }
     214            0 :     if !startup_tracing_carrier.is_empty() {
     215              :         use opentelemetry::propagation::TextMapPropagator;
     216              :         use opentelemetry::sdk::propagation::TraceContextPropagator;
     217            0 :         let guard = TraceContextPropagator::new()
     218            0 :             .extract(&startup_tracing_carrier)
     219            0 :             .attach();
     220            0 :         info!("startup tracing context attached");
     221            0 :         Some(guard)
     222              :     } else {
     223            0 :         None
     224              :     }
     225            0 : }
     226              : 
     227            0 : fn try_spec_from_cli(
     228            0 :     matches: &clap::ArgMatches,
     229            0 :     ProcessCliResult {
     230            0 :         spec_json,
     231            0 :         spec_path,
     232            0 :         ..
     233            0 :     }: &ProcessCliResult,
     234            0 : ) -> Result<CliSpecParams> {
     235            0 :     let compute_id = matches.get_one::<String>("compute-id");
     236            0 :     let control_plane_uri = matches.get_one::<String>("control-plane-uri");
     237            0 : 
     238            0 :     let spec;
     239            0 :     let mut live_config_allowed = false;
     240            0 :     match spec_json {
     241              :         // First, try to get cluster spec from the cli argument
     242            0 :         Some(json) => {
     243            0 :             info!("got spec from cli argument {}", json);
     244            0 :             spec = Some(serde_json::from_str(json)?);
     245              :         }
     246              :         None => {
     247              :             // Second, try to read it from the file if path is provided
     248            0 :             if let Some(sp) = spec_path {
     249            0 :                 let path = Path::new(sp);
     250            0 :                 let file = File::open(path)?;
     251            0 :                 spec = Some(serde_json::from_reader(file)?);
     252            0 :                 live_config_allowed = true;
     253            0 :             } else if let Some(id) = compute_id {
     254            0 :                 if let Some(cp_base) = control_plane_uri {
     255            0 :                     live_config_allowed = true;
     256            0 :                     spec = match get_spec_from_control_plane(cp_base, id) {
     257            0 :                         Ok(s) => s,
     258            0 :                         Err(e) => {
     259            0 :                             error!("cannot get response from control plane: {}", e);
     260            0 :                             panic!("neither spec nor confirmation that compute is in the Empty state was received");
     261              :                         }
     262              :                     };
     263              :                 } else {
     264            0 :                     panic!("must specify both --control-plane-uri and --compute-id or none");
     265              :                 }
     266              :             } else {
     267            0 :                 panic!(
     268            0 :                     "compute spec should be provided by one of the following ways: \
     269            0 :                     --spec OR --spec-path OR --control-plane-uri and --compute-id"
     270            0 :                 );
     271              :             }
     272              :         }
     273              :     };
     274              : 
     275            0 :     Ok(CliSpecParams {
     276            0 :         spec,
     277            0 :         live_config_allowed,
     278            0 :     })
     279            0 : }
     280              : 
     281              : struct CliSpecParams {
     282              :     /// If a spec was provided via CLI or file, the [`ComputeSpec`]
     283              :     spec: Option<ComputeSpec>,
     284              :     live_config_allowed: bool,
     285              : }
     286              : 
     287            0 : fn wait_spec(
     288            0 :     build_tag: String,
     289            0 :     ProcessCliResult {
     290            0 :         connstr,
     291            0 :         pgdata,
     292            0 :         pgbin,
     293            0 :         ext_remote_storage,
     294            0 :         resize_swap_on_bind,
     295            0 :         http_port,
     296            0 :         ..
     297            0 :     }: ProcessCliResult,
     298            0 :     CliSpecParams {
     299            0 :         spec,
     300            0 :         live_config_allowed,
     301            0 :     }: CliSpecParams,
     302            0 : ) -> Result<WaitSpecResult> {
     303            0 :     let mut new_state = ComputeState::new();
     304              :     let spec_set;
     305              : 
     306            0 :     if let Some(spec) = spec {
     307            0 :         let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
     308            0 :         info!("new pspec.spec: {:?}", pspec.spec);
     309            0 :         new_state.pspec = Some(pspec);
     310            0 :         spec_set = true;
     311            0 :     } else {
     312            0 :         spec_set = false;
     313            0 :     }
     314            0 :     let compute_node = ComputeNode {
     315            0 :         connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
     316            0 :         pgdata: pgdata.to_string(),
     317            0 :         pgbin: pgbin.to_string(),
     318            0 :         pgversion: get_pg_version(pgbin),
     319            0 :         live_config_allowed,
     320            0 :         state: Mutex::new(new_state),
     321            0 :         state_changed: Condvar::new(),
     322            0 :         ext_remote_storage: ext_remote_storage.map(|s| s.to_string()),
     323            0 :         ext_download_progress: RwLock::new(HashMap::new()),
     324            0 :         build_tag,
     325            0 :     };
     326            0 :     let compute = Arc::new(compute_node);
     327            0 : 
     328            0 :     // If this is a pooled VM, prewarm before starting HTTP server and becoming
     329            0 :     // available for binding. Prewarming helps Postgres start quicker later,
     330            0 :     // because QEMU will already have its memory allocated from the host, and
     331            0 :     // the necessary binaries will already be cached.
     332            0 :     if !spec_set {
     333            0 :         compute.prewarm_postgres()?;
     334            0 :     }
     335              : 
     336              :     // Launch http service first, so that we can serve control-plane requests
     337              :     // while configuration is still in progress.
     338            0 :     let _http_handle =
     339            0 :         launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
     340            0 : 
     341            0 :     if !spec_set {
     342              :         // No spec provided, hang waiting for it.
     343            0 :         info!("no compute spec provided, waiting");
     344              : 
     345            0 :         let mut state = compute.state.lock().unwrap();
     346            0 :         while state.status != ComputeStatus::ConfigurationPending {
     347            0 :             state = compute.state_changed.wait(state).unwrap();
     348            0 : 
     349            0 :             if state.status == ComputeStatus::ConfigurationPending {
     350            0 :                 info!("got spec, continue configuration");
     351              :                 // Spec is already set by the http server handler.
     352            0 :                 break;
     353            0 :             }
     354              :         }
     355              : 
     356              :         // Record for how long we slept waiting for the spec.
     357            0 :         let now = Utc::now();
     358            0 :         state.metrics.wait_for_spec_ms = now
     359            0 :             .signed_duration_since(state.start_time)
     360            0 :             .to_std()
     361            0 :             .unwrap()
     362            0 :             .as_millis() as u64;
     363            0 : 
     364            0 :         // Reset start time, so that the total startup time that is calculated later will
     365            0 :         // not include the time that we waited for the spec.
     366            0 :         state.start_time = now;
     367            0 :     }
     368              : 
     369            0 :     Ok(WaitSpecResult {
     370            0 :         compute,
     371            0 :         http_port,
     372            0 :         resize_swap_on_bind,
     373            0 :     })
     374            0 : }
     375              : 
     376              : struct WaitSpecResult {
     377              :     compute: Arc<ComputeNode>,
     378              :     // passed through from ProcessCliResult
     379              :     http_port: u16,
     380              :     resize_swap_on_bind: bool,
     381              : }
     382              : 
     383            0 : fn start_postgres(
     384            0 :     // need to allow unused because `matches` is only used if target_os = "linux"
     385            0 :     #[allow(unused_variables)] matches: &clap::ArgMatches,
     386            0 :     WaitSpecResult {
     387            0 :         compute,
     388            0 :         http_port,
     389            0 :         resize_swap_on_bind,
     390            0 :     }: WaitSpecResult,
     391            0 : ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
     392            0 :     // We got all we need, update the state.
     393            0 :     let mut state = compute.state.lock().unwrap();
     394            0 :     state.status = ComputeStatus::Init;
     395            0 :     compute.state_changed.notify_all();
     396            0 : 
     397            0 :     info!(
     398            0 :         "running compute with features: {:?}",
     399            0 :         state.pspec.as_ref().unwrap().spec.features
     400              :     );
     401              :     // before we release the mutex, fetch the swap size (if any) for later.
     402            0 :     let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
     403            0 :     drop(state);
     404            0 : 
     405            0 :     // Launch remaining service threads
     406            0 :     let _monitor_handle = launch_monitor(&compute);
     407            0 :     let _configurator_handle = launch_configurator(&compute);
     408            0 : 
     409            0 :     let mut prestartup_failed = false;
     410            0 :     let mut delay_exit = false;
     411              : 
     412              :     // Resize swap to the desired size if the compute spec says so
     413            0 :     if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
     414              :         // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
     415              :         // *before* starting postgres.
     416              :         //
     417              :         // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
     418              :         // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
     419              :         // OOM-killed during startup because swap wasn't available yet.
     420            0 :         match resize_swap(size_bytes) {
     421              :             Ok(()) => {
     422            0 :                 let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
     423            0 :                 info!(%size_bytes, %size_gib, "resized swap");
     424              :             }
     425            0 :             Err(err) => {
     426            0 :                 let err = err.context("failed to resize swap");
     427            0 :                 error!("{err:#}");
     428              : 
     429              :                 // Mark compute startup as failed; don't try to start postgres, and report this
     430              :                 // error to the control plane when it next asks.
     431            0 :                 prestartup_failed = true;
     432            0 :                 let mut state = compute.state.lock().unwrap();
     433            0 :                 state.error = Some(format!("{err:?}"));
     434            0 :                 state.status = ComputeStatus::Failed;
     435            0 :                 compute.state_changed.notify_all();
     436            0 :                 delay_exit = true;
     437              :             }
     438              :         }
     439            0 :     }
     440              : 
     441            0 :     let extension_server_port: u16 = http_port;
     442            0 : 
     443            0 :     // Start Postgres
     444            0 :     let mut pg = None;
     445            0 :     if !prestartup_failed {
     446            0 :         pg = match compute.start_compute(extension_server_port) {
     447            0 :             Ok(pg) => Some(pg),
     448            0 :             Err(err) => {
     449            0 :                 error!("could not start the compute node: {:#}", err);
     450            0 :                 let mut state = compute.state.lock().unwrap();
     451            0 :                 state.error = Some(format!("{:?}", err));
     452            0 :                 state.status = ComputeStatus::Failed;
     453            0 :                 // Notify others that Postgres failed to start. In case of configuring the
     454            0 :                 // empty compute, it's likely that API handler is still waiting for compute
     455            0 :                 // state change. With this we will notify it that compute is in Failed state,
     456            0 :                 // so control plane will know about it earlier and record proper error instead
     457            0 :                 // of timeout.
     458            0 :                 compute.state_changed.notify_all();
     459            0 :                 drop(state); // unlock
     460            0 :                 delay_exit = true;
     461            0 :                 None
     462              :             }
     463              :         };
     464              :     } else {
     465            0 :         warn!("skipping postgres startup because pre-startup step failed");
     466              :     }
     467              : 
     468              :     // Start the vm-monitor if directed to. The vm-monitor only runs on linux
     469              :     // because it requires cgroups.
     470              :     cfg_if::cfg_if! {
     471              :         if #[cfg(target_os = "linux")] {
     472              :             use std::env;
     473              :             use tokio_util::sync::CancellationToken;
     474            0 :             let vm_monitor_addr = matches
     475            0 :                 .get_one::<String>("vm-monitor-addr")
     476            0 :                 .expect("--vm-monitor-addr should always be set because it has a default arg");
     477            0 :             let file_cache_connstr = matches.get_one::<String>("filecache-connstr");
     478            0 :             let cgroup = matches.get_one::<String>("cgroup");
     479              : 
     480              :             // Only make a runtime if we need to.
     481              :             // Note: it seems like you can make a runtime in an inner scope and
     482              :             // if you start a task in it it won't be dropped. However, make it
     483              :             // in the outermost scope just to be safe.
     484            0 :             let rt = if env::var_os("AUTOSCALING").is_some() {
     485            0 :                 Some(
     486            0 :                     tokio::runtime::Builder::new_multi_thread()
     487            0 :                         .worker_threads(4)
     488            0 :                         .enable_all()
     489            0 :                         .build()
     490            0 :                         .expect("failed to create tokio runtime for monitor")
     491            0 :                 )
     492              :             } else {
     493            0 :                 None
     494              :             };
     495              : 
     496              :             // This token is used internally by the monitor to clean up all threads
     497            0 :             let token = CancellationToken::new();
     498            0 : 
     499            0 :             let vm_monitor = rt.as_ref().map(|rt| {
     500            0 :                 rt.spawn(vm_monitor::start(
     501            0 :                     Box::leak(Box::new(vm_monitor::Args {
     502            0 :                         cgroup: cgroup.cloned(),
     503            0 :                         pgconnstr: file_cache_connstr.cloned(),
     504            0 :                         addr: vm_monitor_addr.clone(),
     505            0 :                     })),
     506            0 :                     token.clone(),
     507            0 :                 ))
     508            0 :             });
     509            0 :         }
     510            0 :     }
     511            0 : 
     512            0 :     Ok((
     513            0 :         pg,
     514            0 :         StartPostgresResult {
     515            0 :             delay_exit,
     516            0 :             compute,
     517            0 :             #[cfg(target_os = "linux")]
     518            0 :             rt,
     519            0 :             #[cfg(target_os = "linux")]
     520            0 :             token,
     521            0 :             #[cfg(target_os = "linux")]
     522            0 :             vm_monitor,
     523            0 :         },
     524            0 :     ))
     525            0 : }
     526              : 
     527              : type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
     528              : 
     529              : struct StartPostgresResult {
     530              :     delay_exit: bool,
     531              :     // passed through from WaitSpecResult
     532              :     compute: Arc<ComputeNode>,
     533              : 
     534              :     #[cfg(target_os = "linux")]
     535              :     rt: Option<tokio::runtime::Runtime>,
     536              :     #[cfg(target_os = "linux")]
     537              :     token: tokio_util::sync::CancellationToken,
     538              :     #[cfg(target_os = "linux")]
     539              :     vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
     540              : }
     541              : 
     542            0 : fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
     543            0 :     // Wait for the child Postgres process forever. In this state Ctrl+C will
     544            0 :     // propagate to Postgres and it will be shut down as well.
     545            0 :     let mut exit_code = None;
     546            0 :     if let Some((mut pg, logs_handle)) = pg {
     547            0 :         let ecode = pg
     548            0 :             .wait()
     549            0 :             .expect("failed to start waiting on Postgres process");
     550            0 :         PG_PID.store(0, Ordering::SeqCst);
     551            0 : 
     552            0 :         // Process has exited, so we can join the logs thread.
     553            0 :         let _ = logs_handle
     554            0 :             .join()
     555            0 :             .map_err(|e| tracing::error!("log thread panicked: {:?}", e));
     556            0 : 
     557            0 :         info!("Postgres exited with code {}, shutting down", ecode);
     558            0 :         exit_code = ecode.code()
     559            0 :     }
     560              : 
     561            0 :     Ok(WaitPostgresResult { exit_code })
     562            0 : }
     563              : 
     564              : struct WaitPostgresResult {
     565              :     exit_code: Option<i32>,
     566              : }
     567              : 
     568            0 : fn cleanup_after_postgres_exit(
     569            0 :     StartPostgresResult {
     570            0 :         mut delay_exit,
     571            0 :         compute,
     572            0 :         #[cfg(target_os = "linux")]
     573            0 :         vm_monitor,
     574            0 :         #[cfg(target_os = "linux")]
     575            0 :         token,
     576            0 :         #[cfg(target_os = "linux")]
     577            0 :         rt,
     578            0 :     }: StartPostgresResult,
     579            0 : ) -> Result<bool> {
     580              :     // Terminate the vm_monitor so it releases the file watcher on
     581              :     // /sys/fs/cgroup/neon-postgres.
     582              :     // Note: the vm-monitor only runs on linux because it requires cgroups.
     583              :     cfg_if::cfg_if! {
     584              :         if #[cfg(target_os = "linux")] {
     585            0 :             if let Some(handle) = vm_monitor {
     586            0 :                 // Kills all threads spawned by the monitor
     587            0 :                 token.cancel();
     588            0 :                 // Kills the actual task running the monitor
     589            0 :                 handle.abort();
     590            0 : 
     591            0 :                 // If handle is some, rt must have been used to produce it, and
     592            0 :                 // hence is also some
     593            0 :                 rt.unwrap().shutdown_timeout(Duration::from_secs(2));
     594            0 :             }
     595              :         }
     596              :     }
     597              : 
     598              :     // Maybe sync safekeepers again, to speed up next startup
     599            0 :     let compute_state = compute.state.lock().unwrap().clone();
     600            0 :     let pspec = compute_state.pspec.as_ref().expect("spec must be set");
     601            0 :     if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
     602            0 :         info!("syncing safekeepers on shutdown");
     603            0 :         let storage_auth_token = pspec.storage_auth_token.clone();
     604            0 :         let lsn = compute.sync_safekeepers(storage_auth_token)?;
     605            0 :         info!("synced safekeepers at lsn {lsn}");
     606            0 :     }
     607              : 
     608            0 :     let mut state = compute.state.lock().unwrap();
     609            0 :     if state.status == ComputeStatus::TerminationPending {
     610            0 :         state.status = ComputeStatus::Terminated;
     611            0 :         compute.state_changed.notify_all();
     612            0 :         // we were asked to terminate gracefully, don't exit to avoid restart
     613            0 :         delay_exit = true
     614            0 :     }
     615            0 :     drop(state);
     616              : 
     617            0 :     if let Err(err) = compute.check_for_core_dumps() {
     618            0 :         error!("error while checking for core dumps: {err:?}");
     619            0 :     }
     620              : 
     621            0 :     Ok(delay_exit)
     622            0 : }
     623              : 
     624            0 : fn maybe_delay_exit(delay_exit: bool) {
     625            0 :     // If launch failed, keep serving HTTP requests for a while, so the cloud
     626            0 :     // control plane can get the actual error.
     627            0 :     if delay_exit {
     628            0 :         info!("giving control plane 30s to collect the error before shutdown");
     629            0 :         thread::sleep(Duration::from_secs(30));
     630            0 :     }
     631            0 : }
     632              : 
     633            0 : fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
     634            0 :     // Shutdown trace pipeline gracefully, so that it has a chance to send any
     635            0 :     // pending traces before we exit. Shutting down OTEL tracing provider may
     636            0 :     // hang for quite some time, see, for example:
     637            0 :     // - https://github.com/open-telemetry/opentelemetry-rust/issues/868
     638            0 :     // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636
     639            0 :     //
     640            0 :     // Yet, we want computes to shut down fast enough, as we may need a new one
     641            0 :     // for the same timeline ASAP. So wait no longer than 2s for the shutdown to
     642            0 :     // complete, then just error out and exit the main thread.
     643            0 :     info!("shutting down tracing");
     644            0 :     let (sender, receiver) = mpsc::channel();
     645            0 :     let _ = thread::spawn(move || {
     646            0 :         tracing_utils::shutdown_tracing();
     647            0 :         sender.send(()).ok()
     648            0 :     });
     649            0 :     let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000));
     650            0 :     if shutdown_res.is_err() {
     651            0 :         error!("timed out while shutting down tracing, exiting anyway");
     652            0 :     }
     653              : 
     654            0 :     info!("shutting down");
     655            0 :     exit(exit_code.unwrap_or(1))
     656              : }
     657              : 
     658            2 : fn cli() -> clap::Command {
     659            2 :     // Env variable is set by `cargo`
     660            2 :     let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
     661            2 :     clap::Command::new("compute_ctl")
     662            2 :         .version(version)
     663            2 :         .arg(
     664            2 :             Arg::new("http-port")
     665            2 :                 .long("http-port")
     666            2 :                 .value_name("HTTP_PORT")
     667            2 :                 .default_value("3080")
     668            2 :                 .value_parser(clap::value_parser!(u16))
     669            2 :                 .required(false),
     670            2 :         )
     671            2 :         .arg(
     672            2 :             Arg::new("connstr")
     673            2 :                 .short('C')
     674            2 :                 .long("connstr")
     675            2 :                 .value_name("DATABASE_URL")
     676            2 :                 .required(true),
     677            2 :         )
     678            2 :         .arg(
     679            2 :             Arg::new("pgdata")
     680            2 :                 .short('D')
     681            2 :                 .long("pgdata")
     682            2 :                 .value_name("DATADIR")
     683            2 :                 .required(true),
     684            2 :         )
     685            2 :         .arg(
     686            2 :             Arg::new("pgbin")
     687            2 :                 .short('b')
     688            2 :                 .long("pgbin")
     689            2 :                 .default_value("postgres")
     690            2 :                 .value_name("POSTGRES_PATH"),
     691            2 :         )
     692            2 :         .arg(
     693            2 :             Arg::new("spec")
     694            2 :                 .short('s')
     695            2 :                 .long("spec")
     696            2 :                 .value_name("SPEC_JSON"),
     697            2 :         )
     698            2 :         .arg(
     699            2 :             Arg::new("spec-path")
     700            2 :                 .short('S')
     701            2 :                 .long("spec-path")
     702            2 :                 .value_name("SPEC_PATH"),
     703            2 :         )
     704            2 :         .arg(
     705            2 :             Arg::new("compute-id")
     706            2 :                 .short('i')
     707            2 :                 .long("compute-id")
     708            2 :                 .value_name("COMPUTE_ID"),
     709            2 :         )
     710            2 :         .arg(
     711            2 :             Arg::new("control-plane-uri")
     712            2 :                 .short('p')
     713            2 :                 .long("control-plane-uri")
     714            2 :                 .value_name("CONTROL_PLANE_API_BASE_URI"),
     715            2 :         )
     716            2 :         .arg(
     717            2 :             Arg::new("remote-ext-config")
     718            2 :                 .short('r')
     719            2 :                 .long("remote-ext-config")
     720            2 :                 .value_name("REMOTE_EXT_CONFIG"),
     721            2 :         )
     722            2 :         // TODO(fprasx): we currently have default arguments because the cloud PR
     723            2 :         // to pass them in hasn't been merged yet. We should get rid of them once
     724            2 :         // the PR is merged.
     725            2 :         .arg(
     726            2 :             Arg::new("vm-monitor-addr")
     727            2 :                 .long("vm-monitor-addr")
     728            2 :                 .default_value("0.0.0.0:10301")
     729            2 :                 .value_name("VM_MONITOR_ADDR"),
     730            2 :         )
     731            2 :         .arg(
     732            2 :             Arg::new("cgroup")
     733            2 :                 .long("cgroup")
     734            2 :                 .default_value("neon-postgres")
     735            2 :                 .value_name("CGROUP"),
     736            2 :         )
     737            2 :         .arg(
     738            2 :             Arg::new("filecache-connstr")
     739            2 :                 .long("filecache-connstr")
     740            2 :                 .default_value(
     741            2 :                     "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
     742            2 :                 )
     743            2 :                 .value_name("FILECACHE_CONNSTR"),
     744            2 :         )
     745            2 :         .arg(
     746            2 :             Arg::new("resize-swap-on-bind")
     747            2 :                 .long("resize-swap-on-bind")
     748            2 :                 .action(clap::ArgAction::SetTrue),
     749            2 :         )
     750            2 : }
     751              : 
     752              : /// When compute_ctl is killed, send also termination signal to sync-safekeepers
     753              : /// to prevent leakage. TODO: it is better to convert compute_ctl to async and
     754              : /// wait for termination which would be easy then.
     755            0 : fn handle_exit_signal(sig: i32) {
     756            0 :     info!("received {sig} termination signal");
     757            0 :     forward_termination_signal();
     758            0 :     exit(1);
     759              : }
     760              : 
     761              : #[test]
     762            2 : fn verify_cli() {
     763            2 :     cli().debug_assert()
     764            2 : }

Generated by: LCOV version 2.1-beta