LCOV - 727bdccc1d7d53837da843959afb612f56da4e79.info

LCOV - code coverage report

Current view:	top level - control_plane/src - endpoint.rs (source / functions)		Coverage	Total	Hit
Test:	727bdccc1d7d53837da843959afb612f56da4e79.info	Lines:	0.0 %	599	0
Test Date:	2025-01-30 15:18:43	Functions:	0.0 %	45	0

            Line data    Source code

       1              : //! Code to manage compute endpoints
       2              : //!
       3              : //! In the local test environment, the data for each endpoint is stored in
       4              : //!
       5              : //! ```text
       6              : //!   .neon/endpoints/<endpoint id>
       7              : //! ```
       8              : //!
       9              : //! Some basic information about the endpoint, like the tenant and timeline IDs,
      10              : //! are stored in the `endpoint.json` file. The `endpoint.json` file is created
      11              : //! when the endpoint is created, and doesn't change afterwards.
      12              : //!
      13              : //! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
      14              : //! started, we launch `compute_ctl`. It synchronizes the safekeepers, downloads
      15              : //! the basebackup from the pageserver to initialize the data directory, and
      16              : //! finally launches the PostgreSQL process. It watches the PostgreSQL process
      17              : //! until it exits.
      18              : //!
      19              : //! When an endpoint is created, a `postgresql.conf` file is also created in
      20              : //! the endpoint's directory. The file can be modified before starting PostgreSQL.
      21              : //! However, the `postgresql.conf` file in the endpoint directory is not used directly
      22              : //! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another
      23              : //! copy of it in the data directory.
      24              : //!
      25              : //! Directory contents:
      26              : //!
      27              : //! ```text
      28              : //! .neon/endpoints/main/
      29              : //!     compute.log               - log output of `compute_ctl` and `postgres`
      30              : //!     endpoint.json             - serialized `EndpointConf` struct
      31              : //!     postgresql.conf           - postgresql settings
      32              : //!     spec.json                 - passed to `compute_ctl`
      33              : //!     pgdata/
      34              : //!         postgresql.conf       - copy of postgresql.conf created by `compute_ctl`
      35              : //!         zenith.signal
      36              : //!         <other PostgreSQL files>
      37              : //! ```
      38              : //!
      39              : use std::collections::BTreeMap;
      40              : use std::net::SocketAddr;
      41              : use std::net::TcpStream;
      42              : use std::path::PathBuf;
      43              : use std::process::Command;
      44              : use std::str::FromStr;
      45              : use std::sync::Arc;
      46              : use std::time::Duration;
      47              : 
      48              : use anyhow::{anyhow, bail, Context, Result};
      49              : use compute_api::spec::Database;
      50              : use compute_api::spec::PgIdent;
      51              : use compute_api::spec::RemoteExtSpec;
      52              : use compute_api::spec::Role;
      53              : use nix::sys::signal::kill;
      54              : use nix::sys::signal::Signal;
      55              : use pageserver_api::shard::ShardStripeSize;
      56              : use reqwest::header::CONTENT_TYPE;
      57              : use serde::{Deserialize, Serialize};
      58              : use url::Host;
      59              : use utils::id::{NodeId, TenantId, TimelineId};
      60              : 
      61              : use crate::local_env::LocalEnv;
      62              : use crate::postgresql_conf::PostgresConf;
      63              : use crate::storage_controller::StorageController;
      64              : 
      65              : use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
      66              : use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
      67              : 
      68              : // contents of an endpoint.json file
      69            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
      70              : pub struct EndpointConf {
      71              :     endpoint_id: String,
      72              :     tenant_id: TenantId,
      73              :     timeline_id: TimelineId,
      74              :     mode: ComputeMode,
      75              :     pg_port: u16,
      76              :     http_port: u16,
      77              :     pg_version: u32,
      78              :     skip_pg_catalog_updates: bool,
      79              :     drop_subscriptions_before_start: bool,
      80              :     features: Vec<ComputeFeature>,
      81              : }
      82              : 
      83              : //
      84              : // ComputeControlPlane
      85              : //
      86              : pub struct ComputeControlPlane {
      87              :     base_port: u16,
      88              : 
      89              :     // endpoint ID is the key
      90              :     pub endpoints: BTreeMap<String, Arc<Endpoint>>,
      91              : 
      92              :     env: LocalEnv,
      93              : }
      94              : 
      95              : impl ComputeControlPlane {
      96              :     // Load current endpoints from the endpoints/ subdirectories
      97            0 :     pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
      98            0 :         let mut endpoints = BTreeMap::default();
      99            0 :         for endpoint_dir in std::fs::read_dir(env.endpoints_path())
     100            0 :             .with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
     101              :         {
     102            0 :             let ep_res = Endpoint::from_dir_entry(endpoint_dir?, &env);
     103            0 :             let ep = match ep_res {
     104            0 :                 Ok(ep) => ep,
     105            0 :                 Err(e) => match e.downcast::<std::io::Error>() {
     106            0 :                     Ok(e) => {
     107            0 :                         // A parallel task could delete an endpoint while we have just scanned the directory
     108            0 :                         if e.kind() == std::io::ErrorKind::NotFound {
     109            0 :                             continue;
     110              :                         } else {
     111            0 :                             Err(e)?
     112              :                         }
     113              :                     }
     114            0 :                     Err(e) => Err(e)?,
     115              :                 },
     116              :             };
     117            0 :             endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
     118              :         }
     119              : 
     120            0 :         Ok(ComputeControlPlane {
     121            0 :             base_port: 55431,
     122            0 :             endpoints,
     123            0 :             env,
     124            0 :         })
     125            0 :     }
     126              : 
     127            0 :     fn get_port(&mut self) -> u16 {
     128            0 :         1 + self
     129            0 :             .endpoints
     130            0 :             .values()
     131            0 :             .map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port()))
     132            0 :             .max()
     133            0 :             .unwrap_or(self.base_port)
     134            0 :     }
     135              : 
     136              :     #[allow(clippy::too_many_arguments)]
     137            0 :     pub fn new_endpoint(
     138            0 :         &mut self,
     139            0 :         endpoint_id: &str,
     140            0 :         tenant_id: TenantId,
     141            0 :         timeline_id: TimelineId,
     142            0 :         pg_port: Option<u16>,
     143            0 :         http_port: Option<u16>,
     144            0 :         pg_version: u32,
     145            0 :         mode: ComputeMode,
     146            0 :         skip_pg_catalog_updates: bool,
     147            0 :         drop_subscriptions_before_start: bool,
     148            0 :     ) -> Result<Arc<Endpoint>> {
     149            0 :         let pg_port = pg_port.unwrap_or_else(|| self.get_port());
     150            0 :         let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
     151            0 :         let ep = Arc::new(Endpoint {
     152            0 :             endpoint_id: endpoint_id.to_owned(),
     153            0 :             pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
     154            0 :             http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
     155            0 :             env: self.env.clone(),
     156            0 :             timeline_id,
     157            0 :             mode,
     158            0 :             tenant_id,
     159            0 :             pg_version,
     160            0 :             // We don't set up roles and databases in the spec locally, so we don't need to
     161            0 :             // do catalog updates. Catalog updates also include check availability
     162            0 :             // data creation. Yet, we have tests that check that size and db dump
     163            0 :             // before and after start are the same. So, skip catalog updates,
     164            0 :             // with this we basically test a case of waking up an idle compute, where
     165            0 :             // we also skip catalog updates in the cloud.
     166            0 :             skip_pg_catalog_updates,
     167            0 :             drop_subscriptions_before_start,
     168            0 :             features: vec![],
     169            0 :         });
     170            0 : 
     171            0 :         ep.create_endpoint_dir()?;
     172              :         std::fs::write(
     173            0 :             ep.endpoint_path().join("endpoint.json"),
     174            0 :             serde_json::to_string_pretty(&EndpointConf {
     175            0 :                 endpoint_id: endpoint_id.to_string(),
     176            0 :                 tenant_id,
     177            0 :                 timeline_id,
     178            0 :                 mode,
     179            0 :                 http_port,
     180            0 :                 pg_port,
     181            0 :                 pg_version,
     182            0 :                 skip_pg_catalog_updates,
     183            0 :                 drop_subscriptions_before_start,
     184            0 :                 features: vec![],
     185            0 :             })?,
     186            0 :         )?;
     187              :         std::fs::write(
     188            0 :             ep.endpoint_path().join("postgresql.conf"),
     189            0 :             ep.setup_pg_conf()?.to_string(),
     190            0 :         )?;
     191              : 
     192            0 :         self.endpoints
     193            0 :             .insert(ep.endpoint_id.clone(), Arc::clone(&ep));
     194            0 : 
     195            0 :         Ok(ep)
     196            0 :     }
     197              : 
     198            0 :     pub fn check_conflicting_endpoints(
     199            0 :         &self,
     200            0 :         mode: ComputeMode,
     201            0 :         tenant_id: TenantId,
     202            0 :         timeline_id: TimelineId,
     203            0 :     ) -> Result<()> {
     204            0 :         if matches!(mode, ComputeMode::Primary) {
     205              :             // this check is not complete, as you could have a concurrent attempt at
     206              :             // creating another primary, both reading the state before checking it here,
     207              :             // but it's better than nothing.
     208            0 :             let mut duplicates = self.endpoints.iter().filter(|(_k, v)| {
     209            0 :                 v.tenant_id == tenant_id
     210            0 :                     && v.timeline_id == timeline_id
     211            0 :                     && v.mode == mode
     212            0 :                     && v.status() != EndpointStatus::Stopped
     213            0 :             });
     214              : 
     215            0 :             if let Some((key, _)) = duplicates.next() {
     216            0 :                 bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.");
     217            0 :             }
     218            0 :         }
     219            0 :         Ok(())
     220            0 :     }
     221              : }
     222              : 
     223              : ///////////////////////////////////////////////////////////////////////////////
     224              : 
     225              : #[derive(Debug)]
     226              : pub struct Endpoint {
     227              :     /// used as the directory name
     228              :     endpoint_id: String,
     229              :     pub tenant_id: TenantId,
     230              :     pub timeline_id: TimelineId,
     231              :     pub mode: ComputeMode,
     232              : 
     233              :     // port and address of the Postgres server and `compute_ctl`'s HTTP API
     234              :     pub pg_address: SocketAddr,
     235              :     pub http_address: SocketAddr,
     236              : 
     237              :     // postgres major version in the format: 14, 15, etc.
     238              :     pg_version: u32,
     239              : 
     240              :     // These are not part of the endpoint as such, but the environment
     241              :     // the endpoint runs in.
     242              :     pub env: LocalEnv,
     243              : 
     244              :     // Optimizations
     245              :     skip_pg_catalog_updates: bool,
     246              : 
     247              :     drop_subscriptions_before_start: bool,
     248              :     // Feature flags
     249              :     features: Vec<ComputeFeature>,
     250              : }
     251              : 
     252              : #[derive(PartialEq, Eq)]
     253              : pub enum EndpointStatus {
     254              :     Running,
     255              :     Stopped,
     256              :     Crashed,
     257              :     RunningNoPidfile,
     258              : }
     259              : 
     260              : impl std::fmt::Display for EndpointStatus {
     261            0 :     fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
     262            0 :         let s = match self {
     263            0 :             Self::Running => "running",
     264            0 :             Self::Stopped => "stopped",
     265            0 :             Self::Crashed => "crashed",
     266            0 :             Self::RunningNoPidfile => "running, no pidfile",
     267              :         };
     268            0 :         write!(writer, "{}", s)
     269            0 :     }
     270              : }
     271              : 
     272              : impl Endpoint {
     273            0 :     fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
     274            0 :         if !entry.file_type()?.is_dir() {
     275            0 :             anyhow::bail!(
     276            0 :                 "Endpoint::from_dir_entry failed: '{}' is not a directory",
     277            0 :                 entry.path().display()
     278            0 :             );
     279            0 :         }
     280            0 : 
     281            0 :         // parse data directory name
     282            0 :         let fname = entry.file_name();
     283            0 :         let endpoint_id = fname.to_str().unwrap().to_string();
     284              : 
     285              :         // Read the endpoint.json file
     286            0 :         let conf: EndpointConf =
     287            0 :             serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
     288              : 
     289            0 :         Ok(Endpoint {
     290            0 :             pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
     291            0 :             http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
     292            0 :             endpoint_id,
     293            0 :             env: env.clone(),
     294            0 :             timeline_id: conf.timeline_id,
     295            0 :             mode: conf.mode,
     296            0 :             tenant_id: conf.tenant_id,
     297            0 :             pg_version: conf.pg_version,
     298            0 :             skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
     299            0 :             drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
     300            0 :             features: conf.features,
     301            0 :         })
     302            0 :     }
     303              : 
     304            0 :     fn create_endpoint_dir(&self) -> Result<()> {
     305            0 :         std::fs::create_dir_all(self.endpoint_path()).with_context(|| {
     306            0 :             format!(
     307            0 :                 "could not create endpoint directory {}",
     308            0 :                 self.endpoint_path().display()
     309            0 :             )
     310            0 :         })
     311            0 :     }
     312              : 
     313              :     // Generate postgresql.conf with default configuration
     314            0 :     fn setup_pg_conf(&self) -> Result<PostgresConf> {
     315            0 :         let mut conf = PostgresConf::new();
     316            0 :         conf.append("max_wal_senders", "10");
     317            0 :         conf.append("wal_log_hints", "off");
     318            0 :         conf.append("max_replication_slots", "10");
     319            0 :         conf.append("hot_standby", "on");
     320            0 :         // Set to 1MB to both exercise getPage requests/LFC, and still have enough room for
     321            0 :         // Postgres to operate. Everything smaller might be not enough for Postgres under load,
     322            0 :         // and can cause errors like 'no unpinned buffers available', see
     323            0 :         // <https://github.com/neondatabase/neon/issues/9956>
     324            0 :         conf.append("shared_buffers", "1MB");
     325            0 :         // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
     326            0 :         // batching logic.  Set this to 2 so that we exercise the code a bit without letting
     327            0 :         // individual tests do a lot of concurrent work on underpowered test machines
     328            0 :         conf.append("effective_io_concurrency", "2");
     329            0 :         conf.append("fsync", "off");
     330            0 :         conf.append("max_connections", "100");
     331            0 :         conf.append("wal_level", "logical");
     332            0 :         // wal_sender_timeout is the maximum time to wait for WAL replication.
     333            0 :         // It also defines how often the walreceiver will send a feedback message to the wal sender.
     334            0 :         conf.append("wal_sender_timeout", "5s");
     335            0 :         conf.append("listen_addresses", &self.pg_address.ip().to_string());
     336            0 :         conf.append("port", &self.pg_address.port().to_string());
     337            0 :         conf.append("wal_keep_size", "0");
     338            0 :         // walproposer panics when basebackup is invalid, it is pointless to restart in this case.
     339            0 :         conf.append("restart_after_crash", "off");
     340            0 : 
     341            0 :         // Load the 'neon' extension
     342            0 :         conf.append("shared_preload_libraries", "neon");
     343            0 : 
     344            0 :         conf.append_line("");
     345            0 :         // Replication-related configurations, such as WAL sending
     346            0 :         match &self.mode {
     347              :             ComputeMode::Primary => {
     348              :                 // Configure backpressure
     349              :                 // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
     350              :                 //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
     351              :                 //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
     352              :                 //   Actually latency should be much smaller (better if < 1sec). But we assume that recently
     353              :                 //   updated pages are not requested from pageserver.
     354              :                 // - Replication flush lag depends on speed of persisting data by checkpointer (creation of
     355              :                 //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
     356              :                 //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
     357              :                 //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
     358              :                 // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
     359              :                 //   To be able to restore database in case of pageserver node crash, safekeeper should not
     360              :                 //   remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
     361              :                 //   (if they are not able to upload WAL to S3).
     362            0 :                 conf.append("max_replication_write_lag", "15MB");
     363            0 :                 conf.append("max_replication_flush_lag", "10GB");
     364            0 : 
     365            0 :                 if !self.env.safekeepers.is_empty() {
     366            0 :                     // Configure Postgres to connect to the safekeepers
     367            0 :                     conf.append("synchronous_standby_names", "walproposer");
     368            0 : 
     369            0 :                     let safekeepers = self
     370            0 :                         .env
     371            0 :                         .safekeepers
     372            0 :                         .iter()
     373            0 :                         .map(|sk| format!("localhost:{}", sk.get_compute_port()))
     374            0 :                         .collect::<Vec<String>>()
     375            0 :                         .join(",");
     376            0 :                     conf.append("neon.safekeepers", &safekeepers);
     377            0 :                 } else {
     378            0 :                     // We only use setup without safekeepers for tests,
     379            0 :                     // and don't care about data durability on pageserver,
     380            0 :                     // so set more relaxed synchronous_commit.
     381            0 :                     conf.append("synchronous_commit", "remote_write");
     382            0 : 
     383            0 :                     // Configure the node to stream WAL directly to the pageserver
     384            0 :                     // This isn't really a supported configuration, but can be useful for
     385            0 :                     // testing.
     386            0 :                     conf.append("synchronous_standby_names", "pageserver");
     387            0 :                 }
     388              :             }
     389            0 :             ComputeMode::Static(lsn) => {
     390            0 :                 conf.append("recovery_target_lsn", &lsn.to_string());
     391            0 :             }
     392              :             ComputeMode::Replica => {
     393            0 :                 assert!(!self.env.safekeepers.is_empty());
     394              : 
     395              :                 // TODO: use future host field from safekeeper spec
     396              :                 // Pass the list of safekeepers to the replica so that it can connect to any of them,
     397              :                 // whichever is available.
     398            0 :                 let sk_ports = self
     399            0 :                     .env
     400            0 :                     .safekeepers
     401            0 :                     .iter()
     402            0 :                     .map(|x| x.get_compute_port().to_string())
     403            0 :                     .collect::<Vec<_>>()
     404            0 :                     .join(",");
     405            0 :                 let sk_hosts = vec!["localhost"; self.env.safekeepers.len()].join(",");
     406            0 : 
     407            0 :                 let connstr = format!(
     408            0 :                     "host={} port={} options='-c timeline_id={} tenant_id={}' application_name=replica replication=true",
     409            0 :                     sk_hosts,
     410            0 :                     sk_ports,
     411            0 :                     &self.timeline_id.to_string(),
     412            0 :                     &self.tenant_id.to_string(),
     413            0 :                 );
     414            0 : 
     415            0 :                 let slot_name = format!("repl_{}_", self.timeline_id);
     416            0 :                 conf.append("primary_conninfo", connstr.as_str());
     417            0 :                 conf.append("primary_slot_name", slot_name.as_str());
     418            0 :                 conf.append("hot_standby", "on");
     419            0 :                 // prefetching of blocks referenced in WAL doesn't make sense for us
     420            0 :                 // Neon hot standby ignores pages that are not in the shared_buffers
     421            0 :                 if self.pg_version >= 15 {
     422            0 :                     conf.append("recovery_prefetch", "off");
     423            0 :                 }
     424              :             }
     425              :         }
     426              : 
     427            0 :         Ok(conf)
     428            0 :     }
     429              : 
     430            0 :     pub fn endpoint_path(&self) -> PathBuf {
     431            0 :         self.env.endpoints_path().join(&self.endpoint_id)
     432            0 :     }
     433              : 
     434            0 :     pub fn pgdata(&self) -> PathBuf {
     435            0 :         self.endpoint_path().join("pgdata")
     436            0 :     }
     437              : 
     438            0 :     pub fn status(&self) -> EndpointStatus {
     439            0 :         let timeout = Duration::from_millis(300);
     440            0 :         let has_pidfile = self.pgdata().join("postmaster.pid").exists();
     441            0 :         let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();
     442            0 : 
     443            0 :         match (has_pidfile, can_connect) {
     444            0 :             (true, true) => EndpointStatus::Running,
     445            0 :             (false, false) => EndpointStatus::Stopped,
     446            0 :             (true, false) => EndpointStatus::Crashed,
     447            0 :             (false, true) => EndpointStatus::RunningNoPidfile,
     448              :         }
     449            0 :     }
     450              : 
     451            0 :     fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
     452            0 :         let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
     453            0 :         let mut cmd = Command::new(&pg_ctl_path);
     454            0 :         cmd.args(
     455            0 :             [
     456            0 :                 &[
     457            0 :                     "-D",
     458            0 :                     self.pgdata().to_str().unwrap(),
     459            0 :                     "-w", //wait till pg_ctl actually does what was asked
     460            0 :                 ],
     461            0 :                 args,
     462            0 :             ]
     463            0 :             .concat(),
     464            0 :         )
     465            0 :         .env_clear()
     466            0 :         .env(
     467            0 :             "LD_LIBRARY_PATH",
     468            0 :             self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
     469            0 :         )
     470            0 :         .env(
     471            0 :             "DYLD_LIBRARY_PATH",
     472            0 :             self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
     473              :         );
     474              : 
     475              :         // Pass authentication token used for the connections to pageserver and safekeepers
     476            0 :         if let Some(token) = auth_token {
     477            0 :             cmd.env("NEON_AUTH_TOKEN", token);
     478            0 :         }
     479              : 
     480            0 :         let pg_ctl = cmd
     481            0 :             .output()
     482            0 :             .context(format!("{} failed", pg_ctl_path.display()))?;
     483            0 :         if !pg_ctl.status.success() {
     484            0 :             anyhow::bail!(
     485            0 :                 "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
     486            0 :                 pg_ctl.status,
     487            0 :                 String::from_utf8_lossy(&pg_ctl.stdout),
     488            0 :                 String::from_utf8_lossy(&pg_ctl.stderr),
     489            0 :             );
     490            0 :         }
     491            0 : 
     492            0 :         Ok(())
     493            0 :     }
     494              : 
     495            0 :     fn wait_for_compute_ctl_to_exit(&self, send_sigterm: bool) -> Result<()> {
     496            0 :         // TODO use background_process::stop_process instead: https://github.com/neondatabase/neon/pull/6482
     497            0 :         let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
     498            0 :         let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
     499            0 :         let pid = nix::unistd::Pid::from_raw(pid as i32);
     500            0 :         if send_sigterm {
     501            0 :             kill(pid, Signal::SIGTERM).ok();
     502            0 :         }
     503            0 :         crate::background_process::wait_until_stopped("compute_ctl", pid)?;
     504            0 :         Ok(())
     505            0 :     }
     506              : 
     507            0 :     fn read_postgresql_conf(&self) -> Result<String> {
     508            0 :         // Slurp the endpoints/<endpoint id>/postgresql.conf file into
     509            0 :         // memory. We will include it in the spec file that we pass to
     510            0 :         // `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf
     511            0 :         // in the data directory.
     512            0 :         let postgresql_conf_path = self.endpoint_path().join("postgresql.conf");
     513            0 :         match std::fs::read(&postgresql_conf_path) {
     514            0 :             Ok(content) => Ok(String::from_utf8(content)?),
     515            0 :             Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok("".to_string()),
     516            0 :             Err(e) => Err(anyhow::Error::new(e).context(format!(
     517            0 :                 "failed to read config file in {}",
     518            0 :                 postgresql_conf_path.to_str().unwrap()
     519            0 :             ))),
     520              :         }
     521            0 :     }
     522              : 
     523            0 :     fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
     524            0 :         pageservers
     525            0 :             .iter()
     526            0 :             .map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
     527            0 :             .collect::<Vec<_>>()
     528            0 :             .join(",")
     529            0 :     }
     530              : 
     531              :     /// Map safekeeper IDs to their actual connection strings.
     532            0 :     fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
     533            0 :         let mut safekeeper_connstrings = Vec::new();
     534            0 :         if self.mode == ComputeMode::Primary {
     535            0 :             for sk_id in sk_ids {
     536            0 :                 let sk = self
     537            0 :                     .env
     538            0 :                     .safekeepers
     539            0 :                     .iter()
     540            0 :                     .find(|node| node.id == sk_id)
     541            0 :                     .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
     542            0 :                 safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
     543              :             }
     544            0 :         }
     545            0 :         Ok(safekeeper_connstrings)
     546            0 :     }
     547              : 
     548            0 :     pub async fn start(
     549            0 :         &self,
     550            0 :         auth_token: &Option<String>,
     551            0 :         safekeepers: Vec<NodeId>,
     552            0 :         pageservers: Vec<(Host, u16)>,
     553            0 :         remote_ext_config: Option<&String>,
     554            0 :         shard_stripe_size: usize,
     555            0 :         create_test_user: bool,
     556            0 :     ) -> Result<()> {
     557            0 :         if self.status() == EndpointStatus::Running {
     558            0 :             anyhow::bail!("The endpoint is already running");
     559            0 :         }
     560              : 
     561            0 :         let postgresql_conf = self.read_postgresql_conf()?;
     562              : 
     563              :         // We always start the compute node from scratch, so if the Postgres
     564              :         // data dir exists from a previous launch, remove it first.
     565            0 :         if self.pgdata().exists() {
     566            0 :             std::fs::remove_dir_all(self.pgdata())?;
     567            0 :         }
     568              : 
     569            0 :         let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
     570            0 :         assert!(!pageserver_connstring.is_empty());
     571              : 
     572            0 :         let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
     573              : 
     574              :         // check for file remote_extensions_spec.json
     575              :         // if it is present, read it and pass to compute_ctl
     576            0 :         let remote_extensions_spec_path = self.endpoint_path().join("remote_extensions_spec.json");
     577            0 :         let remote_extensions_spec = std::fs::File::open(remote_extensions_spec_path);
     578              :         let remote_extensions: Option<RemoteExtSpec>;
     579              : 
     580            0 :         if let Ok(spec_file) = remote_extensions_spec {
     581            0 :             remote_extensions = serde_json::from_reader(spec_file).ok();
     582            0 :         } else {
     583            0 :             remote_extensions = None;
     584            0 :         };
     585              : 
     586              :         // Create spec file
     587            0 :         let spec = ComputeSpec {
     588            0 :             skip_pg_catalog_updates: self.skip_pg_catalog_updates,
     589            0 :             format_version: 1.0,
     590            0 :             operation_uuid: None,
     591            0 :             features: self.features.clone(),
     592            0 :             swap_size_bytes: None,
     593            0 :             disk_quota_bytes: None,
     594            0 :             disable_lfc_resizing: None,
     595            0 :             cluster: Cluster {
     596            0 :                 cluster_id: None, // project ID: not used
     597            0 :                 name: None,       // project name: not used
     598            0 :                 state: None,
     599            0 :                 roles: if create_test_user {
     600            0 :                     vec![Role {
     601            0 :                         name: PgIdent::from_str("test").unwrap(),
     602            0 :                         encrypted_password: None,
     603            0 :                         options: None,
     604            0 :                     }]
     605              :                 } else {
     606            0 :                     Vec::new()
     607              :                 },
     608            0 :                 databases: if create_test_user {
     609            0 :                     vec![Database {
     610            0 :                         name: PgIdent::from_str("neondb").unwrap(),
     611            0 :                         owner: PgIdent::from_str("test").unwrap(),
     612            0 :                         options: None,
     613            0 :                         restrict_conn: false,
     614            0 :                         invalid: false,
     615            0 :                     }]
     616              :                 } else {
     617            0 :                     Vec::new()
     618              :                 },
     619            0 :                 settings: None,
     620            0 :                 postgresql_conf: Some(postgresql_conf),
     621            0 :             },
     622            0 :             delta_operations: None,
     623            0 :             tenant_id: Some(self.tenant_id),
     624            0 :             timeline_id: Some(self.timeline_id),
     625            0 :             mode: self.mode,
     626            0 :             pageserver_connstring: Some(pageserver_connstring),
     627            0 :             safekeeper_connstrings,
     628            0 :             storage_auth_token: auth_token.clone(),
     629            0 :             remote_extensions,
     630            0 :             pgbouncer_settings: None,
     631            0 :             shard_stripe_size: Some(shard_stripe_size),
     632            0 :             local_proxy_config: None,
     633            0 :             reconfigure_concurrency: 1,
     634            0 :             drop_subscriptions_before_start: self.drop_subscriptions_before_start,
     635            0 :         };
     636            0 :         let spec_path = self.endpoint_path().join("spec.json");
     637            0 :         std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
     638              : 
     639              :         // Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it.
     640            0 :         let logfile = std::fs::OpenOptions::new()
     641            0 :             .create(true)
     642            0 :             .append(true)
     643            0 :             .open(self.endpoint_path().join("compute.log"))?;
     644              : 
     645              :         // Launch compute_ctl
     646            0 :         let conn_str = self.connstr("cloud_admin", "postgres");
     647            0 :         println!("Starting postgres node at '{}'", conn_str);
     648            0 :         if create_test_user {
     649            0 :             let conn_str = self.connstr("test", "neondb");
     650            0 :             println!("Also at '{}'", conn_str);
     651            0 :         }
     652            0 :         let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
     653            0 :         cmd.args(["--http-port", &self.http_address.port().to_string()])
     654            0 :             .args(["--pgdata", self.pgdata().to_str().unwrap()])
     655            0 :             .args(["--connstr", &conn_str])
     656            0 :             .args([
     657            0 :                 "--spec-path",
     658            0 :                 self.endpoint_path().join("spec.json").to_str().unwrap(),
     659            0 :             ])
     660            0 :             .args([
     661            0 :                 "--pgbin",
     662            0 :                 self.env
     663            0 :                     .pg_bin_dir(self.pg_version)?
     664            0 :                     .join("postgres")
     665            0 :                     .to_str()
     666            0 :                     .unwrap(),
     667            0 :             ])
     668            0 :             .stdin(std::process::Stdio::null())
     669            0 :             .stderr(logfile.try_clone()?)
     670            0 :             .stdout(logfile);
     671              : 
     672            0 :         if let Some(remote_ext_config) = remote_ext_config {
     673            0 :             cmd.args(["--remote-ext-config", remote_ext_config]);
     674            0 :         }
     675              : 
     676            0 :         let child = cmd.spawn()?;
     677              :         // set up a scopeguard to kill & wait for the child in case we panic or bail below
     678            0 :         let child = scopeguard::guard(child, |mut child| {
     679            0 :             println!("SIGKILL & wait the started process");
     680            0 :             (|| {
     681            0 :                 // TODO: use another signal that can be caught by the child so it can clean up any children it spawned
     682            0 :                 child.kill().context("SIGKILL child")?;
     683            0 :                 child.wait().context("wait() for child process")?;
     684            0 :                 anyhow::Ok(())
     685            0 :             })()
     686            0 :             .with_context(|| format!("scopeguard kill&wait child {child:?}"))
     687            0 :             .unwrap();
     688            0 :         });
     689            0 : 
     690            0 :         // Write down the pid so we can wait for it when we want to stop
     691            0 :         // TODO use background_process::start_process instead: https://github.com/neondatabase/neon/pull/6482
     692            0 :         let pid = child.id();
     693            0 :         let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
     694            0 :         std::fs::write(pidfile_path, pid.to_string())?;
     695              : 
     696              :         // Wait for it to start
     697            0 :         let mut attempt = 0;
     698              :         const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
     699              :         const MAX_ATTEMPTS: u32 = 10 * 90; // Wait up to 1.5 min
     700              :         loop {
     701            0 :             attempt += 1;
     702            0 :             match self.get_status().await {
     703            0 :                 Ok(state) => {
     704            0 :                     match state.status {
     705              :                         ComputeStatus::Init => {
     706            0 :                             if attempt == MAX_ATTEMPTS {
     707            0 :                                 bail!("compute startup timed out; still in Init state");
     708            0 :                             }
     709              :                             // keep retrying
     710              :                         }
     711              :                         ComputeStatus::Running => {
     712              :                             // All good!
     713            0 :                             break;
     714              :                         }
     715              :                         ComputeStatus::Failed => {
     716            0 :                             bail!(
     717            0 :                                 "compute startup failed: {}",
     718            0 :                                 state
     719            0 :                                     .error
     720            0 :                                     .as_deref()
     721            0 :                                     .unwrap_or("<no error from compute_ctl>")
     722            0 :                             );
     723              :                         }
     724              :                         ComputeStatus::Empty
     725              :                         | ComputeStatus::ConfigurationPending
     726              :                         | ComputeStatus::Configuration
     727              :                         | ComputeStatus::TerminationPending
     728              :                         | ComputeStatus::Terminated => {
     729            0 :                             bail!("unexpected compute status: {:?}", state.status)
     730              :                         }
     731              :                     }
     732              :                 }
     733            0 :                 Err(e) => {
     734            0 :                     if attempt == MAX_ATTEMPTS {
     735            0 :                         return Err(e).context("timed out waiting to connect to compute_ctl HTTP");
     736            0 :                     }
     737              :                 }
     738              :             }
     739            0 :             tokio::time::sleep(ATTEMPT_INTERVAL).await;
     740              :         }
     741              : 
     742              :         // disarm the scopeguard, let the child outlive this function (and neon_local invocation)
     743            0 :         drop(scopeguard::ScopeGuard::into_inner(child));
     744            0 : 
     745            0 :         Ok(())
     746            0 :     }
     747              : 
     748              :     // Call the /status HTTP API
     749            0 :     pub async fn get_status(&self) -> Result<ComputeStatusResponse> {
     750            0 :         let client = reqwest::Client::new();
     751              : 
     752            0 :         let response = client
     753            0 :             .request(
     754            0 :                 reqwest::Method::GET,
     755            0 :                 format!(
     756            0 :                     "http://{}:{}/status",
     757            0 :                     self.http_address.ip(),
     758            0 :                     self.http_address.port()
     759            0 :                 ),
     760            0 :             )
     761            0 :             .send()
     762            0 :             .await?;
     763              : 
     764              :         // Interpret the response
     765            0 :         let status = response.status();
     766            0 :         if !(status.is_client_error() || status.is_server_error()) {
     767            0 :             Ok(response.json().await?)
     768              :         } else {
     769              :             // reqwest does not export its error construction utility functions, so let's craft the message ourselves
     770            0 :             let url = response.url().to_owned();
     771            0 :             let msg = match response.text().await {
     772            0 :                 Ok(err_body) => format!("Error: {}", err_body),
     773            0 :                 Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
     774              :             };
     775            0 :             Err(anyhow::anyhow!(msg))
     776              :         }
     777            0 :     }
     778              : 
     779            0 :     pub async fn reconfigure(
     780            0 :         &self,
     781            0 :         mut pageservers: Vec<(Host, u16)>,
     782            0 :         stripe_size: Option<ShardStripeSize>,
     783            0 :         safekeepers: Option<Vec<NodeId>>,
     784            0 :     ) -> Result<()> {
     785            0 :         let mut spec: ComputeSpec = {
     786            0 :             let spec_path = self.endpoint_path().join("spec.json");
     787            0 :             let file = std::fs::File::open(spec_path)?;
     788            0 :             serde_json::from_reader(file)?
     789              :         };
     790              : 
     791            0 :         let postgresql_conf = self.read_postgresql_conf()?;
     792            0 :         spec.cluster.postgresql_conf = Some(postgresql_conf);
     793            0 : 
     794            0 :         // If we weren't given explicit pageservers, query the storage controller
     795            0 :         if pageservers.is_empty() {
     796            0 :             let storage_controller = StorageController::from_env(&self.env);
     797            0 :             let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
     798            0 :             pageservers = locate_result
     799            0 :                 .shards
     800            0 :                 .into_iter()
     801            0 :                 .map(|shard| {
     802            0 :                     (
     803            0 :                         Host::parse(&shard.listen_pg_addr)
     804            0 :                             .expect("Storage controller reported bad hostname"),
     805            0 :                         shard.listen_pg_port,
     806            0 :                     )
     807            0 :                 })
     808            0 :                 .collect::<Vec<_>>();
     809            0 :         }
     810              : 
     811            0 :         let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
     812            0 :         assert!(!pageserver_connstr.is_empty());
     813            0 :         spec.pageserver_connstring = Some(pageserver_connstr);
     814            0 :         if stripe_size.is_some() {
     815            0 :             spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
     816            0 :         }
     817              : 
     818              :         // If safekeepers are not specified, don't change them.
     819            0 :         if let Some(safekeepers) = safekeepers {
     820            0 :             let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
     821            0 :             spec.safekeeper_connstrings = safekeeper_connstrings;
     822            0 :         }
     823              : 
     824            0 :         let client = reqwest::Client::builder()
     825            0 :             .timeout(Duration::from_secs(120))
     826            0 :             .build()
     827            0 :             .unwrap();
     828            0 :         let response = client
     829            0 :             .post(format!(
     830            0 :                 "http://{}:{}/configure",
     831            0 :                 self.http_address.ip(),
     832            0 :                 self.http_address.port()
     833            0 :             ))
     834            0 :             .header(CONTENT_TYPE.as_str(), "application/json")
     835            0 :             .body(format!(
     836            0 :                 "{{\"spec\":{}}}",
     837            0 :                 serde_json::to_string_pretty(&spec)?
     838              :             ))
     839            0 :             .send()
     840            0 :             .await?;
     841              : 
     842            0 :         let status = response.status();
     843            0 :         if !(status.is_client_error() || status.is_server_error()) {
     844            0 :             Ok(())
     845              :         } else {
     846            0 :             let url = response.url().to_owned();
     847            0 :             let msg = match response.text().await {
     848            0 :                 Ok(err_body) => format!("Error: {}", err_body),
     849            0 :                 Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
     850              :             };
     851            0 :             Err(anyhow::anyhow!(msg))
     852              :         }
     853            0 :     }
     854              : 
     855            0 :     pub fn stop(&self, mode: &str, destroy: bool) -> Result<()> {
     856            0 :         self.pg_ctl(&["-m", mode, "stop"], &None)?;
     857              : 
     858              :         // Also wait for the compute_ctl process to die. It might have some
     859              :         // cleanup work to do after postgres stops, like syncing safekeepers,
     860              :         // etc.
     861              :         //
     862              :         // If destroying, or if the stop mode is immediate, send it SIGTERM before
     863              :         // waiting. Sometimes we do *not* want this cleanup: tests intentionally
     864              :         // stop the endpoint while a majority of safekeepers are down, so
     865              :         // sync-safekeepers would hang otherwise. This could be a separate flag though.
     866            0 :         let send_sigterm = destroy || mode == "immediate";
     867            0 :         self.wait_for_compute_ctl_to_exit(send_sigterm)?;
     868            0 :         if destroy {
     869            0 :             println!(
     870            0 :                 "Destroying postgres data directory '{}'",
     871            0 :                 self.pgdata().to_str().unwrap()
     872            0 :             );
     873            0 :             std::fs::remove_dir_all(self.endpoint_path())?;
     874            0 :         }
     875            0 :         Ok(())
     876            0 :     }
     877              : 
     878            0 :     pub fn connstr(&self, user: &str, db_name: &str) -> String {
     879            0 :         format!(
     880            0 :             "postgresql://{}@{}:{}/{}",
     881            0 :             user,
     882            0 :             self.pg_address.ip(),
     883            0 :             self.pg_address.port(),
     884            0 :             db_name
     885            0 :         )
     886            0 :     }
     887              : }

Generated by: LCOV version 2.1-beta