Line data Source code
1 : //! This module is responsible for locating and loading paths in a local setup.
2 : //!
//! It also provides an `init` method, which acts as a stub for a proper installation
//! script that will use local paths.
5 :
6 : use std::collections::HashMap;
7 : use std::net::SocketAddr;
8 : use std::path::{Path, PathBuf};
9 : use std::process::{Command, Stdio};
10 : use std::time::Duration;
11 : use std::{env, fs};
12 :
13 : use anyhow::{Context, bail};
14 : use clap::ValueEnum;
15 : use pageserver_api::config::PostHogConfig;
16 : use pem::Pem;
17 : use postgres_backend::AuthType;
18 : use reqwest::{Certificate, Url};
19 : use safekeeper_api::PgMajorVersion;
20 : use serde::{Deserialize, Serialize};
21 : use utils::auth::encode_from_key_file;
22 : use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
23 :
24 : use crate::broker::StorageBroker;
25 : use crate::endpoint_storage::{
26 : ENDPOINT_STORAGE_DEFAULT_ADDR, ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage,
27 : };
28 : use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
29 : use crate::safekeeper::SafekeeperNode;
30 :
/// Default Postgres version number (currently 17).
pub const DEFAULT_PG_VERSION: u32 = 17;
32 :
//
// This data structure represents the neon_local CLI config.
//
// It is built from the .neon/config file (see `OnDiskConfig` and `load_config`), or from
// the config file passed to the 'neon_local init --config=<path>' option.
// See control_plane/simple.conf for an example.
//
#[derive(PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv {
    // Base directory for all the nodes (the pageserver, safekeepers and
    // compute endpoints).
    //
    // This is not stored in the config file. Rather, this is the path where the
    // config file itself is. It is read from the NEON_REPO_DIR env variable which
    // must be an absolute path. If the env var is not set, $PWD/.neon is used.
    pub base_data_dir: PathBuf,

    // Path to postgres distribution. It's expected that "bin", "include",
    // "lib", "share" from postgres distribution are there. If at some point
    // in time we will be able to run against vanilla postgres we may split that
    // to four separate paths and match OS-specific installation layout.
    pub pg_distrib_dir: PathBuf,

    // Directory containing the neon binaries (pageserver, safekeeper,
    // storage_broker, endpoint_storage).
    pub neon_distrib_dir: PathBuf,

    // Default tenant ID to use with the 'neon_local' command line utility, when
    // --tenant_id is not explicitly specified.
    pub default_tenant_id: Option<TenantId>,

    // Used to issue tokens, e.g. during pg start. A relative path is resolved
    // against `base_data_dir`.
    pub private_key_path: PathBuf,
    /// Path to environment's public key. A relative path is resolved against
    /// `base_data_dir`.
    pub public_key_path: PathBuf,

    pub broker: NeonBroker,

    // Configuration for the storage controller (1 per neon_local environment)
    pub storage_controller: NeonStorageControllerConf,

    /// This Vec must always contain at least one pageserver.
    /// Populated by [`Self::load_config`] from the individual `pageserver.toml`s.
    /// NB: the `pageservers` key in `.neon/config` is no longer accepted; specifying it
    /// only produces an error telling users to remove it.
    pub pageservers: Vec<PageServerConf>,

    pub safekeepers: Vec<SafekeeperConf>,

    pub endpoint_storage: EndpointStorageConf,

    // Control plane upcall API for pageserver. This will be propagated into each
    // pageserver's configuration. Unlike the on-disk representation, the URL is
    // mandatory here.
    pub control_plane_api: Url,

    // Control plane upcall APIs for storage controller. If set, this will be propagated into the
    // storage controller's configuration.
    pub control_plane_hooks_api: Option<Url>,

    /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
    // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
    pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,

    /// Flag to generate SSL certificates for components that need it.
    /// Also generates root CA certificate that is used to sign all other certificates.
    pub generate_local_ssl_certs: bool,
}
100 :
/// On-disk state stored in `.neon/config`.
///
/// Every field falls back to its `Default` when absent (`serde(default)`), and unknown
/// keys are rejected (`deny_unknown_fields`).
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct OnDiskConfig {
    pub pg_distrib_dir: PathBuf,
    pub neon_distrib_dir: PathBuf,
    pub default_tenant_id: Option<TenantId>,
    pub private_key_path: PathBuf,
    pub public_key_path: PathBuf,
    pub broker: NeonBroker,
    pub storage_controller: NeonStorageControllerConf,
    // Never written out; if the key is present on read, deserialization fails with a
    // message asking the user to remove it.
    #[serde(
        skip_serializing,
        deserialize_with = "fail_if_pageservers_field_specified"
    )]
    pub pageservers: Vec<PageServerConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    pub endpoint_storage: EndpointStorageConf,
    pub control_plane_api: Option<Url>,
    pub control_plane_hooks_api: Option<Url>,
    // Accepted on read but ignored by `load_config`; `persist_config` always writes `None`.
    pub control_plane_compute_hook_api: Option<Url>,
    // Branch name -> list of (tenant, timeline) pairs carrying that name.
    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
    // Note: skip serializing because in compat tests old storage controller fails
    // to load new config file. May be removed after this field is in release branch.
    // (The field is only omitted from the output when it is `false`.)
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    pub generate_local_ssl_certs: bool,
}
128 :
129 0 : fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
130 0 : where
131 0 : D: serde::Deserializer<'de>,
132 : {
133 0 : Err(serde::de::Error::custom(
134 0 : "The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
135 0 : Please remove the `pageservers` from your .neon/config.",
136 0 : ))
137 0 : }
138 :
/// The description of the neon_local env to be initialized by `neon_local init --config`.
///
/// Unknown keys are rejected (`deny_unknown_fields`).
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct NeonLocalInitConf {
    // TODO: do we need this? Seems unused
    // When absent, `init` falls back to $POSTGRES_DISTRIB_DIR, then to `$PWD/pg_install`.
    pub pg_distrib_dir: Option<PathBuf>,
    // TODO: do we need this? Seems unused
    // When absent, `init` falls back to the directory of the running executable.
    pub neon_distrib_dir: Option<PathBuf>,
    pub default_tenant_id: TenantId,
    pub broker: NeonBroker,
    pub storage_controller: Option<NeonStorageControllerConf>,
    pub pageservers: Vec<NeonLocalInitPageserverConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    pub endpoint_storage: EndpointStorageConf,
    pub control_plane_api: Option<Url>,
    pub control_plane_hooks_api: Option<Url>,
    pub generate_local_ssl_certs: bool,
}
157 :
/// Endpoint storage settings known to neon_local.
///
/// `listen_addr` defaults to `ENDPOINT_STORAGE_DEFAULT_ADDR` when not specified.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct EndpointStorageConf {
    pub listen_addr: SocketAddr,
}
163 :
/// Broker config for cluster internal communication.
///
/// Both addresses default to `None`; `client_url` panics if neither is set.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Default)]
#[serde(default)]
pub struct NeonBroker {
    /// Broker listen HTTP address for storage nodes coordination, e.g. '127.0.0.1:50051'.
    /// At least one of listen_addr or listen_https_addr must be set.
    pub listen_addr: Option<SocketAddr>,
    /// Broker listen HTTPS address for storage nodes coordination, e.g. '127.0.0.1:50051'.
    /// At least one of listen_addr or listen_https_addr must be set.
    /// listen_https_addr is preferred over listen_addr in neon_local.
    pub listen_https_addr: Option<SocketAddr>,
}
176 :
/// A part of storage controller's config the neon_local knows about.
///
/// Missing keys take the values from the `Default` impl (`serde(default)`).
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonStorageControllerConf {
    /// Heartbeat timeout before marking a node offline
    /// (human-readable duration; defaults to 10 seconds).
    #[serde(with = "humantime_serde")]
    pub max_offline: Duration,

    /// Human-readable duration; defaults to 30 seconds.
    #[serde(with = "humantime_serde")]
    pub max_warming_up: Duration,

    /// Defaults to `false`.
    pub start_as_candidate: bool,

    /// Database url used when running multiple storage controller instances
    pub database_url: Option<SocketAddr>,

    /// Thresholds for auto-splitting a tenant into shards.
    pub split_threshold: Option<u64>,
    pub max_split_shards: Option<u8>,
    pub initial_split_threshold: Option<u64>,
    pub initial_split_shards: Option<u8>,

    pub max_secondary_lag_bytes: Option<u64>,

    /// Human-readable duration; defaults to 1000 milliseconds.
    #[serde(with = "humantime_serde")]
    pub heartbeat_interval: Duration,

    /// Optional human-readable duration; defaults to `None`.
    #[serde(with = "humantime_serde")]
    pub long_reconcile_threshold: Option<Duration>,

    /// Defaults to `false`.
    pub use_https_pageserver_api: bool,

    /// Defaults to `true`.
    pub timelines_onto_safekeepers: bool,

    /// Defaults to `false`.
    pub use_https_safekeeper_api: bool,

    /// Defaults to `true`.
    pub use_local_compute_notifications: bool,

    pub timeline_safekeeper_count: Option<usize>,

    pub posthog_config: Option<PostHogConfig>,

    pub kick_secondary_downloads: Option<bool>,
}
221 :
222 : impl NeonStorageControllerConf {
223 : // Use a shorter pageserver unavailability interval than the default to speed up tests.
224 : const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
225 :
226 : const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
227 :
228 : // Very tight heartbeat interval to speed up tests
229 : const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
230 : }
231 :
232 : impl Default for NeonStorageControllerConf {
233 0 : fn default() -> Self {
234 0 : Self {
235 0 : max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
236 0 : max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
237 0 : start_as_candidate: false,
238 0 : database_url: None,
239 0 : split_threshold: None,
240 0 : max_split_shards: None,
241 0 : initial_split_threshold: None,
242 0 : initial_split_shards: None,
243 0 : max_secondary_lag_bytes: None,
244 0 : heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
245 0 : long_reconcile_threshold: None,
246 0 : use_https_pageserver_api: false,
247 0 : timelines_onto_safekeepers: true,
248 0 : use_https_safekeeper_api: false,
249 0 : use_local_compute_notifications: true,
250 0 : timeline_safekeeper_count: None,
251 0 : posthog_config: None,
252 0 : kick_secondary_downloads: None,
253 0 : }
254 0 : }
255 : }
256 :
257 : impl Default for EndpointStorageConf {
258 0 : fn default() -> Self {
259 0 : Self {
260 0 : listen_addr: ENDPOINT_STORAGE_DEFAULT_ADDR,
261 0 : }
262 0 : }
263 : }
264 :
265 : impl NeonBroker {
266 0 : pub fn client_url(&self) -> Url {
267 0 : let url = if let Some(addr) = self.listen_https_addr {
268 0 : format!("https://{addr}")
269 : } else {
270 0 : format!(
271 0 : "http://{}",
272 0 : self.listen_addr
273 0 : .expect("at least one address should be set")
274 : )
275 : };
276 :
277 0 : Url::parse(&url).expect("failed to construct url")
278 0 : }
279 : }
280 :
// neon_local needs to know this subset of pageserver configuration.
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
// It can get stale if `pageserver.toml` is changed.
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
//
// NOTE: `LocalEnv::load_config` rebuilds these values from each `pageserver_<id>` directory
// (`pageserver.toml` + `identity.toml`), and `OnDiskConfig` rejects a `pageservers` key.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default, deny_unknown_fields)]
pub struct PageServerConf {
    pub id: NodeId,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub listen_https_addr: Option<String>,
    pub listen_grpc_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
    pub grpc_auth_type: AuthType,
    pub no_sync: bool,
}
298 :
299 : impl Default for PageServerConf {
300 0 : fn default() -> Self {
301 0 : Self {
302 0 : id: NodeId(0),
303 0 : listen_pg_addr: String::new(),
304 0 : listen_http_addr: String::new(),
305 0 : listen_https_addr: None,
306 0 : listen_grpc_addr: None,
307 0 : pg_auth_type: AuthType::Trust,
308 0 : http_auth_type: AuthType::Trust,
309 0 : grpc_auth_type: AuthType::Trust,
310 0 : no_sync: false,
311 0 : }
312 0 : }
313 : }
314 :
/// The toml that can be passed to `neon_local init --config`.
/// This is a subset of the `pageserver.toml` configuration.
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
pub struct NeonLocalInitPageserverConf {
    pub id: NodeId,
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    pub listen_https_addr: Option<String>,
    pub listen_grpc_addr: Option<String>,
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
    pub grpc_auth_type: AuthType,
    /// Defaults to `false` when absent and is omitted from serialized output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub no_sync: bool,
    /// Any keys not named above are collected here (`serde(flatten)`).
    #[serde(flatten)]
    pub other: HashMap<String, toml::Value>,
}
333 :
334 : impl From<&NeonLocalInitPageserverConf> for PageServerConf {
335 0 : fn from(conf: &NeonLocalInitPageserverConf) -> Self {
336 : let NeonLocalInitPageserverConf {
337 0 : id,
338 0 : listen_pg_addr,
339 0 : listen_http_addr,
340 0 : listen_https_addr,
341 0 : listen_grpc_addr,
342 0 : pg_auth_type,
343 0 : http_auth_type,
344 0 : grpc_auth_type,
345 0 : no_sync,
346 : other: _,
347 0 : } = conf;
348 0 : Self {
349 0 : id: *id,
350 0 : listen_pg_addr: listen_pg_addr.clone(),
351 0 : listen_http_addr: listen_http_addr.clone(),
352 0 : listen_https_addr: listen_https_addr.clone(),
353 0 : listen_grpc_addr: listen_grpc_addr.clone(),
354 0 : pg_auth_type: *pg_auth_type,
355 0 : grpc_auth_type: *grpc_auth_type,
356 0 : http_auth_type: *http_auth_type,
357 0 : no_sync: *no_sync,
358 0 : }
359 0 : }
360 : }
361 :
/// Safekeeper settings known to neon_local.
///
/// Missing keys take the values from the `Default` impl (`serde(default)`): id 0, ports 0,
/// `sync = true`, `auth_enabled = false`, and every optional field unset.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
    pub id: NodeId,
    pub pg_port: u16,
    // When set, `get_compute_port` returns this port instead of `pg_port`.
    pub pg_tenant_only_port: Option<u16>,
    pub http_port: u16,
    pub https_port: Option<u16>,
    pub sync: bool,
    pub remote_storage: Option<String>,
    pub backup_threads: Option<u32>,
    pub auth_enabled: bool,
    pub listen_addr: Option<String>,
}
376 :
377 : impl Default for SafekeeperConf {
378 0 : fn default() -> Self {
379 0 : Self {
380 0 : id: NodeId(0),
381 0 : pg_port: 0,
382 0 : pg_tenant_only_port: None,
383 0 : http_port: 0,
384 0 : https_port: None,
385 0 : sync: true,
386 0 : remote_storage: None,
387 0 : backup_threads: None,
388 0 : auth_enabled: false,
389 0 : listen_addr: None,
390 0 : }
391 0 : }
392 : }
393 :
/// How `LocalEnv::init` treats a base directory that already exists.
#[derive(Clone, Copy)]
pub enum InitForceMode {
    /// Fail if the directory exists (CLI value `must-not-exist`).
    MustNotExist,
    /// Accept an existing directory only if it is empty (CLI value `empty-dir-ok`).
    EmptyDirOk,
    /// Delete everything inside an existing directory, keeping the directory itself
    /// (CLI value `remove-all-contents`).
    RemoveAllContents,
}
400 :
401 : impl ValueEnum for InitForceMode {
402 0 : fn value_variants<'a>() -> &'a [Self] {
403 0 : &[
404 0 : Self::MustNotExist,
405 0 : Self::EmptyDirOk,
406 0 : Self::RemoveAllContents,
407 0 : ]
408 0 : }
409 :
410 0 : fn to_possible_value(&self) -> Option<clap::builder::PossibleValue> {
411 0 : Some(clap::builder::PossibleValue::new(match self {
412 0 : InitForceMode::MustNotExist => "must-not-exist",
413 0 : InitForceMode::EmptyDirOk => "empty-dir-ok",
414 0 : InitForceMode::RemoveAllContents => "remove-all-contents",
415 : }))
416 0 : }
417 : }
418 :
419 : impl SafekeeperConf {
420 : /// Compute is served by port on which only tenant scoped tokens allowed, if
421 : /// it is configured.
422 0 : pub fn get_compute_port(&self) -> u16 {
423 0 : self.pg_tenant_only_port.unwrap_or(self.pg_port)
424 0 : }
425 : }
426 :
427 : impl LocalEnv {
428 0 : pub fn pg_distrib_dir_raw(&self) -> PathBuf {
429 0 : self.pg_distrib_dir.clone()
430 0 : }
431 :
432 0 : pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
433 0 : let path = self.pg_distrib_dir.clone();
434 :
435 0 : Ok(path.join(pg_version.v_str()))
436 0 : }
437 :
438 0 : pub fn pg_dir(&self, pg_version: PgMajorVersion, dir_name: &str) -> anyhow::Result<PathBuf> {
439 0 : Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
440 0 : }
441 :
442 0 : pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
443 0 : self.pg_dir(pg_version, "bin")
444 0 : }
445 :
446 0 : pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
447 0 : self.pg_dir(pg_version, "lib")
448 0 : }
449 :
450 0 : pub fn endpoint_storage_bin(&self) -> PathBuf {
451 0 : self.neon_distrib_dir.join("endpoint_storage")
452 0 : }
453 :
454 0 : pub fn pageserver_bin(&self) -> PathBuf {
455 0 : self.neon_distrib_dir.join("pageserver")
456 0 : }
457 :
458 0 : pub fn storage_controller_bin(&self) -> PathBuf {
459 : // Irrespective of configuration, storage controller binary is always
460 : // run from the same location as neon_local. This means that for compatibility
461 : // tests that run old pageserver/safekeeper, they still run latest storage controller.
462 0 : let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();
463 0 : neon_local_bin_dir.join("storage_controller")
464 0 : }
465 :
466 0 : pub fn safekeeper_bin(&self) -> PathBuf {
467 0 : self.neon_distrib_dir.join("safekeeper")
468 0 : }
469 :
470 0 : pub fn storage_broker_bin(&self) -> PathBuf {
471 0 : self.neon_distrib_dir.join("storage_broker")
472 0 : }
473 :
474 0 : pub fn endpoints_path(&self) -> PathBuf {
475 0 : self.base_data_dir.join("endpoints")
476 0 : }
477 :
478 0 : pub fn storage_broker_data_dir(&self) -> PathBuf {
479 0 : self.base_data_dir.join("storage_broker")
480 0 : }
481 :
482 0 : pub fn pageserver_data_dir(&self, pageserver_id: NodeId) -> PathBuf {
483 0 : self.base_data_dir
484 0 : .join(format!("pageserver_{pageserver_id}"))
485 0 : }
486 :
487 0 : pub fn safekeeper_data_dir(&self, data_dir_name: &str) -> PathBuf {
488 0 : self.base_data_dir.join("safekeepers").join(data_dir_name)
489 0 : }
490 :
491 0 : pub fn endpoint_storage_data_dir(&self) -> PathBuf {
492 0 : self.base_data_dir.join("endpoint_storage")
493 0 : }
494 :
495 0 : pub fn get_pageserver_conf(&self, id: NodeId) -> anyhow::Result<&PageServerConf> {
496 0 : if let Some(conf) = self.pageservers.iter().find(|node| node.id == id) {
497 0 : Ok(conf)
498 : } else {
499 0 : let have_ids = self
500 0 : .pageservers
501 0 : .iter()
502 0 : .map(|node| format!("{}:{}", node.id, node.listen_http_addr))
503 0 : .collect::<Vec<_>>();
504 0 : let joined = have_ids.join(",");
505 0 : bail!("could not find pageserver {id}, have ids {joined}")
506 : }
507 0 : }
508 :
509 0 : pub fn ssl_ca_cert_path(&self) -> Option<PathBuf> {
510 0 : if self.generate_local_ssl_certs {
511 0 : Some(self.base_data_dir.join("rootCA.crt"))
512 : } else {
513 0 : None
514 : }
515 0 : }
516 :
517 0 : pub fn ssl_ca_key_path(&self) -> Option<PathBuf> {
518 0 : if self.generate_local_ssl_certs {
519 0 : Some(self.base_data_dir.join("rootCA.key"))
520 : } else {
521 0 : None
522 : }
523 0 : }
524 :
525 0 : pub fn generate_ssl_ca_cert(&self) -> anyhow::Result<()> {
526 0 : let cert_path = self.ssl_ca_cert_path().unwrap();
527 0 : let key_path = self.ssl_ca_key_path().unwrap();
528 0 : if !fs::exists(cert_path.as_path())? {
529 0 : generate_ssl_ca_cert(cert_path.as_path(), key_path.as_path())?;
530 0 : }
531 0 : Ok(())
532 0 : }
533 :
534 0 : pub fn generate_ssl_cert(&self, cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
535 0 : self.generate_ssl_ca_cert()?;
536 0 : generate_ssl_cert(
537 0 : cert_path,
538 0 : key_path,
539 0 : self.ssl_ca_cert_path().unwrap().as_path(),
540 0 : self.ssl_ca_key_path().unwrap().as_path(),
541 : )
542 0 : }
543 :
544 : /// Creates HTTP client with local SSL CA certificates.
545 0 : pub fn create_http_client(&self) -> reqwest::Client {
546 0 : let ssl_ca_certs = self.ssl_ca_cert_path().map(|ssl_ca_file| {
547 0 : let buf = std::fs::read(ssl_ca_file).expect("SSL CA file should exist");
548 0 : Certificate::from_pem_bundle(&buf).expect("SSL CA file should be valid")
549 0 : });
550 :
551 0 : let mut http_client = reqwest::Client::builder();
552 0 : for ssl_ca_cert in ssl_ca_certs.unwrap_or_default() {
553 0 : http_client = http_client.add_root_certificate(ssl_ca_cert);
554 0 : }
555 :
556 0 : http_client
557 0 : .build()
558 0 : .expect("HTTP client should construct with no error")
559 0 : }
560 :
561 : /// Inspect the base data directory and extract the instance id and instance directory path
562 : /// for all storage controller instances
563 0 : pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
564 0 : let mut instances = Vec::default();
565 :
566 0 : let dir = std::fs::read_dir(self.base_data_dir.clone())?;
567 0 : for dentry in dir {
568 0 : let dentry = dentry?;
569 0 : let is_dir = dentry.metadata()?.is_dir();
570 0 : let filename = dentry.file_name().into_string().unwrap();
571 0 : let parsed_instance_id = match filename.strip_prefix("storage_controller_") {
572 0 : Some(suffix) => suffix.parse::<u8>().ok(),
573 0 : None => None,
574 : };
575 :
576 0 : let is_instance_dir = is_dir && parsed_instance_id.is_some();
577 :
578 0 : if !is_instance_dir {
579 0 : continue;
580 0 : }
581 :
582 0 : instances.push((
583 0 : parsed_instance_id.expect("Checked previously"),
584 0 : dentry.path(),
585 0 : ));
586 : }
587 :
588 0 : Ok(instances)
589 0 : }
590 :
591 0 : pub fn register_branch_mapping(
592 0 : &mut self,
593 0 : branch_name: String,
594 0 : tenant_id: TenantId,
595 0 : timeline_id: TimelineId,
596 0 : ) -> anyhow::Result<()> {
597 0 : let existing_values = self
598 0 : .branch_name_mappings
599 0 : .entry(branch_name.clone())
600 0 : .or_default();
601 :
602 0 : let existing_ids = existing_values
603 0 : .iter()
604 0 : .find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
605 :
606 0 : if let Some((_, old_timeline_id)) = existing_ids {
607 0 : if old_timeline_id == &timeline_id {
608 0 : Ok(())
609 : } else {
610 0 : bail!(
611 0 : "branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}"
612 : );
613 : }
614 : } else {
615 0 : existing_values.push((tenant_id, timeline_id));
616 0 : Ok(())
617 : }
618 0 : }
619 :
620 0 : pub fn get_branch_timeline_id(
621 0 : &self,
622 0 : branch_name: &str,
623 0 : tenant_id: TenantId,
624 0 : ) -> Option<TimelineId> {
625 0 : self.branch_name_mappings
626 0 : .get(branch_name)?
627 0 : .iter()
628 0 : .find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
629 0 : .map(|&(_, timeline_id)| timeline_id)
630 0 : }
631 :
632 0 : pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
633 0 : self.branch_name_mappings
634 0 : .iter()
635 0 : .flat_map(|(name, tenant_timelines)| {
636 0 : tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
637 0 : (TenantTimelineId::new(tenant_id, timeline_id), name.clone())
638 0 : })
639 0 : })
640 0 : .collect()
641 0 : }
642 :
643 : /// Construct `Self` from on-disk state.
644 0 : pub fn load_config(repopath: &Path) -> anyhow::Result<Self> {
645 0 : if !repopath.exists() {
646 0 : bail!(
647 0 : "Neon config is not found in {}. You need to run 'neon_local init' first",
648 0 : repopath.to_str().unwrap()
649 : );
650 0 : }
651 :
652 : // TODO: check that it looks like a neon repository
653 :
654 : // load and parse file
655 0 : let config_file_contents = fs::read_to_string(repopath.join("config"))?;
656 0 : let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
657 0 : let mut env = {
658 : let OnDiskConfig {
659 0 : pg_distrib_dir,
660 0 : neon_distrib_dir,
661 0 : default_tenant_id,
662 0 : private_key_path,
663 0 : public_key_path,
664 0 : broker,
665 0 : storage_controller,
666 0 : pageservers,
667 0 : safekeepers,
668 0 : control_plane_api,
669 0 : control_plane_hooks_api,
670 : control_plane_compute_hook_api: _,
671 0 : branch_name_mappings,
672 0 : generate_local_ssl_certs,
673 0 : endpoint_storage,
674 0 : } = on_disk_config;
675 0 : LocalEnv {
676 0 : base_data_dir: repopath.to_owned(),
677 0 : pg_distrib_dir,
678 0 : neon_distrib_dir,
679 0 : default_tenant_id,
680 0 : private_key_path,
681 0 : public_key_path,
682 0 : broker,
683 0 : storage_controller,
684 0 : pageservers,
685 0 : safekeepers,
686 0 : control_plane_api: control_plane_api.unwrap(),
687 0 : control_plane_hooks_api,
688 0 : branch_name_mappings,
689 0 : generate_local_ssl_certs,
690 0 : endpoint_storage,
691 0 : }
692 : };
693 :
694 : // The source of truth for pageserver configuration is the pageserver.toml.
695 0 : assert!(
696 0 : env.pageservers.is_empty(),
697 0 : "we ensure this during deserialization"
698 : );
699 0 : env.pageservers = {
700 0 : let iter = std::fs::read_dir(repopath).context("open dir")?;
701 0 : let mut pageservers = Vec::new();
702 0 : for res in iter {
703 0 : let dentry = res?;
704 : const PREFIX: &str = "pageserver_";
705 0 : let dentry_name = dentry
706 0 : .file_name()
707 0 : .into_string()
708 0 : .ok()
709 0 : .with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
710 0 : .unwrap();
711 0 : if !dentry_name.starts_with(PREFIX) {
712 0 : continue;
713 0 : }
714 0 : if !dentry.file_type().context("determine file type")?.is_dir() {
715 0 : anyhow::bail!("expected a directory, got {:?}", dentry.path());
716 0 : }
717 0 : let id = dentry_name[PREFIX.len()..]
718 0 : .parse::<NodeId>()
719 0 : .with_context(|| format!("parse id from {:?}", dentry.path()))?;
720 : // TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
721 0 : #[derive(serde::Serialize, serde::Deserialize)]
722 : // (allow unknown fields, unlike PageServerConf)
723 : struct PageserverConfigTomlSubset {
724 : listen_pg_addr: String,
725 : listen_http_addr: String,
726 : listen_https_addr: Option<String>,
727 : listen_grpc_addr: Option<String>,
728 : pg_auth_type: AuthType,
729 : http_auth_type: AuthType,
730 : grpc_auth_type: AuthType,
731 : #[serde(default)]
732 : no_sync: bool,
733 : }
734 0 : let config_toml_path = dentry.path().join("pageserver.toml");
735 0 : let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
736 0 : &std::fs::read_to_string(&config_toml_path)
737 0 : .with_context(|| format!("read {config_toml_path:?}"))?,
738 : )
739 0 : .context("parse pageserver.toml")?;
740 0 : let identity_toml_path = dentry.path().join("identity.toml");
741 0 : #[derive(serde::Serialize, serde::Deserialize)]
742 : struct IdentityTomlSubset {
743 : id: NodeId,
744 : }
745 0 : let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
746 0 : &std::fs::read_to_string(&identity_toml_path)
747 0 : .with_context(|| format!("read {identity_toml_path:?}"))?,
748 : )
749 0 : .context("parse identity.toml")?;
750 : let PageserverConfigTomlSubset {
751 0 : listen_pg_addr,
752 0 : listen_http_addr,
753 0 : listen_https_addr,
754 0 : listen_grpc_addr,
755 0 : pg_auth_type,
756 0 : http_auth_type,
757 0 : grpc_auth_type,
758 0 : no_sync,
759 0 : } = config_toml;
760 : let IdentityTomlSubset {
761 0 : id: identity_toml_id,
762 0 : } = identity_toml;
763 0 : let conf = PageServerConf {
764 : id: {
765 0 : anyhow::ensure!(
766 0 : identity_toml_id == id,
767 0 : "id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}",
768 : );
769 0 : id
770 : },
771 0 : listen_pg_addr,
772 0 : listen_http_addr,
773 0 : listen_https_addr,
774 0 : listen_grpc_addr,
775 0 : pg_auth_type,
776 0 : http_auth_type,
777 0 : grpc_auth_type,
778 0 : no_sync,
779 : };
780 0 : pageservers.push(conf);
781 : }
782 0 : pageservers
783 : };
784 :
785 0 : Ok(env)
786 0 : }
787 :
788 0 : pub fn persist_config(&self) -> anyhow::Result<()> {
789 0 : Self::persist_config_impl(
790 0 : &self.base_data_dir,
791 0 : &OnDiskConfig {
792 0 : pg_distrib_dir: self.pg_distrib_dir.clone(),
793 0 : neon_distrib_dir: self.neon_distrib_dir.clone(),
794 0 : default_tenant_id: self.default_tenant_id,
795 0 : private_key_path: self.private_key_path.clone(),
796 0 : public_key_path: self.public_key_path.clone(),
797 0 : broker: self.broker.clone(),
798 0 : storage_controller: self.storage_controller.clone(),
799 0 : pageservers: vec![], // it's skip_serializing anyway
800 0 : safekeepers: self.safekeepers.clone(),
801 0 : control_plane_api: Some(self.control_plane_api.clone()),
802 0 : control_plane_hooks_api: self.control_plane_hooks_api.clone(),
803 0 : control_plane_compute_hook_api: None,
804 0 : branch_name_mappings: self.branch_name_mappings.clone(),
805 0 : generate_local_ssl_certs: self.generate_local_ssl_certs,
806 0 : endpoint_storage: self.endpoint_storage.clone(),
807 0 : },
808 : )
809 0 : }
810 :
811 0 : pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
812 0 : let conf_content = &toml::to_string_pretty(config)?;
813 0 : let target_config_path = base_path.join("config");
814 0 : fs::write(&target_config_path, conf_content).with_context(|| {
815 0 : format!(
816 0 : "Failed to write config file into path '{}'",
817 0 : target_config_path.display()
818 : )
819 0 : })
820 0 : }
821 :
822 : // this function is used only for testing purposes in CLI e g generate tokens during init
823 0 : pub fn generate_auth_token<S: Serialize>(&self, claims: &S) -> anyhow::Result<String> {
824 0 : let key = self.read_private_key()?;
825 0 : encode_from_key_file(claims, &key)
826 0 : }
827 :
828 : /// Get the path to the private key.
829 0 : pub fn get_private_key_path(&self) -> PathBuf {
830 0 : if self.private_key_path.is_absolute() {
831 0 : self.private_key_path.to_path_buf()
832 : } else {
833 0 : self.base_data_dir.join(&self.private_key_path)
834 : }
835 0 : }
836 :
837 : /// Get the path to the public key.
838 0 : pub fn get_public_key_path(&self) -> PathBuf {
839 0 : if self.public_key_path.is_absolute() {
840 0 : self.public_key_path.to_path_buf()
841 : } else {
842 0 : self.base_data_dir.join(&self.public_key_path)
843 : }
844 0 : }
845 :
846 : /// Read the contents of the private key file.
847 0 : pub fn read_private_key(&self) -> anyhow::Result<Pem> {
848 0 : let private_key_path = self.get_private_key_path();
849 0 : let pem = pem::parse(fs::read(private_key_path)?)?;
850 0 : Ok(pem)
851 0 : }
852 :
853 : /// Read the contents of the public key file.
854 0 : pub fn read_public_key(&self) -> anyhow::Result<Pem> {
855 0 : let public_key_path = self.get_public_key_path();
856 0 : let pem = pem::parse(fs::read(public_key_path)?)?;
857 0 : Ok(pem)
858 0 : }
859 :
860 : /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
861 0 : pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
862 0 : let base_path = base_path();
863 0 : assert_ne!(base_path, Path::new(""));
864 0 : let base_path = &base_path;
865 :
866 : // create base_path dir
867 0 : if base_path.exists() {
868 0 : match force {
869 : InitForceMode::MustNotExist => {
870 0 : bail!(
871 0 : "directory '{}' already exists. Perhaps already initialized?",
872 0 : base_path.display()
873 : );
874 : }
875 : InitForceMode::EmptyDirOk => {
876 0 : if let Some(res) = std::fs::read_dir(base_path)?.next() {
877 0 : res.context("check if directory is empty")?;
878 0 : anyhow::bail!("directory not empty: {base_path:?}");
879 0 : }
880 : }
881 : InitForceMode::RemoveAllContents => {
882 0 : println!("removing all contents of '{}'", base_path.display());
883 : // instead of directly calling `remove_dir_all`, we keep the original dir but removing
884 : // all contents inside. This helps if the developer symbol links another directory (i.e.,
885 : // S3 local SSD) to the `.neon` base directory.
886 0 : for entry in std::fs::read_dir(base_path)? {
887 0 : let entry = entry?;
888 0 : let path = entry.path();
889 0 : if path.is_dir() {
890 0 : fs::remove_dir_all(&path)?;
891 : } else {
892 0 : fs::remove_file(&path)?;
893 : }
894 : }
895 : }
896 : }
897 0 : }
898 0 : if !base_path.exists() {
899 0 : fs::create_dir(base_path)?;
900 0 : }
901 :
902 : let NeonLocalInitConf {
903 0 : pg_distrib_dir,
904 0 : neon_distrib_dir,
905 0 : default_tenant_id,
906 0 : broker,
907 0 : storage_controller,
908 0 : pageservers,
909 0 : safekeepers,
910 0 : control_plane_api,
911 0 : generate_local_ssl_certs,
912 0 : control_plane_hooks_api,
913 0 : endpoint_storage,
914 0 : } = conf;
915 :
916 : // Find postgres binaries.
917 : // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
918 : // Note that later in the code we assume, that distrib dirs follow the same pattern
919 : // for all postgres versions.
920 0 : let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
921 0 : if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
922 0 : postgres_bin.into()
923 : } else {
924 0 : let cwd = env::current_dir().unwrap();
925 0 : cwd.join("pg_install")
926 : }
927 0 : });
928 :
929 : // Find neon binaries.
930 0 : let neon_distrib_dir = neon_distrib_dir
931 0 : .unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
932 :
933 : // Generate keypair for JWT.
934 : //
935 : // The keypair is only needed if authentication is enabled in any of the
936 : // components. For convenience, we generate the keypair even if authentication
937 : // is not enabled, so that you can easily enable it after the initialization
938 : // step.
939 0 : generate_auth_keys(
940 0 : base_path.join("auth_private_key.pem").as_path(),
941 0 : base_path.join("auth_public_key.pem").as_path(),
942 : )
943 0 : .context("generate auth keys")?;
944 0 : let private_key_path = PathBuf::from("auth_private_key.pem");
945 0 : let public_key_path = PathBuf::from("auth_public_key.pem");
946 :
947 : // create the runtime type because the remaining initialization code below needs
948 : // a LocalEnv instance op operation
949 : // TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
950 0 : let env = LocalEnv {
951 0 : base_data_dir: base_path.clone(),
952 0 : pg_distrib_dir,
953 0 : neon_distrib_dir,
954 0 : default_tenant_id: Some(default_tenant_id),
955 0 : private_key_path,
956 0 : public_key_path,
957 0 : broker,
958 0 : storage_controller: storage_controller.unwrap_or_default(),
959 0 : pageservers: pageservers.iter().map(Into::into).collect(),
960 0 : safekeepers,
961 0 : control_plane_api: control_plane_api.unwrap(),
962 0 : control_plane_hooks_api,
963 0 : branch_name_mappings: Default::default(),
964 0 : generate_local_ssl_certs,
965 0 : endpoint_storage,
966 0 : };
967 :
968 0 : if generate_local_ssl_certs {
969 0 : env.generate_ssl_ca_cert()?;
970 0 : }
971 :
972 : // create endpoints dir
973 0 : fs::create_dir_all(env.endpoints_path())?;
974 :
975 : // create storage broker dir
976 0 : fs::create_dir_all(env.storage_broker_data_dir())?;
977 0 : StorageBroker::from_env(&env)
978 0 : .initialize()
979 0 : .context("storage broker init failed")?;
980 :
981 : // create safekeeper dirs
982 0 : for safekeeper in &env.safekeepers {
983 0 : fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
984 0 : SafekeeperNode::from_env(&env, safekeeper)
985 0 : .initialize()
986 0 : .context("safekeeper init failed")?;
987 : }
988 :
989 : // initialize pageserver state
990 0 : for (i, ps) in pageservers.into_iter().enumerate() {
991 0 : let runtime_ps = &env.pageservers[i];
992 0 : assert_eq!(&PageServerConf::from(&ps), runtime_ps);
993 0 : fs::create_dir(env.pageserver_data_dir(ps.id))?;
994 0 : PageServerNode::from_env(&env, runtime_ps)
995 0 : .initialize(ps)
996 0 : .context("pageserver init failed")?;
997 : }
998 :
999 0 : EndpointStorage::from_env(&env)
1000 0 : .init()
1001 0 : .context("object storage init failed")?;
1002 :
1003 : // setup remote remote location for default LocalFs remote storage
1004 0 : std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
1005 0 : std::fs::create_dir_all(env.base_data_dir.join(ENDPOINT_STORAGE_REMOTE_STORAGE_DIR))?;
1006 :
1007 0 : env.persist_config()
1008 0 : }
1009 : }
1010 :
/// Resolve the base data directory of the local setup.
///
/// Uses the `NEON_REPO_DIR` environment variable when it is set; otherwise
/// falls back to `.neon` inside the canonicalized current working directory.
///
/// # Panics
///
/// Panics if `NEON_REPO_DIR` is set to a non-absolute path, or if the current
/// directory cannot be determined or canonicalized.
pub fn base_path() -> PathBuf {
    let resolved = if let Some(val) = std::env::var_os("NEON_REPO_DIR") {
        let path = PathBuf::from(val);
        if path.is_absolute() {
            path
        } else {
            // repeat the env var in the error because our default is always absolute
            panic!("NEON_REPO_DIR must be an absolute path, got {path:?}");
        }
    } else {
        // Determining the current directory can technically fail, but that is
        // quite unlikely.
        std::env::current_dir()
            .expect("determine current directory")
            .canonicalize()
            .expect("canonicalize current directory")
            .join(".neon")
    };
    assert!(resolved.is_absolute());
    resolved
}
1032 :
1033 : /// Generate a public/private key pair for JWT authentication
1034 0 : fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow::Result<()> {
1035 : // Generate the key pair
1036 : //
1037 : // openssl genpkey -algorithm ed25519 -out auth_private_key.pem
1038 0 : let keygen_output = Command::new("openssl")
1039 0 : .arg("genpkey")
1040 0 : .args(["-algorithm", "ed25519"])
1041 0 : .args(["-out", private_key_path.to_str().unwrap()])
1042 0 : .stdout(Stdio::null())
1043 0 : .output()
1044 0 : .context("failed to generate auth private key")?;
1045 0 : if !keygen_output.status.success() {
1046 0 : bail!(
1047 0 : "openssl failed: '{}'",
1048 0 : String::from_utf8_lossy(&keygen_output.stderr)
1049 : );
1050 0 : }
1051 :
1052 : // Extract the public key from the private key file
1053 : //
1054 : // openssl pkey -in auth_private_key.pem -pubout -out auth_public_key.pem
1055 0 : let keygen_output = Command::new("openssl")
1056 0 : .arg("pkey")
1057 0 : .args(["-in", private_key_path.to_str().unwrap()])
1058 0 : .arg("-pubout")
1059 0 : .args(["-out", public_key_path.to_str().unwrap()])
1060 0 : .output()
1061 0 : .context("failed to extract public key from private key")?;
1062 0 : if !keygen_output.status.success() {
1063 0 : bail!(
1064 0 : "openssl failed: '{}'",
1065 0 : String::from_utf8_lossy(&keygen_output.stderr)
1066 : );
1067 0 : }
1068 :
1069 0 : Ok(())
1070 0 : }
1071 :
1072 0 : fn generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> {
1073 : // openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=Neon Local CA" -days 36500 \
1074 : // -out rootCA.crt -keyout rootCA.key
1075 0 : let keygen_output = Command::new("openssl")
1076 0 : .args([
1077 0 : "req", "-x509", "-newkey", "ed25519", "-nodes", "-days", "36500",
1078 0 : ])
1079 0 : .args(["-subj", "/CN=Neon Local CA"])
1080 0 : .args(["-out", cert_path.to_str().unwrap()])
1081 0 : .args(["-keyout", key_path.to_str().unwrap()])
1082 0 : .output()
1083 0 : .context("failed to generate CA certificate")?;
1084 0 : if !keygen_output.status.success() {
1085 0 : bail!(
1086 0 : "openssl failed: '{}'",
1087 0 : String::from_utf8_lossy(&keygen_output.stderr)
1088 : );
1089 0 : }
1090 0 : Ok(())
1091 0 : }
1092 :
1093 0 : fn generate_ssl_cert(
1094 0 : cert_path: &Path,
1095 0 : key_path: &Path,
1096 0 : ca_cert_path: &Path,
1097 0 : ca_key_path: &Path,
1098 0 : ) -> anyhow::Result<()> {
1099 : // Generate Certificate Signing Request (CSR).
1100 0 : let mut csr_path = cert_path.to_path_buf();
1101 0 : csr_path.set_extension(".csr");
1102 :
1103 : // openssl req -new -nodes -newkey rsa:2048 -keyout server.key -out server.csr \
1104 : // -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
1105 0 : let keygen_output = Command::new("openssl")
1106 0 : .args(["req", "-new", "-nodes"])
1107 0 : .args(["-newkey", "ed25519"])
1108 0 : .args(["-subj", "/CN=localhost"])
1109 0 : .args(["-addext", "subjectAltName=DNS:localhost,IP:127.0.0.1"])
1110 0 : .args(["-keyout", key_path.to_str().unwrap()])
1111 0 : .args(["-out", csr_path.to_str().unwrap()])
1112 0 : .output()
1113 0 : .context("failed to generate CSR")?;
1114 0 : if !keygen_output.status.success() {
1115 0 : bail!(
1116 0 : "openssl failed: '{}'",
1117 0 : String::from_utf8_lossy(&keygen_output.stderr)
1118 : );
1119 0 : }
1120 :
1121 : // Sign CSR with CA key.
1122 : //
1123 : // openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial \
1124 : // -out server.crt -days 36500 -copy_extensions copyall
1125 0 : let keygen_output = Command::new("openssl")
1126 0 : .args(["x509", "-req"])
1127 0 : .args(["-in", csr_path.to_str().unwrap()])
1128 0 : .args(["-CA", ca_cert_path.to_str().unwrap()])
1129 0 : .args(["-CAkey", ca_key_path.to_str().unwrap()])
1130 0 : .arg("-CAcreateserial")
1131 0 : .args(["-out", cert_path.to_str().unwrap()])
1132 0 : .args(["-days", "36500"])
1133 0 : .args(["-copy_extensions", "copyall"])
1134 0 : .output()
1135 0 : .context("failed to sign CSR")?;
1136 0 : if !keygen_output.status.success() {
1137 0 : bail!(
1138 0 : "openssl failed: '{}'",
1139 0 : String::from_utf8_lossy(&keygen_output.stderr)
1140 : );
1141 0 : }
1142 :
1143 : // Remove CSR file as it's not needed anymore.
1144 0 : fs::remove_file(csr_path)?;
1145 :
1146 0 : Ok(())
1147 0 : }
|