//
// Main entry point for the safekeeper executable
//
use anyhow::{bail, Context, Result};
use camino::{Utf8Path, Utf8PathBuf};
use clap::{ArgAction, Parser};
use futures::future::BoxFuture;
use futures::stream::FuturesUnordered;
use futures::{FutureExt, StreamExt};
use remote_storage::RemoteStorageConfig;
use sd_notify::NotifyState;
use tokio::runtime::Handle;
use tokio::signal::unix::{signal, SignalKind};
use tokio::task::JoinError;
use toml_edit::Document;

use std::fs::{self, File};
use std::io::{ErrorKind, Write};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use storage_broker::Uri;
use tokio::sync::mpsc;

use tracing::*;
use utils::pid_file;

use metrics::set_build_info_metric;
use safekeeper::defaults::{
    DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES,
    DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR,
};
use safekeeper::wal_service;
use safekeeper::GlobalTimelines;
use safekeeper::SafeKeeperConf;
use safekeeper::{broker, WAL_SERVICE_RUNTIME};
use safekeeper::{control_file, BROKER_RUNTIME};
use safekeeper::{http, WAL_REMOVER_RUNTIME};
use safekeeper::{remove_wal, WAL_BACKUP_RUNTIME};
use safekeeper::{wal_backup, HTTP_RUNTIME};
use storage_broker::DEFAULT_ENDPOINT;
use utils::auth::{JwtAuth, Scope, SwappableJwtAuth};
use utils::{
    id::NodeId,
    logging::{self, LogFormat},
    project_build_tag, project_git_version,
    sentry_init::init_sentry,
    tcp_listener,
};

const PID_FILE_NAME: &str = "safekeeper.pid";
const ID_FILE_NAME: &str = "safekeeper.id";

project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);

const FEATURES: &[&str] = &[
    #[cfg(feature = "testing")]
    "testing",
];

fn version() -> String {
    format!(
        "{GIT_VERSION} failpoints: {}, features: {:?}",
        fail::has_failpoints(),
        FEATURES,
    )
}

const ABOUT: &str = r#"
A fleet of safekeepers is responsible for reliably storing WAL received from
compute, passing it through consensus (mitigating potential split-brain among
computes), and serving the hardened part further downstream to pageserver(s).
"#;

#[derive(Parser)]
#[command(name = "Neon safekeeper", version = GIT_VERSION, about = ABOUT, long_about = None)]
struct Args {
    /// Path to the safekeeper data directory.
    #[arg(short = 'D', long, default_value = "./")]
    datadir: Utf8PathBuf,
    /// Safekeeper node id.
    #[arg(long)]
    id: Option<u64>,
    /// Initialize safekeeper with given id and exit.
    #[arg(long)]
    init: bool,
    /// Listen endpoint for receiving/sending WAL in the form host:port.
    #[arg(short, long, default_value = DEFAULT_PG_LISTEN_ADDR)]
    listen_pg: String,
    /// Listen endpoint for receiving/sending WAL in the form host:port allowing
    /// only tenant scoped auth tokens. Pointless if auth is disabled.
    #[arg(long, default_value = None, verbatim_doc_comment)]
    listen_pg_tenant_only: Option<String>,
    /// Listen http endpoint for management and metrics in the form host:port.
    #[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)]
    listen_http: String,
    /// Advertised endpoint for receiving/sending WAL in the form host:port. If not
    /// specified, listen_pg is used to advertise instead.
    #[arg(long, default_value = None)]
    advertise_pg: Option<String>,
    /// Availability zone of the safekeeper.
    #[arg(long)]
    availability_zone: Option<String>,
    /// Do not wait for changes to be written safely to disk. Unsafe.
    #[arg(short, long)]
    no_sync: bool,
    /// Dump control file at path specified by this argument and exit.
    #[arg(long)]
    dump_control_file: Option<Utf8PathBuf>,
    /// Broker endpoint for storage nodes coordination in the form
    /// http[s]://host:port. With the https scheme a TLS connection is
    /// established; plaintext otherwise.
    #[arg(long, default_value = DEFAULT_ENDPOINT, verbatim_doc_comment)]
    broker_endpoint: Uri,
    /// Broker keepalive interval.
    #[arg(long, value_parser = humantime::parse_duration, default_value = storage_broker::DEFAULT_KEEPALIVE_INTERVAL)]
    broker_keepalive_interval: Duration,
    /// Peer safekeeper is considered dead after not receiving heartbeats from
    /// it during this period, passed as a human readable duration.
    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_HEARTBEAT_TIMEOUT, verbatim_doc_comment)]
    heartbeat_timeout: Duration,
    /// Enable/disable peer recovery.
    #[arg(long, default_value = "false", action = ArgAction::Set)]
    peer_recovery: bool,
    /// Remote storage configuration for WAL backup (offloading to s3) as a TOML
    /// inline table, e.g.
    /// {max_concurrent_syncs = 17, max_sync_errors = 13, bucket_name = "<BUCKETNAME>", bucket_region = "<REGION>", concurrency_limit = 119}
    /// Safekeeper offloads WAL to
    /// [prefix_in_bucket/]<tenant_id>/<timeline_id>/<segment_file>, mirroring
    /// structure on the file system.
    #[arg(long, value_parser = parse_remote_storage, verbatim_doc_comment)]
    remote_storage: Option<RemoteStorageConfig>,
    /// Safekeeper won't be elected for WAL offloading if it is lagging for more than this value in bytes.
    #[arg(long, default_value_t = DEFAULT_MAX_OFFLOADER_LAG_BYTES)]
    max_offloader_lag: u64,
    /// Number of max parallel WAL segments to be offloaded to remote storage.
    #[arg(long, default_value = "5")]
    wal_backup_parallel_jobs: usize,
    /// Disable WAL backup to s3. When disabled, safekeeper removes WAL ignoring
    /// WAL backup horizon.
    #[arg(long)]
    disable_wal_backup: bool,
    /// If given, enables auth on incoming connections to the WAL service endpoint
    /// (--listen-pg). Value specifies path to a .pem public key used for
    /// validation of JWT tokens. Empty string is allowed and means disabling
    /// auth.
    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]
    pg_auth_public_key_path: Option<Utf8PathBuf>,
    /// If given, enables auth on incoming connections to the tenant only WAL
    /// service endpoint (--listen-pg-tenant-only). Value specifies path to a
    /// .pem public key used for validation of JWT tokens. Empty string is
    /// allowed and means disabling auth.
    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]
    pg_tenant_only_auth_public_key_path: Option<Utf8PathBuf>,
    /// If given, enables auth on incoming connections to the http management
    /// service endpoint (--listen-http). Value specifies path to a .pem public
    /// key used for validation of JWT tokens. Empty string is allowed and
    /// means disabling auth.
    #[arg(long, verbatim_doc_comment, value_parser = opt_pathbuf_parser)]
    http_auth_public_key_path: Option<Utf8PathBuf>,
    /// Format for logging, either 'plain' or 'json'.
    #[arg(long, default_value = "plain")]
    log_format: String,
    /// Run everything in a single threaded current thread runtime, might be
    /// useful for debugging.
    #[arg(long)]
    current_thread_runtime: bool,
    /// Keep horizon for walsenders, i.e. don't remove WAL segments that are
    /// still needed for existing replication connections.
    #[arg(long)]
    walsenders_keep_horizon: bool,
    /// Enable partial backup. If disabled, safekeeper will not upload partial
    /// segments to remote storage.
    #[arg(long)]
    partial_backup_enabled: bool,
    /// Controls how long backup will wait until uploading the partial segment.
    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_PARTIAL_BACKUP_TIMEOUT, verbatim_doc_comment)]
    partial_backup_timeout: Duration,
}
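
// Illustrative invocation (addresses and values below are examples only, not
// recommended defaults):
//
//   safekeeper -D /data/safekeeper --id=1 \
//       --listen-pg=0.0.0.0:5454 --listen-http=0.0.0.0:7676 \
//       --broker-endpoint=http://storage-broker:50051
//
// Every flag maps to an `Args` field above; anything not given falls back to
// the default declared in the corresponding `#[arg]` attribute.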

// Like PathBufValueParser, but allows empty string.
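// An empty value is turned into `None` further down in `main`; that is how the
// "empty string means disabling auth" behavior documented on the auth path
// arguments is implemented.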
fn opt_pathbuf_parser(s: &str) -> Result<Utf8PathBuf, String> {
    Ok(Utf8PathBuf::from_str(s).unwrap())
}

#[tokio::main(flavor = "current_thread")]
async fn main() -> anyhow::Result<()> {
    // We want to allow multiple occurrences of the same arg (taking the last) so
    // that neon_local could generate a command with defaults + overrides without
    // getting the 'argument cannot be used multiple times' error. This seems to be
    // impossible with the pure Derive API, so convert the struct to a Command,
    // modify it, parse arguments, and then fill the struct back.
    let cmd = <Args as clap::CommandFactory>::command()
        .args_override_self(true)
        .version(version());
    let mut matches = cmd.get_matches();
    let mut args = <Args as clap::FromArgMatches>::from_arg_matches_mut(&mut matches)?;

    // I failed to modify opt_pathbuf_parser to return Option<PathBuf> in
    // reasonable time, so turn an empty string into None post factum.
    if let Some(pb) = &args.pg_auth_public_key_path {
        if pb.as_os_str().is_empty() {
            args.pg_auth_public_key_path = None;
        }
    }
    if let Some(pb) = &args.pg_tenant_only_auth_public_key_path {
        if pb.as_os_str().is_empty() {
            args.pg_tenant_only_auth_public_key_path = None;
        }
    }
    if let Some(pb) = &args.http_auth_public_key_path {
        if pb.as_os_str().is_empty() {
            args.http_auth_public_key_path = None;
        }
    }

    if let Some(addr) = args.dump_control_file {
        let state = control_file::FileStorage::load_control_file(addr)?;
        let json = serde_json::to_string(&state)?;
        print!("{json}");
        return Ok(());
    }

    // important to keep the order of:
    // 1. init logging
    // 2. tracing panic hook
    // 3. sentry
    logging::init(
        LogFormat::from_config(&args.log_format)?,
        logging::TracingErrorLayerEnablement::Disabled,
        logging::Output::Stdout,
    )?;
    logging::replace_panic_hook_with_tracing_panic_hook().forget();
    info!("version: {GIT_VERSION}");
    info!("build_tag: {BUILD_TAG}");

    let args_workdir = &args.datadir;
    let workdir = args_workdir.canonicalize_utf8().with_context(|| {
        format!("Failed to get the absolute path for input workdir {args_workdir:?}")
    })?;

    // Change into the data directory.
    std::env::set_current_dir(&workdir)?;

    // Set or read our ID.
    let id = set_id(&workdir, args.id.map(NodeId))?;
    if args.init {
        return Ok(());
    }

    let pg_auth = match args.pg_auth_public_key_path.as_ref() {
        None => {
            info!("pg auth is disabled");
            None
        }
        Some(path) => {
            info!("loading pg auth JWT key from {path}");
            Some(Arc::new(
                JwtAuth::from_key_path(path).context("failed to load the auth key")?,
            ))
        }
    };
    let pg_tenant_only_auth = match args.pg_tenant_only_auth_public_key_path.as_ref() {
        None => {
            info!("pg tenant only auth is disabled");
            None
        }
        Some(path) => {
            info!("loading pg tenant only auth JWT key from {path}");
            Some(Arc::new(
                JwtAuth::from_key_path(path).context("failed to load the auth key")?,
            ))
        }
    };
    let http_auth = match args.http_auth_public_key_path.as_ref() {
        None => {
            info!("http auth is disabled");
            None
        }
        Some(path) => {
            info!("loading http auth JWT key(s) from {path}");
            let jwt_auth = JwtAuth::from_key_path(path).context("failed to load the auth key")?;
            Some(Arc::new(SwappableJwtAuth::new(jwt_auth)))
        }
    };

    let conf = SafeKeeperConf {
        workdir,
        my_id: id,
        listen_pg_addr: args.listen_pg,
        listen_pg_addr_tenant_only: args.listen_pg_tenant_only,
        listen_http_addr: args.listen_http,
        advertise_pg_addr: args.advertise_pg,
        availability_zone: args.availability_zone,
        no_sync: args.no_sync,
        broker_endpoint: args.broker_endpoint,
        broker_keepalive_interval: args.broker_keepalive_interval,
        heartbeat_timeout: args.heartbeat_timeout,
        peer_recovery_enabled: args.peer_recovery,
        remote_storage: args.remote_storage,
        max_offloader_lag_bytes: args.max_offloader_lag,
        wal_backup_enabled: !args.disable_wal_backup,
        backup_parallel_jobs: args.wal_backup_parallel_jobs,
        pg_auth,
        pg_tenant_only_auth,
        http_auth,
        current_thread_runtime: args.current_thread_runtime,
        walsenders_keep_horizon: args.walsenders_keep_horizon,
        partial_backup_enabled: args.partial_backup_enabled,
        partial_backup_timeout: args.partial_backup_timeout,
    };

    // initialize sentry if SENTRY_DSN is provided
    let _sentry_guard = init_sentry(
        Some(GIT_VERSION.into()),
        &[("node_id", &conf.my_id.to_string())],
    );
    start_safekeeper(conf).await
}

/// Result of joining any of the main tasks: the outer error means the task failed to
/// complete, e.g. panicked; the inner one is the error produced by the task itself.
type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
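// For instance, `Ok(Err(e))` means the task ran to completion but returned an
// error, while `Err(join_err)` means it panicked or was aborted before finishing.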

async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
    // Prevent running multiple safekeepers on the same directory.
    let lock_file_path = conf.workdir.join(PID_FILE_NAME);
    let lock_file =
        pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
    info!("claimed pid file at {lock_file_path:?}");

    // Ensure that the lock file is held even if the main thread of the process panics:
    // we need to release the lock file only when the current process is gone.
    std::mem::forget(lock_file);

    info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
    let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
        error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
        e
    })?;

    let pg_listener_tenant_only =
        if let Some(listen_pg_addr_tenant_only) = &conf.listen_pg_addr_tenant_only {
            info!(
                "starting safekeeper tenant scoped WAL service on {}",
                listen_pg_addr_tenant_only
            );
            let listener = tcp_listener::bind(listen_pg_addr_tenant_only.clone()).map_err(|e| {
                error!(
                    "failed to bind to address {}: {}",
                    listen_pg_addr_tenant_only, e
                );
                e
            })?;
            Some(listener)
        } else {
            None
        };

    info!(
        "starting safekeeper HTTP service on {}",
        conf.listen_http_addr
    );
    let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
        error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
        e
    })?;

    // Register the metrics collector for active timelines. It's important to do this
    // after daemonizing, otherwise the process collector will be upset.
    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
    metrics::register_internal(Box::new(timeline_collector))?;

    let (wal_backup_launcher_tx, wal_backup_launcher_rx) = mpsc::channel(100);

    wal_backup::init_remote_storage(&conf);

    // Keep handles to the main tasks so we can die if any of them disappears.
    let mut tasks_handles: FuturesUnordered<BoxFuture<(String, JoinTaskRes)>> =
        FuturesUnordered::new();

    // Start the WAL backup launcher before loading timelines, as we'll notify it
    // through the channel about timelines which need offloading; not draining
    // the channel would cause a deadlock.
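    // If --current-thread-runtime was passed, reuse the handle of the runtime
    // driving `main` for every task below; otherwise each service is spawned on
    // its dedicated runtime (WAL_BACKUP_RUNTIME, WAL_SERVICE_RUNTIME, etc.).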
    let current_thread_rt = conf
        .current_thread_runtime
        .then(|| Handle::try_current().expect("no runtime in main"));
    let conf_ = conf.clone();
    let wal_backup_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| WAL_BACKUP_RUNTIME.handle())
        .spawn(wal_backup::wal_backup_launcher_task_main(
            conf_,
            wal_backup_launcher_rx,
        ))
        .map(|res| ("WAL backup launcher".to_owned(), res));
    tasks_handles.push(Box::pin(wal_backup_handle));

    // Load all timelines from disk to memory.
    GlobalTimelines::init(conf.clone(), wal_backup_launcher_tx).await?;

    let conf_ = conf.clone();
    // Run everything in the current thread rt, if asked.
    if conf.current_thread_runtime {
        info!("running in current thread runtime");
    }

    let wal_service_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())
        .spawn(wal_service::task_main(
            conf_,
            pg_listener,
            Scope::SafekeeperData,
        ))
        // wrap with task name for error reporting
        .map(|res| ("WAL service main".to_owned(), res));
    tasks_handles.push(Box::pin(wal_service_handle));

    if let Some(pg_listener_tenant_only) = pg_listener_tenant_only {
        let conf_ = conf.clone();
        let wal_service_handle = current_thread_rt
            .as_ref()
            .unwrap_or_else(|| WAL_SERVICE_RUNTIME.handle())
            .spawn(wal_service::task_main(
                conf_,
                pg_listener_tenant_only,
                Scope::Tenant,
            ))
            // wrap with task name for error reporting
            .map(|res| ("WAL service tenant only main".to_owned(), res));
        tasks_handles.push(Box::pin(wal_service_handle));
    }

    let conf_ = conf.clone();
    let http_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| HTTP_RUNTIME.handle())
        .spawn(http::task_main(conf_, http_listener))
        .map(|res| ("HTTP service main".to_owned(), res));
    tasks_handles.push(Box::pin(http_handle));

    let conf_ = conf.clone();
    let broker_task_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| BROKER_RUNTIME.handle())
        .spawn(broker::task_main(conf_).instrument(info_span!("broker")))
        .map(|res| ("broker main".to_owned(), res));
    tasks_handles.push(Box::pin(broker_task_handle));

    let conf_ = conf.clone();
    let wal_remover_handle = current_thread_rt
        .as_ref()
        .unwrap_or_else(|| WAL_REMOVER_RUNTIME.handle())
        .spawn(remove_wal::task_main(conf_))
        .map(|res| ("WAL remover".to_owned(), res));
    tasks_handles.push(Box::pin(wal_remover_handle));

    set_build_info_metric(GIT_VERSION, BUILD_TAG);

    // TODO: update tokio-stream, convert to a real async Stream with
    // SignalStream, map it to obtain the missing signal name, and combine the
    // streams into a single stream we can easily wait on.
    let mut sigquit_stream = signal(SignalKind::quit())?;
    let mut sigint_stream = signal(SignalKind::interrupt())?;
    let mut sigterm_stream = signal(SignalKind::terminate())?;

    // Notify systemd that we are ready. This is important as currently loading
    // timelines takes significant time (~30s in busy regions).
    if let Err(e) = sd_notify::notify(true, &[NotifyState::Ready]) {
        warn!("systemd notify failed: {:?}", e);
    }

    tokio::select! {
        Some((task_name, res)) = tasks_handles.next() => {
            error!("{} task failed: {:?}, exiting", task_name, res);
            std::process::exit(1);
        }
        // On any shutdown signal, log receipt and exit. Additionally, handling
        // SIGQUIT prevents a coredump.
        _ = sigquit_stream.recv() => info!("received SIGQUIT, terminating"),
        _ = sigint_stream.recv() => info!("received SIGINT, terminating"),
        _ = sigterm_stream.recv() => info!("received SIGTERM, terminating")
    };
    std::process::exit(0);
}

/// Determine safekeeper id.
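///
/// The id is persisted in `safekeeper.id` (ID_FILE_NAME) inside the data
/// directory; once that file exists, a conflicting --id given on the command
/// line is rejected.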
fn set_id(workdir: &Utf8Path, given_id: Option<NodeId>) -> Result<NodeId> {
    let id_file_path = workdir.join(ID_FILE_NAME);

    let my_id: NodeId;
    // If a file with the ID exists, read it in; otherwise use the one passed in.
    match fs::read(&id_file_path) {
        Ok(id_serialized) => {
            my_id = NodeId(
                std::str::from_utf8(&id_serialized)
                    .context("failed to parse safekeeper id")?
                    .parse()
                    .context("failed to parse safekeeper id")?,
            );
            if let Some(given_id) = given_id {
                if given_id != my_id {
                    bail!(
                        "safekeeper already initialized with id {}, can't set {}",
                        my_id,
                        given_id
                    );
                }
            }
            info!("safekeeper ID {}", my_id);
        }
        Err(error) => match error.kind() {
            ErrorKind::NotFound => {
                my_id = if let Some(given_id) = given_id {
                    given_id
                } else {
                    bail!("safekeeper id is not specified");
                };
                let mut f = File::create(&id_file_path)
                    .with_context(|| format!("Failed to create id file at {id_file_path:?}"))?;
                f.write_all(my_id.to_string().as_bytes())?;
                f.sync_all()?;
                info!("initialized safekeeper id {}", my_id);
            }
            _ => {
                return Err(error.into());
            }
        },
    }
    Ok(my_id)
}

// Parse RemoteStorageConfig from a TOML inline table.
fn parse_remote_storage(storage_conf: &str) -> anyhow::Result<RemoteStorageConfig> {
    // The toml parser doesn't consider a plain inline table a valid document,
    // so wrap it in a key to parse.
    let storage_conf_toml = format!("remote_storage = {storage_conf}");
    let parsed_toml = storage_conf_toml.parse::<Document>()?; // parse
    let (_, storage_conf_parsed_toml) = parsed_toml.iter().next().unwrap(); // and strip the key off again
    RemoteStorageConfig::from_toml(storage_conf_parsed_toml).and_then(|parsed_config| {
        // XXX: Don't print the original toml here, there might be some sensitive data
        parsed_config.context("Incorrectly parsed remote storage toml as no remote storage config")
    })
}
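
// For illustration only: a minimal inline table such as
//   --remote-storage='{bucket_name = "my-wal-bucket", bucket_region = "eu-west-1"}'
// goes through this parser. The keys shown are the ones named in the
// --remote-storage doc comment; the exact accepted set is defined by
// `RemoteStorageConfig::from_toml`.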

#[test]
fn verify_cli() {
    use clap::CommandFactory;
    Args::command().debug_assert()
}