Line data Source code
1 : //!
2 : //! `neon_local` is an executable that can be used to create a local
3 : //! Neon environment, for testing purposes. The local environment is
4 : //! quite different from the cloud environment with Kubernetes, but it is
5 : //! easier to work with locally. The python tests in `test_runner`
6 : //! rely on `neon_local` to set up the environment for each test.
7 : //!
8 : use anyhow::{anyhow, bail, Context, Result};
9 : use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
10 : use compute_api::spec::ComputeMode;
11 : use control_plane::endpoint::ComputeControlPlane;
12 : use control_plane::local_env::{
13 : InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
14 : SafekeeperConf,
15 : };
16 : use control_plane::pageserver::PageServerNode;
17 : use control_plane::safekeeper::SafekeeperNode;
18 : use control_plane::storage_controller::StorageController;
19 : use control_plane::{broker, local_env};
20 : use pageserver_api::config::{
21 : DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
22 : DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
23 : };
24 : use pageserver_api::controller_api::PlacementPolicy;
25 : use pageserver_api::models::{
26 : ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
27 : };
28 : use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
29 : use postgres_backend::AuthType;
30 : use postgres_connection::parse_host_port;
31 : use safekeeper_api::{
32 : DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
33 : DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
34 : };
35 : use std::collections::{BTreeSet, HashMap};
36 : use std::path::PathBuf;
37 : use std::process::exit;
38 : use std::str::FromStr;
39 : use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
40 : use url::Host;
41 : use utils::{
42 : auth::{Claims, Scope},
43 : id::{NodeId, TenantId, TenantTimelineId, TimelineId},
44 : lsn::Lsn,
45 : project_git_version,
46 : };
47 :
48 : // Default id of a safekeeper node, if not specified on the command line.
49 : const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
// Id given to the first pageserver of the default `init` configuration; any
// additional pageservers get consecutive ids counted up from this one.
50 : const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
// Branch name registered for a new tenant's initial timeline, and the fallback
// branch name when a command is not given one explicitly.
51 : const DEFAULT_BRANCH_NAME: &str = "main";
// NOTE(review): presumably defines `GIT_VERSION` with the git version of the
// build -- confirm against the `project_git_version!` macro in `utils`.
52 : project_git_version!(GIT_VERSION);
53 :
// NOTE(review): presumably the default for the `pg-version` CLI arguments; the
// use site is not in this part of the file -- confirm.
54 : const DEFAULT_PG_VERSION: &str = "15";
55 :
// `control_plane_api` URL written into the default `init` configuration (the one
// used when no --config file is supplied).
56 : const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
57 :
58 : ///
59 : /// Timelines tree element used as a value in the HashMap.
60 : ///
61 : struct TimelineTreeEl {
62 : /// `TimelineInfo` received from the `pageserver` via the `timeline_list` http API call.
63 : pub info: TimelineInfo,
64 : /// Name, recovered from neon config mappings
65 : pub name: Option<String>,
66 : /// Holds all direct children of this timeline referenced using `timeline_id`.
67 : pub children: BTreeSet<TimelineId>,
68 : }
69 :
70 : // Main entry point for the 'neon_local' CLI utility
71 : //
72 : // This utility helps to manage neon installation. That includes following:
73 : // * Management of local postgres installations running on top of the
74 : // pageserver.
75 : // * Providing CLI api to the pageserver
76 : // * TODO: export/import to/from usual postgres
77 0 : fn main() -> Result<()> {
78 0 : let matches = cli().get_matches();
79 :
80 0 : let (sub_name, sub_args) = match matches.subcommand() {
81 0 : Some(subcommand_data) => subcommand_data,
82 0 : None => bail!("no subcommand provided"),
83 : };
84 :
85 : // Check for 'neon init' command first.
86 0 : let subcommand_result = if sub_name == "init" {
87 0 : handle_init(sub_args).map(Some)
88 : } else {
89 : // all other commands need an existing config
90 0 : let mut env =
91 0 : LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
92 0 : let original_env = env.clone();
93 0 :
94 0 : let rt = tokio::runtime::Builder::new_current_thread()
95 0 : .enable_all()
96 0 : .build()
97 0 : .unwrap();
98 :
99 0 : let subcommand_result = match sub_name {
100 0 : "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
101 0 : "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
102 0 : "start" => rt.block_on(handle_start_all(&env)),
103 0 : "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
104 0 : "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
105 0 : "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
106 0 : "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
107 0 : "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
108 0 : "mappings" => handle_mappings(sub_args, &mut env),
109 0 : "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
110 0 : _ => bail!("unexpected subcommand {sub_name}"),
111 : };
112 :
113 0 : if original_env != env {
114 0 : subcommand_result.map(|()| Some(env))
115 : } else {
116 0 : subcommand_result.map(|()| None)
117 : }
118 : };
119 :
120 0 : match subcommand_result {
121 0 : Ok(Some(updated_env)) => updated_env.persist_config()?,
122 0 : Ok(None) => (),
123 0 : Err(e) => {
124 0 : eprintln!("command failed: {e:?}");
125 0 : exit(1);
126 : }
127 : }
128 0 : Ok(())
129 0 : }
130 :
131 : ///
132 : /// Prints timelines list as a tree-like structure.
133 : ///
134 0 : fn print_timelines_tree(
135 0 : timelines: Vec<TimelineInfo>,
136 0 : mut timeline_name_mappings: HashMap<TenantTimelineId, String>,
137 0 : ) -> Result<()> {
138 0 : let mut timelines_hash = timelines
139 0 : .iter()
140 0 : .map(|t| {
141 0 : (
142 0 : t.timeline_id,
143 0 : TimelineTreeEl {
144 0 : info: t.clone(),
145 0 : children: BTreeSet::new(),
146 0 : name: timeline_name_mappings
147 0 : .remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
148 0 : },
149 0 : )
150 0 : })
151 0 : .collect::<HashMap<_, _>>();
152 :
153 : // Memorize all direct children of each timeline.
154 0 : for timeline in timelines.iter() {
155 0 : if let Some(ancestor_timeline_id) = timeline.ancestor_timeline_id {
156 0 : timelines_hash
157 0 : .get_mut(&ancestor_timeline_id)
158 0 : .context("missing timeline info in the HashMap")?
159 : .children
160 0 : .insert(timeline.timeline_id);
161 0 : }
162 : }
163 :
164 0 : for timeline in timelines_hash.values() {
165 : // Start with root local timelines (no ancestors) first.
166 0 : if timeline.info.ancestor_timeline_id.is_none() {
167 0 : print_timeline(0, &Vec::from([true]), timeline, &timelines_hash)?;
168 0 : }
169 : }
170 :
171 0 : Ok(())
172 0 : }
173 :
174 : ///
175 : /// Recursively prints timeline info with all its children.
176 : ///
177 0 : fn print_timeline(
178 0 : nesting_level: usize,
179 0 : is_last: &[bool],
180 0 : timeline: &TimelineTreeEl,
181 0 : timelines: &HashMap<TimelineId, TimelineTreeEl>,
182 0 : ) -> Result<()> {
183 0 : if nesting_level > 0 {
184 0 : let ancestor_lsn = match timeline.info.ancestor_lsn {
185 0 : Some(lsn) => lsn.to_string(),
186 0 : None => "Unknown Lsn".to_string(),
187 : };
188 :
189 0 : let mut br_sym = "┣━";
190 0 :
191 0 : // Draw each nesting padding with proper style
192 0 : // depending on whether its timeline ended or not.
193 0 : if nesting_level > 1 {
194 0 : for l in &is_last[1..is_last.len() - 1] {
195 0 : if *l {
196 0 : print!(" ");
197 0 : } else {
198 0 : print!("┃ ");
199 0 : }
200 : }
201 0 : }
202 :
203 : // We are the last in this sub-timeline
204 0 : if *is_last.last().unwrap() {
205 0 : br_sym = "┗━";
206 0 : }
207 :
208 0 : print!("{} @{}: ", br_sym, ancestor_lsn);
209 0 : }
210 :
211 : // Finally print a timeline id and name with new line
212 0 : println!(
213 0 : "{} [{}]",
214 0 : timeline.name.as_deref().unwrap_or("_no_name_"),
215 0 : timeline.info.timeline_id
216 0 : );
217 0 :
218 0 : let len = timeline.children.len();
219 0 : let mut i: usize = 0;
220 0 : let mut is_last_new = Vec::from(is_last);
221 0 : is_last_new.push(false);
222 :
223 0 : for child in &timeline.children {
224 0 : i += 1;
225 0 :
226 0 : // Mark that the last padding is the end of the timeline
227 0 : if i == len {
228 0 : if let Some(last) = is_last_new.last_mut() {
229 0 : *last = true;
230 0 : }
231 0 : }
232 :
233 : print_timeline(
234 0 : nesting_level + 1,
235 0 : &is_last_new,
236 0 : timelines
237 0 : .get(child)
238 0 : .context("missing timeline info in the HashMap")?,
239 0 : timelines,
240 0 : )?;
241 : }
242 :
243 0 : Ok(())
244 0 : }
245 :
246 : /// Returns a map of timeline IDs to timeline_id@lsn strings.
247 : /// Connects to the pageserver to query this information.
248 0 : async fn get_timeline_infos(
249 0 : env: &local_env::LocalEnv,
250 0 : tenant_shard_id: &TenantShardId,
251 0 : ) -> Result<HashMap<TimelineId, TimelineInfo>> {
252 0 : Ok(get_default_pageserver(env)
253 0 : .timeline_list(tenant_shard_id)
254 0 : .await?
255 0 : .into_iter()
256 0 : .map(|timeline_info| (timeline_info.timeline_id, timeline_info))
257 0 : .collect())
258 0 : }
259 :
260 : // Helper function to parse --tenant_id option, or get the default from config file
261 0 : fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<TenantId> {
262 0 : if let Some(tenant_id_from_arguments) = parse_tenant_id(sub_match).transpose() {
263 0 : tenant_id_from_arguments
264 0 : } else if let Some(default_id) = env.default_tenant_id {
265 0 : Ok(default_id)
266 : } else {
267 0 : anyhow::bail!("No tenant id. Use --tenant-id, or set a default tenant");
268 : }
269 0 : }
270 :
271 : // Helper function to parse --tenant_id option, for commands that accept a shard suffix
272 0 : fn get_tenant_shard_id(
273 0 : sub_match: &ArgMatches,
274 0 : env: &local_env::LocalEnv,
275 0 : ) -> anyhow::Result<TenantShardId> {
276 0 : if let Some(tenant_id_from_arguments) = parse_tenant_shard_id(sub_match).transpose() {
277 0 : tenant_id_from_arguments
278 0 : } else if let Some(default_id) = env.default_tenant_id {
279 0 : Ok(TenantShardId::unsharded(default_id))
280 : } else {
281 0 : anyhow::bail!("No tenant shard id. Use --tenant-id, or set a default tenant");
282 : }
283 0 : }
284 :
285 0 : fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
286 0 : sub_match
287 0 : .get_one::<String>("tenant-id")
288 0 : .map(|tenant_id| TenantId::from_str(tenant_id))
289 0 : .transpose()
290 0 : .context("Failed to parse tenant id from the argument string")
291 0 : }
292 :
293 0 : fn parse_tenant_shard_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantShardId>> {
294 0 : sub_match
295 0 : .get_one::<String>("tenant-id")
296 0 : .map(|id_str| TenantShardId::from_str(id_str))
297 0 : .transpose()
298 0 : .context("Failed to parse tenant shard id from the argument string")
299 0 : }
300 :
301 0 : fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
302 0 : sub_match
303 0 : .get_one::<String>("timeline-id")
304 0 : .map(|timeline_id| TimelineId::from_str(timeline_id))
305 0 : .transpose()
306 0 : .context("Failed to parse timeline id from the argument string")
307 0 : }
308 :
309 0 : fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
310 0 : let num_pageservers = init_match.get_one::<u16>("num-pageservers");
311 0 :
312 0 : let force = init_match.get_one("force").expect("we set a default value");
313 :
314 : // Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
315 0 : let init_conf: NeonLocalInitConf = if let Some(config_path) =
316 0 : init_match.get_one::<PathBuf>("config")
317 : {
318 : // User (likely the Python test suite) provided a description of the environment.
319 0 : if num_pageservers.is_some() {
320 0 : bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
321 0 : }
322 : // load and parse the file
323 0 : let contents = std::fs::read_to_string(config_path).with_context(|| {
324 0 : format!(
325 0 : "Could not read configuration file '{}'",
326 0 : config_path.display()
327 0 : )
328 0 : })?;
329 0 : toml_edit::de::from_str(&contents)?
330 : } else {
331 : // User (likely interactive) did not provide a description of the environment, give them the default
332 0 : NeonLocalInitConf {
333 0 : control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
334 0 : broker: NeonBroker {
335 0 : listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
336 0 : },
337 0 : safekeepers: vec![SafekeeperConf {
338 0 : id: DEFAULT_SAFEKEEPER_ID,
339 0 : pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
340 0 : http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
341 0 : ..Default::default()
342 0 : }],
343 0 : pageservers: (0..num_pageservers.copied().unwrap_or(1))
344 0 : .map(|i| {
345 0 : let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
346 0 : let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
347 0 : let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
348 0 : NeonLocalInitPageserverConf {
349 0 : id: pageserver_id,
350 0 : listen_pg_addr: format!("127.0.0.1:{pg_port}"),
351 0 : listen_http_addr: format!("127.0.0.1:{http_port}"),
352 0 : pg_auth_type: AuthType::Trust,
353 0 : http_auth_type: AuthType::Trust,
354 0 : other: Default::default(),
355 0 : }
356 0 : })
357 0 : .collect(),
358 0 : pg_distrib_dir: None,
359 0 : neon_distrib_dir: None,
360 0 : default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
361 0 : storage_controller: None,
362 0 : control_plane_compute_hook_api: None,
363 0 : }
364 : };
365 :
366 0 : LocalEnv::init(init_conf, force)
367 0 : .context("materialize initial neon_local environment on disk")?;
368 0 : Ok(LocalEnv::load_config(&local_env::base_path())
369 0 : .expect("freshly written config should be loadable"))
370 0 : }
371 :
372 : /// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
373 : /// For typical interactive use, one would just run with a single pageserver. Scenarios with
374 : /// tenant/timeline placement across multiple pageservers are managed by python test code rather
375 : /// than this CLI.
376 0 : fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
377 0 : let ps_conf = env
378 0 : .pageservers
379 0 : .first()
380 0 : .expect("Config is validated to contain at least one pageserver");
381 0 : PageServerNode::from_env(env, ps_conf)
382 0 : }
383 :
384 0 : async fn handle_tenant(
385 0 : tenant_match: &ArgMatches,
386 0 : env: &mut local_env::LocalEnv,
387 0 : ) -> anyhow::Result<()> {
388 0 : let pageserver = get_default_pageserver(env);
389 0 : match tenant_match.subcommand() {
390 0 : Some(("list", _)) => {
391 0 : for t in pageserver.tenant_list().await? {
392 0 : println!("{} {:?}", t.id, t.state);
393 0 : }
394 : }
395 0 : Some(("import", import_match)) => {
396 0 : let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
397 0 :
398 0 : let storage_controller = StorageController::from_env(env);
399 0 : let create_response = storage_controller.tenant_import(tenant_id).await?;
400 :
401 0 : let shard_zero = create_response
402 0 : .shards
403 0 : .first()
404 0 : .expect("Import response omitted shards");
405 0 :
406 0 : let attached_pageserver_id = shard_zero.node_id;
407 0 : let pageserver =
408 0 : PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);
409 :
410 0 : println!(
411 0 : "Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}"
412 0 : );
413 :
414 0 : let timelines = pageserver
415 0 : .http_client
416 0 : .list_timelines(shard_zero.shard_id)
417 0 : .await?;
418 :
419 : // Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names
420 0 : let main_timeline = timelines
421 0 : .iter()
422 0 : .find(|t| t.ancestor_timeline_id.is_none())
423 0 : .expect("No timelines found")
424 0 : .timeline_id;
425 0 :
426 0 : let mut branch_i = 0;
427 0 : for timeline in timelines.iter() {
428 0 : let branch_name = if timeline.timeline_id == main_timeline {
429 0 : "main".to_string()
430 : } else {
431 0 : branch_i += 1;
432 0 : format!("branch_{branch_i}")
433 : };
434 :
435 0 : println!(
436 0 : "Importing timeline {tenant_id}/{} as branch {branch_name}",
437 0 : timeline.timeline_id
438 0 : );
439 0 :
440 0 : env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
441 : }
442 : }
443 0 : Some(("create", create_match)) => {
444 0 : let tenant_conf: HashMap<_, _> = create_match
445 0 : .get_many::<String>("config")
446 0 : .map(|vals: clap::parser::ValuesRef<'_, String>| {
447 0 : vals.flat_map(|c| c.split_once(':')).collect()
448 0 : })
449 0 : .unwrap_or_default();
450 0 :
451 0 : let shard_count: u8 = create_match
452 0 : .get_one::<u8>("shard-count")
453 0 : .cloned()
454 0 : .unwrap_or(0);
455 0 :
456 0 : let shard_stripe_size: Option<u32> =
457 0 : create_match.get_one::<u32>("shard-stripe-size").cloned();
458 :
459 0 : let placement_policy = match create_match.get_one::<String>("placement-policy") {
460 0 : Some(s) if !s.is_empty() => serde_json::from_str::<PlacementPolicy>(s)?,
461 0 : _ => PlacementPolicy::Attached(0),
462 : };
463 :
464 0 : let tenant_conf = PageServerNode::parse_config(tenant_conf)?;
465 :
466 : // If tenant ID was not specified, generate one
467 0 : let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
468 0 :
469 0 : // We must register the tenant with the storage controller, so
470 0 : // that when the pageserver restarts, it will be re-attached.
471 0 : let storage_controller = StorageController::from_env(env);
472 0 : storage_controller
473 0 : .tenant_create(TenantCreateRequest {
474 0 : // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the
475 0 : // storage controller expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest
476 0 : // type is used both in storage controller (for creating tenants) and in pageserver (for creating shards)
477 0 : new_tenant_id: TenantShardId::unsharded(tenant_id),
478 0 : generation: None,
479 0 : shard_parameters: ShardParameters {
480 0 : count: ShardCount::new(shard_count),
481 0 : stripe_size: shard_stripe_size
482 0 : .map(ShardStripeSize)
483 0 : .unwrap_or(ShardParameters::DEFAULT_STRIPE_SIZE),
484 0 : },
485 0 : placement_policy: Some(placement_policy),
486 0 : config: tenant_conf,
487 0 : })
488 0 : .await?;
489 0 : println!("tenant {tenant_id} successfully created on the pageserver");
490 :
491 : // Create an initial timeline for the new tenant
492 0 : let new_timeline_id =
493 0 : parse_timeline_id(create_match)?.unwrap_or(TimelineId::generate());
494 0 : let pg_version = create_match
495 0 : .get_one::<u32>("pg-version")
496 0 : .copied()
497 0 : .context("Failed to parse postgres version from the argument string")?;
498 :
499 : // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
500 : // different shards picking different start lsns. Maybe we have to teach storage controller
501 : // to let shard 0 branch first and then propagate the chosen LSN to other shards.
502 0 : storage_controller
503 0 : .tenant_timeline_create(
504 0 : tenant_id,
505 0 : TimelineCreateRequest {
506 0 : new_timeline_id,
507 0 : ancestor_timeline_id: None,
508 0 : ancestor_start_lsn: None,
509 0 : existing_initdb_timeline_id: None,
510 0 : pg_version: Some(pg_version),
511 0 : },
512 0 : )
513 0 : .await?;
514 :
515 0 : env.register_branch_mapping(
516 0 : DEFAULT_BRANCH_NAME.to_string(),
517 0 : tenant_id,
518 0 : new_timeline_id,
519 0 : )?;
520 :
521 0 : println!("Created an initial timeline '{new_timeline_id}' for tenant: {tenant_id}",);
522 0 :
523 0 : if create_match.get_flag("set-default") {
524 0 : println!("Setting tenant {tenant_id} as a default one");
525 0 : env.default_tenant_id = Some(tenant_id);
526 0 : }
527 : }
528 0 : Some(("set-default", set_default_match)) => {
529 0 : let tenant_id =
530 0 : parse_tenant_id(set_default_match)?.context("No tenant id specified")?;
531 0 : println!("Setting tenant {tenant_id} as a default one");
532 0 : env.default_tenant_id = Some(tenant_id);
533 : }
534 0 : Some(("config", create_match)) => {
535 0 : let tenant_id = get_tenant_id(create_match, env)?;
536 0 : let tenant_conf: HashMap<_, _> = create_match
537 0 : .get_many::<String>("config")
538 0 : .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
539 0 : .unwrap_or_default();
540 0 :
541 0 : pageserver
542 0 : .tenant_config(tenant_id, tenant_conf)
543 0 : .await
544 0 : .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
545 0 : println!("tenant {tenant_id} successfully configured on the pageserver");
546 : }
547 :
548 0 : Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
549 0 : None => bail!("no tenant subcommand provided"),
550 : }
551 0 : Ok(())
552 0 : }
553 :
554 0 : async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
555 0 : let pageserver = get_default_pageserver(env);
556 0 :
557 0 : match timeline_match.subcommand() {
558 0 : Some(("list", list_match)) => {
559 : // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
560 : // where shard 0 is attached, and query there.
561 0 : let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
562 0 : let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
563 0 : print_timelines_tree(timelines, env.timeline_name_mappings())?;
564 : }
565 0 : Some(("create", create_match)) => {
566 0 : let tenant_id = get_tenant_id(create_match, env)?;
567 0 : let new_branch_name = create_match
568 0 : .get_one::<String>("branch-name")
569 0 : .ok_or_else(|| anyhow!("No branch name provided"))?;
570 :
571 0 : let pg_version = create_match
572 0 : .get_one::<u32>("pg-version")
573 0 : .copied()
574 0 : .context("Failed to parse postgres version from the argument string")?;
575 :
576 0 : let new_timeline_id_opt = parse_timeline_id(create_match)?;
577 0 : let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());
578 0 :
579 0 : let storage_controller = StorageController::from_env(env);
580 0 : let create_req = TimelineCreateRequest {
581 0 : new_timeline_id,
582 0 : ancestor_timeline_id: None,
583 0 : existing_initdb_timeline_id: None,
584 0 : ancestor_start_lsn: None,
585 0 : pg_version: Some(pg_version),
586 0 : };
587 0 : let timeline_info = storage_controller
588 0 : .tenant_timeline_create(tenant_id, create_req)
589 0 : .await?;
590 :
591 0 : let last_record_lsn = timeline_info.last_record_lsn;
592 0 : env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
593 :
594 0 : println!(
595 0 : "Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}",
596 0 : timeline_info.timeline_id
597 0 : );
598 : }
599 0 : Some(("import", import_match)) => {
600 0 : let tenant_id = get_tenant_id(import_match, env)?;
601 0 : let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
602 0 : let name = import_match
603 0 : .get_one::<String>("node-name")
604 0 : .ok_or_else(|| anyhow!("No node name provided"))?;
605 0 : let update_catalog = import_match
606 0 : .get_one::<bool>("update-catalog")
607 0 : .cloned()
608 0 : .unwrap_or_default();
609 :
610 : // Parse base inputs
611 0 : let base_tarfile = import_match
612 0 : .get_one::<PathBuf>("base-tarfile")
613 0 : .ok_or_else(|| anyhow!("No base-tarfile provided"))?
614 0 : .to_owned();
615 0 : let base_lsn = Lsn::from_str(
616 0 : import_match
617 0 : .get_one::<String>("base-lsn")
618 0 : .ok_or_else(|| anyhow!("No base-lsn provided"))?,
619 0 : )?;
620 0 : let base = (base_lsn, base_tarfile);
621 0 :
622 0 : // Parse pg_wal inputs
623 0 : let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
624 0 : let end_lsn = import_match
625 0 : .get_one::<String>("end-lsn")
626 0 : .map(|s| Lsn::from_str(s).unwrap());
627 0 : // TODO validate both or none are provided
628 0 : let pg_wal = end_lsn.zip(wal_tarfile);
629 :
630 0 : let pg_version = import_match
631 0 : .get_one::<u32>("pg-version")
632 0 : .copied()
633 0 : .context("Failed to parse postgres version from the argument string")?;
634 :
635 0 : let mut cplane = ComputeControlPlane::load(env.clone())?;
636 0 : println!("Importing timeline into pageserver ...");
637 0 : pageserver
638 0 : .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
639 0 : .await?;
640 0 : env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
641 :
642 0 : println!("Creating endpoint for imported timeline ...");
643 0 : cplane.new_endpoint(
644 0 : name,
645 0 : tenant_id,
646 0 : timeline_id,
647 0 : None,
648 0 : None,
649 0 : pg_version,
650 0 : ComputeMode::Primary,
651 0 : !update_catalog,
652 0 : )?;
653 0 : println!("Done");
654 : }
655 0 : Some(("branch", branch_match)) => {
656 0 : let tenant_id = get_tenant_id(branch_match, env)?;
657 0 : let new_branch_name = branch_match
658 0 : .get_one::<String>("branch-name")
659 0 : .ok_or_else(|| anyhow!("No branch name provided"))?;
660 0 : let ancestor_branch_name = branch_match
661 0 : .get_one::<String>("ancestor-branch-name")
662 0 : .map(|s| s.as_str())
663 0 : .unwrap_or(DEFAULT_BRANCH_NAME);
664 0 : let ancestor_timeline_id = env
665 0 : .get_branch_timeline_id(ancestor_branch_name, tenant_id)
666 0 : .ok_or_else(|| {
667 0 : anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
668 0 : })?;
669 :
670 0 : let start_lsn = branch_match
671 0 : .get_one::<String>("ancestor-start-lsn")
672 0 : .map(|lsn_str| Lsn::from_str(lsn_str))
673 0 : .transpose()
674 0 : .context("Failed to parse ancestor start Lsn from the request")?;
675 0 : let new_timeline_id = TimelineId::generate();
676 0 : let storage_controller = StorageController::from_env(env);
677 0 : let create_req = TimelineCreateRequest {
678 0 : new_timeline_id,
679 0 : ancestor_timeline_id: Some(ancestor_timeline_id),
680 0 : existing_initdb_timeline_id: None,
681 0 : ancestor_start_lsn: start_lsn,
682 0 : pg_version: None,
683 0 : };
684 0 : let timeline_info = storage_controller
685 0 : .tenant_timeline_create(tenant_id, create_req)
686 0 : .await?;
687 :
688 0 : let last_record_lsn = timeline_info.last_record_lsn;
689 0 :
690 0 : env.register_branch_mapping(new_branch_name.to_string(), tenant_id, new_timeline_id)?;
691 :
692 0 : println!(
693 0 : "Created timeline '{}' at Lsn {last_record_lsn} for tenant: {tenant_id}. Ancestor timeline: '{ancestor_branch_name}'",
694 0 : timeline_info.timeline_id
695 0 : );
696 : }
697 0 : Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{sub_name}'"),
698 0 : None => bail!("no tenant subcommand provided"),
699 : }
700 :
701 0 : Ok(())
702 0 : }
703 :
704 0 : async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
705 0 : let (sub_name, sub_args) = match ep_match.subcommand() {
706 0 : Some(ep_subcommand_data) => ep_subcommand_data,
707 0 : None => bail!("no endpoint subcommand provided"),
708 : };
709 0 : let mut cplane = ComputeControlPlane::load(env.clone())?;
710 :
711 0 : match sub_name {
712 0 : "list" => {
713 : // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
714 : // where shard 0 is attached, and query there.
715 0 : let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
716 0 : let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
717 0 : .await
718 0 : .unwrap_or_else(|e| {
719 0 : eprintln!("Failed to load timeline info: {}", e);
720 0 : HashMap::new()
721 0 : });
722 0 :
723 0 : let timeline_name_mappings = env.timeline_name_mappings();
724 0 :
725 0 : let mut table = comfy_table::Table::new();
726 0 :
727 0 : table.load_preset(comfy_table::presets::NOTHING);
728 0 :
729 0 : table.set_header([
730 0 : "ENDPOINT",
731 0 : "ADDRESS",
732 0 : "TIMELINE",
733 0 : "BRANCH NAME",
734 0 : "LSN",
735 0 : "STATUS",
736 0 : ]);
737 :
738 0 : for (endpoint_id, endpoint) in cplane
739 0 : .endpoints
740 0 : .iter()
741 0 : .filter(|(_, endpoint)| endpoint.tenant_id == tenant_shard_id.tenant_id)
742 0 : {
743 0 : let lsn_str = match endpoint.mode {
744 0 : ComputeMode::Static(lsn) => {
745 0 : // -> read-only endpoint
746 0 : // Use the node's LSN.
747 0 : lsn.to_string()
748 : }
749 : _ => {
750 : // -> primary endpoint or hot replica
751 : // Use the LSN at the end of the timeline.
752 0 : timeline_infos
753 0 : .get(&endpoint.timeline_id)
754 0 : .map(|bi| bi.last_record_lsn.to_string())
755 0 : .unwrap_or_else(|| "?".to_string())
756 : }
757 : };
758 :
759 0 : let branch_name = timeline_name_mappings
760 0 : .get(&TenantTimelineId::new(
761 0 : tenant_shard_id.tenant_id,
762 0 : endpoint.timeline_id,
763 0 : ))
764 0 : .map(|name| name.as_str())
765 0 : .unwrap_or("?");
766 0 :
767 0 : table.add_row([
768 0 : endpoint_id.as_str(),
769 0 : &endpoint.pg_address.to_string(),
770 0 : &endpoint.timeline_id.to_string(),
771 0 : branch_name,
772 0 : lsn_str.as_str(),
773 0 : &format!("{}", endpoint.status()),
774 0 : ]);
775 : }
776 :
777 0 : println!("{table}");
778 : }
779 0 : "create" => {
780 0 : let tenant_id = get_tenant_id(sub_args, env)?;
781 0 : let branch_name = sub_args
782 0 : .get_one::<String>("branch-name")
783 0 : .map(|s| s.as_str())
784 0 : .unwrap_or(DEFAULT_BRANCH_NAME);
785 0 : let endpoint_id = sub_args
786 0 : .get_one::<String>("endpoint_id")
787 0 : .map(String::to_string)
788 0 : .unwrap_or_else(|| format!("ep-{branch_name}"));
789 0 : let update_catalog = sub_args
790 0 : .get_one::<bool>("update-catalog")
791 0 : .cloned()
792 0 : .unwrap_or_default();
793 :
794 0 : let lsn = sub_args
795 0 : .get_one::<String>("lsn")
796 0 : .map(|lsn_str| Lsn::from_str(lsn_str))
797 0 : .transpose()
798 0 : .context("Failed to parse Lsn from the request")?;
799 0 : let timeline_id = env
800 0 : .get_branch_timeline_id(branch_name, tenant_id)
801 0 : .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
802 :
803 0 : let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
804 0 : let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
805 0 : let pg_version = sub_args
806 0 : .get_one::<u32>("pg-version")
807 0 : .copied()
808 0 : .context("Failed to parse postgres version from the argument string")?;
809 :
810 0 : let hot_standby = sub_args
811 0 : .get_one::<bool>("hot-standby")
812 0 : .copied()
813 0 : .unwrap_or(false);
814 0 :
815 0 : let allow_multiple = sub_args.get_flag("allow-multiple");
816 :
817 0 : let mode = match (lsn, hot_standby) {
818 0 : (Some(lsn), false) => ComputeMode::Static(lsn),
819 0 : (None, true) => ComputeMode::Replica,
820 0 : (None, false) => ComputeMode::Primary,
821 0 : (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
822 : };
823 :
824 0 : match (mode, hot_standby) {
825 : (ComputeMode::Static(_), true) => {
826 0 : bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
827 : }
828 : (ComputeMode::Primary, true) => {
829 0 : bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
830 : }
831 0 : _ => {}
832 0 : }
833 0 :
834 0 : if !allow_multiple {
835 0 : cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
836 0 : }
837 :
838 0 : cplane.new_endpoint(
839 0 : &endpoint_id,
840 0 : tenant_id,
841 0 : timeline_id,
842 0 : pg_port,
843 0 : http_port,
844 0 : pg_version,
845 0 : mode,
846 0 : !update_catalog,
847 0 : )?;
848 : }
849 0 : "start" => {
850 0 : let endpoint_id = sub_args
851 0 : .get_one::<String>("endpoint_id")
852 0 : .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
853 :
854 0 : let pageserver_id =
855 0 : if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
856 : Some(NodeId(
857 0 : id_str.parse().context("while parsing pageserver id")?,
858 : ))
859 : } else {
860 0 : None
861 : };
862 :
863 0 : let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
864 0 :
865 0 : let allow_multiple = sub_args.get_flag("allow-multiple");
866 :
867 : // If --safekeepers argument is given, use only the listed safekeeper nodes.
868 0 : let safekeepers =
869 0 : if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
870 0 : let mut safekeepers: Vec<NodeId> = Vec::new();
871 0 : for sk_id in safekeepers_str.split(',').map(str::trim) {
872 0 : let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
873 0 : anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
874 0 : })?);
875 0 : safekeepers.push(sk_id);
876 : }
877 0 : safekeepers
878 : } else {
879 0 : env.safekeepers.iter().map(|sk| sk.id).collect()
880 : };
881 :
882 0 : let endpoint = cplane
883 0 : .endpoints
884 0 : .get(endpoint_id.as_str())
885 0 : .ok_or_else(|| anyhow::anyhow!("endpoint {endpoint_id} not found"))?;
886 :
887 0 : let create_test_user = sub_args
888 0 : .get_one::<bool>("create-test-user")
889 0 : .cloned()
890 0 : .unwrap_or_default();
891 0 :
892 0 : if !allow_multiple {
893 0 : cplane.check_conflicting_endpoints(
894 0 : endpoint.mode,
895 0 : endpoint.tenant_id,
896 0 : endpoint.timeline_id,
897 0 : )?;
898 0 : }
899 :
900 0 : let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
901 0 : let conf = env.get_pageserver_conf(pageserver_id).unwrap();
902 0 : let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
903 0 : (
904 0 : vec![(parsed.0, parsed.1.unwrap_or(5432))],
905 0 : // If caller is telling us what pageserver to use, this is not a tenant which is
906 0 : // full managed by storage controller, therefore not sharded.
907 0 : ShardParameters::DEFAULT_STRIPE_SIZE,
908 0 : )
909 : } else {
910 : // Look up the currently attached location of the tenant, and its striping metadata,
911 : // to pass these on to postgres.
912 0 : let storage_controller = StorageController::from_env(env);
913 0 : let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
914 0 : let pageservers = locate_result
915 0 : .shards
916 0 : .into_iter()
917 0 : .map(|shard| {
918 0 : (
919 0 : Host::parse(&shard.listen_pg_addr)
920 0 : .expect("Storage controller reported bad hostname"),
921 0 : shard.listen_pg_port,
922 0 : )
923 0 : })
924 0 : .collect::<Vec<_>>();
925 0 : let stripe_size = locate_result.shard_params.stripe_size;
926 0 :
927 0 : (pageservers, stripe_size)
928 : };
929 0 : assert!(!pageservers.is_empty());
930 :
931 0 : let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
932 0 : let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
933 0 : let claims = Claims::new(Some(endpoint.tenant_id), Scope::Tenant);
934 0 :
935 0 : Some(env.generate_auth_token(&claims)?)
936 : } else {
937 0 : None
938 : };
939 :
940 0 : println!("Starting existing endpoint {endpoint_id}...");
941 0 : endpoint
942 0 : .start(
943 0 : &auth_token,
944 0 : safekeepers,
945 0 : pageservers,
946 0 : remote_ext_config,
947 0 : stripe_size.0 as usize,
948 0 : create_test_user,
949 0 : )
950 0 : .await?;
951 : }
952 0 : "reconfigure" => {
953 0 : let endpoint_id = sub_args
954 0 : .get_one::<String>("endpoint_id")
955 0 : .ok_or_else(|| anyhow!("No endpoint ID provided to reconfigure"))?;
956 0 : let endpoint = cplane
957 0 : .endpoints
958 0 : .get(endpoint_id.as_str())
959 0 : .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
960 0 : let pageservers =
961 0 : if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
962 0 : let ps_id = NodeId(id_str.parse().context("while parsing pageserver id")?);
963 0 : let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
964 0 : vec![(
965 0 : pageserver.pg_connection_config.host().clone(),
966 0 : pageserver.pg_connection_config.port(),
967 0 : )]
968 : } else {
969 0 : let storage_controller = StorageController::from_env(env);
970 0 : storage_controller
971 0 : .tenant_locate(endpoint.tenant_id)
972 0 : .await?
973 : .shards
974 0 : .into_iter()
975 0 : .map(|shard| {
976 0 : (
977 0 : Host::parse(&shard.listen_pg_addr)
978 0 : .expect("Storage controller reported malformed host"),
979 0 : shard.listen_pg_port,
980 0 : )
981 0 : })
982 0 : .collect::<Vec<_>>()
983 : };
984 0 : endpoint.reconfigure(pageservers, None).await?;
985 : }
986 0 : "stop" => {
987 0 : let endpoint_id = sub_args
988 0 : .get_one::<String>("endpoint_id")
989 0 : .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?;
990 0 : let destroy = sub_args.get_flag("destroy");
991 0 : let mode = sub_args.get_one::<String>("mode").expect("has a default");
992 :
993 0 : let endpoint = cplane
994 0 : .endpoints
995 0 : .get(endpoint_id.as_str())
996 0 : .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
997 0 : endpoint.stop(mode, destroy)?;
998 : }
999 :
1000 0 : _ => bail!("Unexpected endpoint subcommand '{sub_name}'"),
1001 : }
1002 :
1003 0 : Ok(())
1004 0 : }
1005 :
1006 0 : fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
1007 0 : let (sub_name, sub_args) = match sub_match.subcommand() {
1008 0 : Some(ep_subcommand_data) => ep_subcommand_data,
1009 0 : None => bail!("no mappings subcommand provided"),
1010 : };
1011 :
1012 0 : match sub_name {
1013 0 : "map" => {
1014 0 : let branch_name = sub_args
1015 0 : .get_one::<String>("branch-name")
1016 0 : .expect("branch-name argument missing");
1017 0 :
1018 0 : let tenant_id = sub_args
1019 0 : .get_one::<String>("tenant-id")
1020 0 : .map(|x| TenantId::from_str(x))
1021 0 : .expect("tenant-id argument missing")
1022 0 : .expect("malformed tenant-id arg");
1023 0 :
1024 0 : let timeline_id = sub_args
1025 0 : .get_one::<String>("timeline-id")
1026 0 : .map(|x| TimelineId::from_str(x))
1027 0 : .expect("timeline-id argument missing")
1028 0 : .expect("malformed timeline-id arg");
1029 0 :
1030 0 : env.register_branch_mapping(branch_name.to_owned(), tenant_id, timeline_id)?;
1031 :
1032 0 : Ok(())
1033 : }
1034 0 : other => unimplemented!("mappings subcommand {other}"),
1035 : }
1036 0 : }
1037 :
1038 0 : fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageServerNode> {
1039 0 : let node_id = if let Some(id_str) = args.get_one::<String>("pageserver-id") {
1040 0 : NodeId(id_str.parse().context("while parsing pageserver id")?)
1041 : } else {
1042 0 : DEFAULT_PAGESERVER_ID
1043 : };
1044 :
1045 : Ok(PageServerNode::from_env(
1046 0 : env,
1047 0 : env.get_pageserver_conf(node_id)?,
1048 : ))
1049 0 : }
1050 :
1051 0 : async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
1052 0 : match sub_match.subcommand() {
1053 0 : Some(("start", subcommand_args)) => {
1054 0 : if let Err(e) = get_pageserver(env, subcommand_args)?.start().await {
1055 0 : eprintln!("pageserver start failed: {e}");
1056 0 : exit(1);
1057 0 : }
1058 : }
1059 :
1060 0 : Some(("stop", subcommand_args)) => {
1061 0 : let immediate = subcommand_args
1062 0 : .get_one::<String>("stop-mode")
1063 0 : .map(|s| s.as_str())
1064 0 : == Some("immediate");
1065 :
1066 0 : if let Err(e) = get_pageserver(env, subcommand_args)?.stop(immediate) {
1067 0 : eprintln!("pageserver stop failed: {}", e);
1068 0 : exit(1);
1069 0 : }
1070 : }
1071 :
1072 0 : Some(("restart", subcommand_args)) => {
1073 0 : let pageserver = get_pageserver(env, subcommand_args)?;
1074 : //TODO what shutdown strategy should we use here?
1075 0 : if let Err(e) = pageserver.stop(false) {
1076 0 : eprintln!("pageserver stop failed: {}", e);
1077 0 : exit(1);
1078 0 : }
1079 :
1080 0 : if let Err(e) = pageserver.start().await {
1081 0 : eprintln!("pageserver start failed: {e}");
1082 0 : exit(1);
1083 0 : }
1084 : }
1085 :
1086 0 : Some(("status", subcommand_args)) => {
1087 0 : match get_pageserver(env, subcommand_args)?.check_status().await {
1088 0 : Ok(_) => println!("Page server is up and running"),
1089 0 : Err(err) => {
1090 0 : eprintln!("Page server is not available: {}", err);
1091 0 : exit(1);
1092 : }
1093 : }
1094 : }
1095 :
1096 0 : Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
1097 0 : None => bail!("no pageserver subcommand provided"),
1098 : }
1099 0 : Ok(())
1100 0 : }
1101 :
1102 0 : async fn handle_storage_controller(
1103 0 : sub_match: &ArgMatches,
1104 0 : env: &local_env::LocalEnv,
1105 0 : ) -> Result<()> {
1106 0 : let svc = StorageController::from_env(env);
1107 0 : match sub_match.subcommand() {
1108 0 : Some(("start", _start_match)) => {
1109 0 : if let Err(e) = svc.start().await {
1110 0 : eprintln!("start failed: {e}");
1111 0 : exit(1);
1112 0 : }
1113 : }
1114 :
1115 0 : Some(("stop", stop_match)) => {
1116 0 : let immediate = stop_match
1117 0 : .get_one::<String>("stop-mode")
1118 0 : .map(|s| s.as_str())
1119 0 : == Some("immediate");
1120 :
1121 0 : if let Err(e) = svc.stop(immediate).await {
1122 0 : eprintln!("stop failed: {}", e);
1123 0 : exit(1);
1124 0 : }
1125 : }
1126 0 : Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name),
1127 0 : None => bail!("no storage_controller subcommand provided"),
1128 : }
1129 0 : Ok(())
1130 0 : }
1131 :
1132 0 : fn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNode> {
1133 0 : if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) {
1134 0 : Ok(SafekeeperNode::from_env(env, node))
1135 : } else {
1136 0 : bail!("could not find safekeeper {id}")
1137 : }
1138 0 : }
1139 :
1140 : // Get list of options to append to safekeeper command invocation.
1141 0 : fn safekeeper_extra_opts(init_match: &ArgMatches) -> Vec<String> {
1142 0 : init_match
1143 0 : .get_many::<String>("safekeeper-extra-opt")
1144 0 : .into_iter()
1145 0 : .flatten()
1146 0 : .map(|s| s.to_owned())
1147 0 : .collect()
1148 0 : }
1149 :
1150 0 : async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
1151 0 : let (sub_name, sub_args) = match sub_match.subcommand() {
1152 0 : Some(safekeeper_command_data) => safekeeper_command_data,
1153 0 : None => bail!("no safekeeper subcommand provided"),
1154 : };
1155 :
1156 : // All the commands take an optional safekeeper name argument
1157 0 : let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
1158 0 : NodeId(id_str.parse().context("while parsing safekeeper id")?)
1159 : } else {
1160 0 : DEFAULT_SAFEKEEPER_ID
1161 : };
1162 0 : let safekeeper = get_safekeeper(env, sk_id)?;
1163 :
1164 0 : match sub_name {
1165 0 : "start" => {
1166 0 : let extra_opts = safekeeper_extra_opts(sub_args);
1167 :
1168 0 : if let Err(e) = safekeeper.start(extra_opts).await {
1169 0 : eprintln!("safekeeper start failed: {}", e);
1170 0 : exit(1);
1171 0 : }
1172 : }
1173 :
1174 0 : "stop" => {
1175 0 : let immediate =
1176 0 : sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
1177 :
1178 0 : if let Err(e) = safekeeper.stop(immediate) {
1179 0 : eprintln!("safekeeper stop failed: {}", e);
1180 0 : exit(1);
1181 0 : }
1182 : }
1183 :
1184 0 : "restart" => {
1185 0 : let immediate =
1186 0 : sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
1187 :
1188 0 : if let Err(e) = safekeeper.stop(immediate) {
1189 0 : eprintln!("safekeeper stop failed: {}", e);
1190 0 : exit(1);
1191 0 : }
1192 0 :
1193 0 : let extra_opts = safekeeper_extra_opts(sub_args);
1194 0 : if let Err(e) = safekeeper.start(extra_opts).await {
1195 0 : eprintln!("safekeeper start failed: {}", e);
1196 0 : exit(1);
1197 0 : }
1198 : }
1199 :
1200 : _ => {
1201 0 : bail!("Unexpected safekeeper subcommand '{}'", sub_name)
1202 : }
1203 : }
1204 0 : Ok(())
1205 0 : }
1206 :
1207 0 : async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {
1208 0 : // Endpoints are not started automatically
1209 0 :
1210 0 : broker::start_broker_process(env).await?;
1211 :
1212 : // Only start the storage controller if the pageserver is configured to need it
1213 0 : if env.control_plane_api.is_some() {
1214 0 : let storage_controller = StorageController::from_env(env);
1215 0 : if let Err(e) = storage_controller.start().await {
1216 0 : eprintln!("storage_controller start failed: {:#}", e);
1217 0 : try_stop_all(env, true).await;
1218 0 : exit(1);
1219 0 : }
1220 0 : }
1221 :
1222 0 : for ps_conf in &env.pageservers {
1223 0 : let pageserver = PageServerNode::from_env(env, ps_conf);
1224 0 : if let Err(e) = pageserver.start().await {
1225 0 : eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
1226 0 : try_stop_all(env, true).await;
1227 0 : exit(1);
1228 0 : }
1229 : }
1230 :
1231 0 : for node in env.safekeepers.iter() {
1232 0 : let safekeeper = SafekeeperNode::from_env(env, node);
1233 0 : if let Err(e) = safekeeper.start(vec![]).await {
1234 0 : eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
1235 0 : try_stop_all(env, false).await;
1236 0 : exit(1);
1237 0 : }
1238 : }
1239 0 : Ok(())
1240 0 : }
1241 :
1242 0 : async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
1243 0 : let immediate =
1244 0 : sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
1245 0 :
1246 0 : try_stop_all(env, immediate).await;
1247 :
1248 0 : Ok(())
1249 0 : }
1250 :
1251 0 : async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
1252 0 : // Stop all endpoints
1253 0 : match ComputeControlPlane::load(env.clone()) {
1254 0 : Ok(cplane) => {
1255 0 : for (_k, node) in cplane.endpoints {
1256 0 : if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
1257 0 : eprintln!("postgres stop failed: {e:#}");
1258 0 : }
1259 : }
1260 : }
1261 0 : Err(e) => {
1262 0 : eprintln!("postgres stop failed, could not restore control plane data from env: {e:#}")
1263 : }
1264 : }
1265 :
1266 0 : for ps_conf in &env.pageservers {
1267 0 : let pageserver = PageServerNode::from_env(env, ps_conf);
1268 0 : if let Err(e) = pageserver.stop(immediate) {
1269 0 : eprintln!("pageserver {} stop failed: {:#}", ps_conf.id, e);
1270 0 : }
1271 : }
1272 :
1273 0 : for node in env.safekeepers.iter() {
1274 0 : let safekeeper = SafekeeperNode::from_env(env, node);
1275 0 : if let Err(e) = safekeeper.stop(immediate) {
1276 0 : eprintln!("safekeeper {} stop failed: {:#}", safekeeper.id, e);
1277 0 : }
1278 : }
1279 :
1280 0 : if let Err(e) = broker::stop_broker_process(env) {
1281 0 : eprintln!("neon broker stop failed: {e:#}");
1282 0 : }
1283 :
1284 0 : if env.control_plane_api.is_some() {
1285 0 : let storage_controller = StorageController::from_env(env);
1286 0 : if let Err(e) = storage_controller.stop(immediate).await {
1287 0 : eprintln!("storage controller stop failed: {e:#}");
1288 0 : }
1289 0 : }
1290 0 : }
1291 :
1292 2 : fn cli() -> Command {
1293 2 : let branch_name_arg = Arg::new("branch-name")
1294 2 : .long("branch-name")
1295 2 : .help("Name of the branch to be created or used as an alias for other services")
1296 2 : .required(false);
1297 2 :
1298 2 : let endpoint_id_arg = Arg::new("endpoint_id")
1299 2 : .help("Postgres endpoint id")
1300 2 : .required(false);
1301 2 :
1302 2 : let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
1303 2 :
1304 2 : // --id, when using a pageserver command
1305 2 : let pageserver_id_arg = Arg::new("pageserver-id")
1306 2 : .long("id")
1307 2 : .global(true)
1308 2 : .help("pageserver id")
1309 2 : .required(false);
1310 2 : // --pageserver-id when using a non-pageserver command
1311 2 : let endpoint_pageserver_id_arg = Arg::new("endpoint-pageserver-id")
1312 2 : .long("pageserver-id")
1313 2 : .required(false);
1314 2 :
1315 2 : let safekeeper_extra_opt_arg = Arg::new("safekeeper-extra-opt")
1316 2 : .short('e')
1317 2 : .long("safekeeper-extra-opt")
1318 2 : .num_args(1)
1319 2 : .action(ArgAction::Append)
1320 2 : .help("Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo")
1321 2 : .required(false);
1322 2 :
1323 2 : let tenant_id_arg = Arg::new("tenant-id")
1324 2 : .long("tenant-id")
1325 2 : .help("Tenant id. Represented as a hexadecimal string 32 symbols length")
1326 2 : .required(false);
1327 2 :
1328 2 : let timeline_id_arg = Arg::new("timeline-id")
1329 2 : .long("timeline-id")
1330 2 : .help("Timeline id. Represented as a hexadecimal string 32 symbols length")
1331 2 : .required(false);
1332 2 :
1333 2 : let pg_version_arg = Arg::new("pg-version")
1334 2 : .long("pg-version")
1335 2 : .help("Postgres version to use for the initial tenant")
1336 2 : .required(false)
1337 2 : .value_parser(value_parser!(u32))
1338 2 : .default_value(DEFAULT_PG_VERSION);
1339 2 :
1340 2 : let pg_port_arg = Arg::new("pg-port")
1341 2 : .long("pg-port")
1342 2 : .required(false)
1343 2 : .value_parser(value_parser!(u16))
1344 2 : .value_name("pg-port");
1345 2 :
1346 2 : let http_port_arg = Arg::new("http-port")
1347 2 : .long("http-port")
1348 2 : .required(false)
1349 2 : .value_parser(value_parser!(u16))
1350 2 : .value_name("http-port");
1351 2 :
1352 2 : let safekeepers_arg = Arg::new("safekeepers")
1353 2 : .long("safekeepers")
1354 2 : .required(false)
1355 2 : .value_name("safekeepers");
1356 2 :
1357 2 : let stop_mode_arg = Arg::new("stop-mode")
1358 2 : .short('m')
1359 2 : .value_parser(["fast", "immediate"])
1360 2 : .default_value("fast")
1361 2 : .help("If 'immediate', don't flush repository data at shutdown")
1362 2 : .required(false)
1363 2 : .value_name("stop-mode");
1364 2 :
1365 2 : let remote_ext_config_args = Arg::new("remote-ext-config")
1366 2 : .long("remote-ext-config")
1367 2 : .num_args(1)
1368 2 : .help("Configure the remote extensions storage proxy gateway to request for extensions.")
1369 2 : .required(false);
1370 2 :
1371 2 : let lsn_arg = Arg::new("lsn")
1372 2 : .long("lsn")
1373 2 : .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
1374 2 : .required(false);
1375 2 :
1376 2 : let hot_standby_arg = Arg::new("hot-standby")
1377 2 : .value_parser(value_parser!(bool))
1378 2 : .long("hot-standby")
1379 2 : .help("If set, the node will be a hot replica on the specified timeline")
1380 2 : .required(false);
1381 2 :
1382 2 : let force_arg = Arg::new("force")
1383 2 : .value_parser(value_parser!(InitForceMode))
1384 2 : .long("force")
1385 2 : .default_value(
1386 2 : InitForceMode::MustNotExist
1387 2 : .to_possible_value()
1388 2 : .unwrap()
1389 2 : .get_name()
1390 2 : .to_owned(),
1391 2 : )
1392 2 : .help("Force initialization even if the repository is not empty")
1393 2 : .required(false);
1394 2 :
1395 2 : let num_pageservers_arg = Arg::new("num-pageservers")
1396 2 : .value_parser(value_parser!(u16))
1397 2 : .long("num-pageservers")
1398 2 : .help("How many pageservers to create (default 1)");
1399 2 :
1400 2 : let update_catalog = Arg::new("update-catalog")
1401 2 : .value_parser(value_parser!(bool))
1402 2 : .long("update-catalog")
1403 2 : .help("If set, will set up the catalog for neon_superuser")
1404 2 : .required(false);
1405 2 :
1406 2 : let create_test_user = Arg::new("create-test-user")
1407 2 : .value_parser(value_parser!(bool))
1408 2 : .long("create-test-user")
1409 2 : .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
1410 2 : .required(false);
1411 2 :
1412 2 : let allow_multiple = Arg::new("allow-multiple")
1413 2 : .help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
1414 2 : .long("allow-multiple")
1415 2 : .action(ArgAction::SetTrue)
1416 2 : .required(false);
1417 2 :
1418 2 : Command::new("Neon CLI")
1419 2 : .arg_required_else_help(true)
1420 2 : .version(GIT_VERSION)
1421 2 : .subcommand(
1422 2 : Command::new("init")
1423 2 : .about("Initialize a new Neon repository, preparing configs for services to start with")
1424 2 : .arg(num_pageservers_arg.clone())
1425 2 : .arg(
1426 2 : Arg::new("config")
1427 2 : .long("config")
1428 2 : .required(false)
1429 2 : .value_parser(value_parser!(PathBuf))
1430 2 : .value_name("config")
1431 2 : )
1432 2 : .arg(pg_version_arg.clone())
1433 2 : .arg(force_arg)
1434 2 : )
1435 2 : .subcommand(
1436 2 : Command::new("timeline")
1437 2 : .about("Manage timelines")
1438 2 : .arg_required_else_help(true)
1439 2 : .subcommand(Command::new("list")
1440 2 : .about("List all timelines, available to this pageserver")
1441 2 : .arg(tenant_id_arg.clone()))
1442 2 : .subcommand(Command::new("branch")
1443 2 : .about("Create a new timeline, using another timeline as a base, copying its data")
1444 2 : .arg(tenant_id_arg.clone())
1445 2 : .arg(branch_name_arg.clone())
1446 2 : .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
1447 2 : .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
1448 2 : .arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
1449 2 : .help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
1450 2 : .subcommand(Command::new("create")
1451 2 : .about("Create a new blank timeline")
1452 2 : .arg(tenant_id_arg.clone())
1453 2 : .arg(timeline_id_arg.clone())
1454 2 : .arg(branch_name_arg.clone())
1455 2 : .arg(pg_version_arg.clone())
1456 2 : )
1457 2 : .subcommand(Command::new("import")
1458 2 : .about("Import timeline from basebackup directory")
1459 2 : .arg(tenant_id_arg.clone())
1460 2 : .arg(timeline_id_arg.clone())
1461 2 : .arg(Arg::new("node-name").long("node-name")
1462 2 : .help("Name to assign to the imported timeline"))
1463 2 : .arg(Arg::new("base-tarfile")
1464 2 : .long("base-tarfile")
1465 2 : .value_parser(value_parser!(PathBuf))
1466 2 : .help("Basebackup tarfile to import")
1467 2 : )
1468 2 : .arg(Arg::new("base-lsn").long("base-lsn")
1469 2 : .help("Lsn the basebackup starts at"))
1470 2 : .arg(Arg::new("wal-tarfile")
1471 2 : .long("wal-tarfile")
1472 2 : .value_parser(value_parser!(PathBuf))
1473 2 : .help("Wal to add after base")
1474 2 : )
1475 2 : .arg(Arg::new("end-lsn").long("end-lsn")
1476 2 : .help("Lsn the basebackup ends at"))
1477 2 : .arg(pg_version_arg.clone())
1478 2 : .arg(update_catalog.clone())
1479 2 : )
1480 2 : ).subcommand(
1481 2 : Command::new("tenant")
1482 2 : .arg_required_else_help(true)
1483 2 : .about("Manage tenants")
1484 2 : .subcommand(Command::new("list"))
1485 2 : .subcommand(Command::new("create")
1486 2 : .arg(tenant_id_arg.clone())
1487 2 : .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
1488 2 : .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
1489 2 : .arg(pg_version_arg.clone())
1490 2 : .arg(Arg::new("set-default").long("set-default").action(ArgAction::SetTrue).required(false)
1491 2 : .help("Use this tenant in future CLI commands where tenant_id is needed, but not specified"))
1492 2 : .arg(Arg::new("shard-count").value_parser(value_parser!(u8)).long("shard-count").action(ArgAction::Set).help("Number of shards in the new tenant (default 1)"))
1493 2 : .arg(Arg::new("shard-stripe-size").value_parser(value_parser!(u32)).long("shard-stripe-size").action(ArgAction::Set).help("Sharding stripe size in pages"))
1494 2 : .arg(Arg::new("placement-policy").value_parser(value_parser!(String)).long("placement-policy").action(ArgAction::Set).help("Placement policy shards in this tenant"))
1495 2 : )
1496 2 : .subcommand(Command::new("set-default").arg(tenant_id_arg.clone().required(true))
1497 2 : .about("Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"))
1498 2 : .subcommand(Command::new("config")
1499 2 : .arg(tenant_id_arg.clone())
1500 2 : .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
1501 2 : .subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
1502 2 : .about("Import a tenant that is present in remote storage, and create branches for its timelines"))
1503 2 : )
1504 2 : .subcommand(
1505 2 : Command::new("pageserver")
1506 2 : .arg_required_else_help(true)
1507 2 : .about("Manage pageserver")
1508 2 : .arg(pageserver_id_arg)
1509 2 : .subcommand(Command::new("status"))
1510 2 : .subcommand(Command::new("start")
1511 2 : .about("Start local pageserver")
1512 2 : )
1513 2 : .subcommand(Command::new("stop")
1514 2 : .about("Stop local pageserver")
1515 2 : .arg(stop_mode_arg.clone())
1516 2 : )
1517 2 : .subcommand(Command::new("restart")
1518 2 : .about("Restart local pageserver")
1519 2 : )
1520 2 : )
1521 2 : .subcommand(
1522 2 : Command::new("storage_controller")
1523 2 : .arg_required_else_help(true)
1524 2 : .about("Manage storage_controller")
1525 2 : .subcommand(Command::new("start").about("Start storage controller"))
1526 2 : .subcommand(Command::new("stop").about("Stop storage controller")
1527 2 : .arg(stop_mode_arg.clone()))
1528 2 : )
1529 2 : .subcommand(
1530 2 : Command::new("safekeeper")
1531 2 : .arg_required_else_help(true)
1532 2 : .about("Manage safekeepers")
1533 2 : .subcommand(Command::new("start")
1534 2 : .about("Start local safekeeper")
1535 2 : .arg(safekeeper_id_arg.clone())
1536 2 : .arg(safekeeper_extra_opt_arg.clone())
1537 2 : )
1538 2 : .subcommand(Command::new("stop")
1539 2 : .about("Stop local safekeeper")
1540 2 : .arg(safekeeper_id_arg.clone())
1541 2 : .arg(stop_mode_arg.clone())
1542 2 : )
1543 2 : .subcommand(Command::new("restart")
1544 2 : .about("Restart local safekeeper")
1545 2 : .arg(safekeeper_id_arg)
1546 2 : .arg(stop_mode_arg.clone())
1547 2 : .arg(safekeeper_extra_opt_arg)
1548 2 : )
1549 2 : )
1550 2 : .subcommand(
1551 2 : Command::new("endpoint")
1552 2 : .arg_required_else_help(true)
1553 2 : .about("Manage postgres instances")
1554 2 : .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
1555 2 : .subcommand(Command::new("create")
1556 2 : .about("Create a compute endpoint")
1557 2 : .arg(endpoint_id_arg.clone())
1558 2 : .arg(branch_name_arg.clone())
1559 2 : .arg(tenant_id_arg.clone())
1560 2 : .arg(lsn_arg.clone())
1561 2 : .arg(pg_port_arg.clone())
1562 2 : .arg(http_port_arg.clone())
1563 2 : .arg(endpoint_pageserver_id_arg.clone())
1564 2 : .arg(
1565 2 : Arg::new("config-only")
1566 2 : .help("Don't do basebackup, create endpoint directory with only config files")
1567 2 : .long("config-only")
1568 2 : .required(false))
1569 2 : .arg(pg_version_arg.clone())
1570 2 : .arg(hot_standby_arg.clone())
1571 2 : .arg(update_catalog)
1572 2 : .arg(allow_multiple.clone())
1573 2 : )
1574 2 : .subcommand(Command::new("start")
1575 2 : .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
1576 2 : .arg(endpoint_id_arg.clone())
1577 2 : .arg(endpoint_pageserver_id_arg.clone())
1578 2 : .arg(safekeepers_arg)
1579 2 : .arg(remote_ext_config_args)
1580 2 : .arg(create_test_user)
1581 2 : .arg(allow_multiple.clone())
1582 2 : )
1583 2 : .subcommand(Command::new("reconfigure")
1584 2 : .about("Reconfigure the endpoint")
1585 2 : .arg(endpoint_pageserver_id_arg)
1586 2 : .arg(endpoint_id_arg.clone())
1587 2 : .arg(tenant_id_arg.clone())
1588 2 : )
1589 2 : .subcommand(
1590 2 : Command::new("stop")
1591 2 : .arg(endpoint_id_arg)
1592 2 : .arg(
1593 2 : Arg::new("destroy")
1594 2 : .help("Also delete data directory (now optional, should be default in future)")
1595 2 : .long("destroy")
1596 2 : .action(ArgAction::SetTrue)
1597 2 : .required(false)
1598 2 : )
1599 2 : .arg(
1600 2 : Arg::new("mode")
1601 2 : .help("Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")
1602 2 : .long("mode")
1603 2 : .action(ArgAction::Set)
1604 2 : .required(false)
1605 2 : .value_parser(["smart", "fast", "immediate"])
1606 2 : .default_value("fast")
1607 2 : )
1608 2 : )
1609 2 :
1610 2 : )
1611 2 : .subcommand(
1612 2 : Command::new("mappings")
1613 2 : .arg_required_else_help(true)
1614 2 : .about("Manage neon_local branch name mappings")
1615 2 : .subcommand(
1616 2 : Command::new("map")
1617 2 : .about("Create new mapping which cannot exist already")
1618 2 : .arg(branch_name_arg.clone())
1619 2 : .arg(tenant_id_arg.clone())
1620 2 : .arg(timeline_id_arg.clone())
1621 2 : )
1622 2 : )
1623 2 : // Obsolete old name for 'endpoint'. We now just print an error if it's used.
1624 2 : .subcommand(
1625 2 : Command::new("pg")
1626 2 : .hide(true)
1627 2 : .arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false))
1628 2 : .trailing_var_arg(true)
1629 2 : )
1630 2 : .subcommand(
1631 2 : Command::new("start")
1632 2 : .about("Start page server and safekeepers")
1633 2 : )
1634 2 : .subcommand(
1635 2 : Command::new("stop")
1636 2 : .about("Stop page server and safekeepers")
1637 2 : .arg(stop_mode_arg)
1638 2 : )
1639 2 : }
1640 :
/// Runs clap's built-in consistency checks over the command tree produced by `cli()`.
#[test]
fn verify_cli() {
    let command = cli();
    command.debug_assert();
}
|