Line data Source code
1 : use pageserver_api::shard::TenantShardId;
2 : use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
3 : use s3_scrubber::scan_metadata::scan_metadata;
4 : use s3_scrubber::{init_logging, BucketConfig, ConsoleConfig, NodeKind, TraversingDepth};
5 :
6 : use clap::{Parser, Subcommand};
7 :
8 0 : #[derive(Parser)]
9 : #[command(author, version, about, long_about = None)]
10 : #[command(arg_required_else_help(true))]
11 : struct Cli {
12 : #[command(subcommand)]
13 : command: Command,
14 :
15 0 : #[arg(short, long, default_value_t = false)]
16 0 : delete: bool,
17 : }
18 :
19 0 : #[derive(Subcommand, Debug)]
20 : enum Command {
21 : FindGarbage {
22 : #[arg(short, long)]
23 0 : node_kind: NodeKind,
24 0 : #[arg(short, long, default_value_t=TraversingDepth::Tenant)]
25 0 : depth: TraversingDepth,
26 0 : #[arg(short, long, default_value_t = String::from("garbage.json"))]
27 0 : output_path: String,
28 : },
29 : PurgeGarbage {
30 : #[arg(short, long)]
31 0 : input_path: String,
32 0 : #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
33 0 : mode: PurgeMode,
34 : },
35 : ScanMetadata {
36 0 : #[arg(short, long, default_value_t = false)]
37 0 : json: bool,
38 : #[arg(long = "tenant-id", num_args = 0..)]
39 0 : tenant_ids: Vec<TenantShardId>,
40 : },
41 : }
42 :
43 : #[tokio::main]
44 0 : async fn main() -> anyhow::Result<()> {
45 0 : let cli = Cli::parse();
46 :
47 0 : let bucket_config = BucketConfig::from_env()?;
48 :
49 0 : let command_log_name = match &cli.command {
50 0 : Command::ScanMetadata { .. } => "scan",
51 0 : Command::FindGarbage { .. } => "find-garbage",
52 0 : Command::PurgeGarbage { .. } => "purge-garbage",
53 : };
54 0 : let _guard = init_logging(&format!(
55 0 : "{}_{}_{}_{}.log",
56 0 : std::env::args().next().unwrap(),
57 0 : command_log_name,
58 0 : bucket_config.bucket,
59 0 : chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S")
60 0 : ));
61 0 :
62 0 : match cli.command {
63 0 : Command::ScanMetadata { json, tenant_ids } => {
64 0 : match scan_metadata(bucket_config.clone(), tenant_ids).await {
65 0 : Err(e) => {
66 0 : tracing::error!("Failed: {e}");
67 0 : Err(e)
68 : }
69 0 : Ok(summary) => {
70 0 : if json {
71 0 : println!("{}", serde_json::to_string(&summary).unwrap())
72 0 : } else {
73 0 : println!("{}", summary.summary_string());
74 0 : }
75 0 : if summary.is_fatal() {
76 0 : Err(anyhow::anyhow!("Fatal scrub errors detected"))
77 0 : } else if summary.is_empty() {
78 : // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
79 : // scrubber they were likely expecting to scan something, and if we see no timelines
80 : // at all then it's likely due to some configuration issues like a bad prefix
81 0 : Err(anyhow::anyhow!(
82 0 : "No timelines found in bucket {} prefix {}",
83 0 : bucket_config.bucket,
84 0 : bucket_config
85 0 : .prefix_in_bucket
86 0 : .unwrap_or("<none>".to_string())
87 0 : ))
88 : } else {
89 0 : Ok(())
90 : }
91 : }
92 : }
93 : }
94 : Command::FindGarbage {
95 0 : node_kind,
96 0 : depth,
97 0 : output_path,
98 : } => {
99 0 : let console_config = ConsoleConfig::from_env()?;
100 0 : find_garbage(bucket_config, console_config, depth, node_kind, output_path).await
101 : }
102 0 : Command::PurgeGarbage { input_path, mode } => {
103 0 : purge_garbage(input_path, mode, !cli.delete).await
104 : }
105 : }
106 : }
|