Line data Source code
1 : use anyhow::bail;
2 : use camino::Utf8PathBuf;
3 : use pageserver_api::shard::TenantShardId;
4 : use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
5 : use s3_scrubber::scan_pageserver_metadata::scan_metadata;
6 : use s3_scrubber::tenant_snapshot::SnapshotDownloader;
7 : use s3_scrubber::{
8 : init_logging, scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig,
9 : NodeKind, TraversingDepth,
10 : };
11 :
12 : use clap::{Parser, Subcommand};
13 : use utils::id::TenantId;
14 :
15 0 : #[derive(Parser)]
16 : #[command(author, version, about, long_about = None)]
17 : #[command(arg_required_else_help(true))]
18 : struct Cli {
19 : #[command(subcommand)]
20 : command: Command,
21 :
22 0 : #[arg(short, long, default_value_t = false)]
23 0 : delete: bool,
24 : }
25 :
26 0 : #[derive(Subcommand, Debug)]
27 : enum Command {
28 : FindGarbage {
29 : #[arg(short, long)]
30 0 : node_kind: NodeKind,
31 0 : #[arg(short, long, default_value_t=TraversingDepth::Tenant)]
32 0 : depth: TraversingDepth,
33 0 : #[arg(short, long, default_value_t = String::from("garbage.json"))]
34 0 : output_path: String,
35 : },
36 : PurgeGarbage {
37 : #[arg(short, long)]
38 0 : input_path: String,
39 0 : #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
40 0 : mode: PurgeMode,
41 : },
42 : #[command(verbatim_doc_comment)]
43 : ScanMetadata {
44 : #[arg(short, long)]
45 0 : node_kind: NodeKind,
46 0 : #[arg(short, long, default_value_t = false)]
47 0 : json: bool,
48 : #[arg(long = "tenant-id", num_args = 0..)]
49 0 : tenant_ids: Vec<TenantShardId>,
50 : #[arg(long, default_value = None)]
51 : /// For safekeeper node_kind only, points to db with debug dump
52 : dump_db_connstr: Option<String>,
53 : /// For safekeeper node_kind only, table in the db with debug dump
54 : #[arg(long, default_value = None)]
55 : dump_db_table: Option<String>,
56 : },
57 : TenantSnapshot {
58 : #[arg(long = "tenant-id")]
59 0 : tenant_id: TenantId,
60 0 : #[arg(long = "concurrency", short = 'j', default_value_t = 8)]
61 0 : concurrency: usize,
62 : #[arg(short, long)]
63 0 : output_path: Utf8PathBuf,
64 : },
65 : }
66 :
67 : #[tokio::main]
68 0 : async fn main() -> anyhow::Result<()> {
69 0 : let cli = Cli::parse();
70 0 :
71 0 : let bucket_config = BucketConfig::from_env()?;
72 0 :
73 0 : let command_log_name = match &cli.command {
74 0 : Command::ScanMetadata { .. } => "scan",
75 0 : Command::FindGarbage { .. } => "find-garbage",
76 0 : Command::PurgeGarbage { .. } => "purge-garbage",
77 0 : Command::TenantSnapshot { .. } => "tenant-snapshot",
78 0 : };
79 0 : let _guard = init_logging(&format!(
80 0 : "{}_{}_{}_{}.log",
81 0 : std::env::args().next().unwrap(),
82 0 : command_log_name,
83 0 : bucket_config.bucket,
84 0 : chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S")
85 0 : ));
86 0 :
87 0 : match cli.command {
88 0 : Command::ScanMetadata {
89 0 : json,
90 0 : tenant_ids,
91 0 : node_kind,
92 0 : dump_db_connstr,
93 0 : dump_db_table,
94 0 : } => {
95 0 : if let NodeKind::Safekeeper = node_kind {
96 0 : let dump_db_connstr =
97 0 : dump_db_connstr.ok_or(anyhow::anyhow!("dump_db_connstr not specified"))?;
98 0 : let dump_db_table =
99 0 : dump_db_table.ok_or(anyhow::anyhow!("dump_db_table not specified"))?;
100 0 :
101 0 : let summary = scan_safekeeper_metadata(
102 0 : bucket_config.clone(),
103 0 : tenant_ids.iter().map(|tshid| tshid.tenant_id).collect(),
104 0 : dump_db_connstr,
105 0 : dump_db_table,
106 0 : )
107 0 : .await?;
108 0 : if json {
109 0 : println!("{}", serde_json::to_string(&summary).unwrap())
110 0 : } else {
111 0 : println!("{}", summary.summary_string());
112 0 : }
113 0 : if summary.is_fatal() {
114 0 : bail!("Fatal scrub errors detected");
115 0 : }
116 0 : if summary.is_empty() {
117 0 : // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
118 0 : // scrubber they were likely expecting to scan something, and if we see no timelines
119 0 : // at all then it's likely due to some configuration issues like a bad prefix
120 0 : bail!(
121 0 : "No timelines found in bucket {} prefix {}",
122 0 : bucket_config.bucket,
123 0 : bucket_config
124 0 : .prefix_in_bucket
125 0 : .unwrap_or("<none>".to_string())
126 0 : );
127 0 : }
128 0 : Ok(())
129 0 : } else {
130 0 : match scan_metadata(bucket_config.clone(), tenant_ids).await {
131 0 : Err(e) => {
132 0 : tracing::error!("Failed: {e}");
133 0 : Err(e)
134 0 : }
135 0 : Ok(summary) => {
136 0 : if json {
137 0 : println!("{}", serde_json::to_string(&summary).unwrap())
138 0 : } else {
139 0 : println!("{}", summary.summary_string());
140 0 : }
141 0 : if summary.is_fatal() {
142 0 : Err(anyhow::anyhow!("Fatal scrub errors detected"))
143 0 : } else if summary.is_empty() {
144 0 : // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
145 0 : // scrubber they were likely expecting to scan something, and if we see no timelines
146 0 : // at all then it's likely due to some configuration issues like a bad prefix
147 0 : Err(anyhow::anyhow!(
148 0 : "No timelines found in bucket {} prefix {}",
149 0 : bucket_config.bucket,
150 0 : bucket_config
151 0 : .prefix_in_bucket
152 0 : .unwrap_or("<none>".to_string())
153 0 : ))
154 0 : } else {
155 0 : Ok(())
156 0 : }
157 0 : }
158 0 : }
159 0 : }
160 0 : }
161 0 : Command::FindGarbage {
162 0 : node_kind,
163 0 : depth,
164 0 : output_path,
165 0 : } => {
166 0 : let console_config = ConsoleConfig::from_env()?;
167 0 : find_garbage(bucket_config, console_config, depth, node_kind, output_path).await
168 0 : }
169 0 : Command::PurgeGarbage { input_path, mode } => {
170 0 : purge_garbage(input_path, mode, !cli.delete).await
171 0 : }
172 0 : Command::TenantSnapshot {
173 0 : tenant_id,
174 0 : output_path,
175 0 : concurrency,
176 0 : } => {
177 0 : let downloader =
178 0 : SnapshotDownloader::new(bucket_config, tenant_id, output_path, concurrency)?;
179 0 : downloader.download().await
180 0 : }
181 0 : }
182 0 : }
|