LCOV - code coverage report
Current view: top level - s3_scrubber/src - main.rs (source / functions) Coverage Total Hit
Test: 691a4c28fe7169edd60b367c52d448a0a6605f1f.info Lines: 0.0 % 135 0
Test Date: 2024-05-10 13:18:37 Functions: 0.0 % 47 0

            Line data    Source code
       1              : use anyhow::bail;
       2              : use camino::Utf8PathBuf;
       3              : use pageserver_api::shard::TenantShardId;
       4              : use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
       5              : use s3_scrubber::scan_pageserver_metadata::scan_metadata;
       6              : use s3_scrubber::tenant_snapshot::SnapshotDownloader;
       7              : use s3_scrubber::{
       8              :     init_logging, scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig,
       9              :     NodeKind, TraversingDepth,
      10              : };
      11              : 
      12              : use clap::{Parser, Subcommand};
      13              : use utils::id::TenantId;
      14              : 
      15            0 : #[derive(Parser)]
      16              : #[command(author, version, about, long_about = None)]
      17              : #[command(arg_required_else_help(true))]
      18              : struct Cli {
      19              :     #[command(subcommand)]
      20              :     command: Command,
      21              : 
      22            0 :     #[arg(short, long, default_value_t = false)]
      23            0 :     delete: bool,
      24              : }
      25              : 
      26            0 : #[derive(Subcommand, Debug)]
      27              : enum Command {
      28              :     FindGarbage {
      29              :         #[arg(short, long)]
      30            0 :         node_kind: NodeKind,
      31            0 :         #[arg(short, long, default_value_t=TraversingDepth::Tenant)]
      32            0 :         depth: TraversingDepth,
      33            0 :         #[arg(short, long, default_value_t = String::from("garbage.json"))]
      34            0 :         output_path: String,
      35              :     },
      36              :     PurgeGarbage {
      37              :         #[arg(short, long)]
      38            0 :         input_path: String,
      39            0 :         #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
      40            0 :         mode: PurgeMode,
      41              :     },
      42              :     #[command(verbatim_doc_comment)]
      43              :     ScanMetadata {
      44              :         #[arg(short, long)]
      45            0 :         node_kind: NodeKind,
      46            0 :         #[arg(short, long, default_value_t = false)]
      47            0 :         json: bool,
      48              :         #[arg(long = "tenant-id", num_args = 0..)]
      49            0 :         tenant_ids: Vec<TenantShardId>,
      50              :         #[arg(long, default_value = None)]
      51              :         /// For safekeeper node_kind only, points to db with debug dump
      52              :         dump_db_connstr: Option<String>,
      53              :         /// For safekeeper node_kind only, table in the db with debug dump
      54              :         #[arg(long, default_value = None)]
      55              :         dump_db_table: Option<String>,
      56              :     },
      57              :     TenantSnapshot {
      58              :         #[arg(long = "tenant-id")]
      59            0 :         tenant_id: TenantId,
      60            0 :         #[arg(long = "concurrency", short = 'j', default_value_t = 8)]
      61            0 :         concurrency: usize,
      62              :         #[arg(short, long)]
      63            0 :         output_path: Utf8PathBuf,
      64              :     },
      65              : }
      66              : 
      67              : #[tokio::main]
      68            0 : async fn main() -> anyhow::Result<()> {
      69            0 :     let cli = Cli::parse();
      70            0 : 
      71            0 :     let bucket_config = BucketConfig::from_env()?;
      72            0 : 
      73            0 :     let command_log_name = match &cli.command {
      74            0 :         Command::ScanMetadata { .. } => "scan",
      75            0 :         Command::FindGarbage { .. } => "find-garbage",
      76            0 :         Command::PurgeGarbage { .. } => "purge-garbage",
      77            0 :         Command::TenantSnapshot { .. } => "tenant-snapshot",
      78            0 :     };
      79            0 :     let _guard = init_logging(&format!(
      80            0 :         "{}_{}_{}_{}.log",
      81            0 :         std::env::args().next().unwrap(),
      82            0 :         command_log_name,
      83            0 :         bucket_config.bucket,
      84            0 :         chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S")
      85            0 :     ));
      86            0 : 
      87            0 :     match cli.command {
      88            0 :         Command::ScanMetadata {
      89            0 :             json,
      90            0 :             tenant_ids,
      91            0 :             node_kind,
      92            0 :             dump_db_connstr,
      93            0 :             dump_db_table,
      94            0 :         } => {
      95            0 :             if let NodeKind::Safekeeper = node_kind {
      96            0 :                 let dump_db_connstr =
      97            0 :                     dump_db_connstr.ok_or(anyhow::anyhow!("dump_db_connstr not specified"))?;
      98            0 :                 let dump_db_table =
      99            0 :                     dump_db_table.ok_or(anyhow::anyhow!("dump_db_table not specified"))?;
     100            0 : 
     101            0 :                 let summary = scan_safekeeper_metadata(
     102            0 :                     bucket_config.clone(),
     103            0 :                     tenant_ids.iter().map(|tshid| tshid.tenant_id).collect(),
     104            0 :                     dump_db_connstr,
     105            0 :                     dump_db_table,
     106            0 :                 )
     107            0 :                 .await?;
     108            0 :                 if json {
     109            0 :                     println!("{}", serde_json::to_string(&summary).unwrap())
     110            0 :                 } else {
     111            0 :                     println!("{}", summary.summary_string());
     112            0 :                 }
     113            0 :                 if summary.is_fatal() {
     114            0 :                     bail!("Fatal scrub errors detected");
     115            0 :                 }
     116            0 :                 if summary.is_empty() {
     117            0 :                     // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
     118            0 :                     // scrubber they were likely expecting to scan something, and if we see no timelines
     119            0 :                     // at all then it's likely due to some configuration issues like a bad prefix
     120            0 :                     bail!(
     121            0 :                         "No timelines found in bucket {} prefix {}",
     122            0 :                         bucket_config.bucket,
     123            0 :                         bucket_config
     124            0 :                             .prefix_in_bucket
     125            0 :                             .unwrap_or("<none>".to_string())
     126            0 :                     );
     127            0 :                 }
     128            0 :                 Ok(())
     129            0 :             } else {
     130            0 :                 match scan_metadata(bucket_config.clone(), tenant_ids).await {
     131            0 :                     Err(e) => {
     132            0 :                         tracing::error!("Failed: {e}");
     133            0 :                         Err(e)
     134            0 :                     }
     135            0 :                     Ok(summary) => {
     136            0 :                         if json {
     137            0 :                             println!("{}", serde_json::to_string(&summary).unwrap())
     138            0 :                         } else {
     139            0 :                             println!("{}", summary.summary_string());
     140            0 :                         }
     141            0 :                         if summary.is_fatal() {
     142            0 :                             Err(anyhow::anyhow!("Fatal scrub errors detected"))
     143            0 :                         } else if summary.is_empty() {
     144            0 :                             // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
     145            0 :                             // scrubber they were likely expecting to scan something, and if we see no timelines
     146            0 :                             // at all then it's likely due to some configuration issues like a bad prefix
     147            0 :                             Err(anyhow::anyhow!(
     148            0 :                                 "No timelines found in bucket {} prefix {}",
     149            0 :                                 bucket_config.bucket,
     150            0 :                                 bucket_config
     151            0 :                                     .prefix_in_bucket
     152            0 :                                     .unwrap_or("<none>".to_string())
     153            0 :                             ))
     154            0 :                         } else {
     155            0 :                             Ok(())
     156            0 :                         }
     157            0 :                     }
     158            0 :                 }
     159            0 :             }
     160            0 :         }
     161            0 :         Command::FindGarbage {
     162            0 :             node_kind,
     163            0 :             depth,
     164            0 :             output_path,
     165            0 :         } => {
     166            0 :             let console_config = ConsoleConfig::from_env()?;
     167            0 :             find_garbage(bucket_config, console_config, depth, node_kind, output_path).await
     168            0 :         }
     169            0 :         Command::PurgeGarbage { input_path, mode } => {
     170            0 :             purge_garbage(input_path, mode, !cli.delete).await
     171            0 :         }
     172            0 :         Command::TenantSnapshot {
     173            0 :             tenant_id,
     174            0 :             output_path,
     175            0 :             concurrency,
     176            0 :         } => {
     177            0 :             let downloader =
     178            0 :                 SnapshotDownloader::new(bucket_config, tenant_id, output_path, concurrency)?;
     179            0 :             downloader.download().await
     180            0 :         }
     181            0 :     }
     182            0 : }
        

Generated by: LCOV version 2.1-beta