|             Line data    Source code 
       1              : //! A helper tool to manage pageserver binary files.
       2              : //! Accepts a file as an argument, attempts to parse it in every supported way
       3              : //! and prints its interpreted contents.
       4              : //!
       5              : //! A separate `metadata` subcommand prints and optionally updates the pageserver's metadata file.
       6              : 
       7              : mod draw_timeline_dir;
       8              : mod index_part;
       9              : mod key;
      10              : mod layer_map_analyzer;
      11              : mod layers;
      12              : 
      13              : use std::{
      14              :     str::FromStr,
      15              :     time::{Duration, SystemTime},
      16              : };
      17              : 
      18              : use camino::{Utf8Path, Utf8PathBuf};
      19              : use clap::{Parser, Subcommand};
      20              : use index_part::IndexPartCmd;
      21              : use layers::LayerCmd;
      22              : use pageserver::{
      23              :     context::{DownloadBehavior, RequestContext},
      24              :     page_cache,
      25              :     task_mgr::TaskKind,
      26              :     tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
      27              :     virtual_file,
      28              : };
      29              : use pageserver_api::shard::TenantShardId;
      30              : use postgres_ffi::ControlFileData;
      31              : use remote_storage::{RemotePath, RemoteStorageConfig};
      32              : use tokio_util::sync::CancellationToken;
      33              : use utils::{
      34              :     id::TimelineId,
      35              :     logging::{self, LogFormat, TracingErrorLayerEnablement},
      36              :     lsn::Lsn,
      37              :     project_git_version,
      38              : };
      39              : 
      40              : project_git_version!(GIT_VERSION);
      41              : 
      42            0 : #[derive(Parser)]
      43              : #[command(
      44              :     version = GIT_VERSION,
      45              :     about = "Neon Pageserver binutils",
      46              :     long_about = "Reads pageserver (and related) binary files management utility"
      47              : )]
      48              : #[command(propagate_version = true)]
      49              : struct CliOpts {
      50              :     #[command(subcommand)]
      51              :     command: Commands,
      52              : }
      53              : 
      54            0 : #[derive(Subcommand)]
      55              : enum Commands {
      56              :     Metadata(MetadataCmd),
      57              :     #[command(subcommand)]
      58              :     IndexPart(IndexPartCmd),
      59              :     PrintLayerFile(PrintLayerFileCmd),
      60              :     TimeTravelRemotePrefix(TimeTravelRemotePrefixCmd),
      61              :     DrawTimeline {},
      62              :     AnalyzeLayerMap(AnalyzeLayerMapCmd),
      63              :     #[command(subcommand)]
      64              :     Layer(LayerCmd),
      65              :     /// Debug print a hex key found from logs
      66              :     Key(key::DescribeKeyCommand),
      67              : }
      68              : 
      69              : /// Read and update pageserver metadata file
      70            0 : #[derive(Parser)]
      71              : struct MetadataCmd {
      72              :     /// Input metadata file path
      73            0 :     metadata_path: Utf8PathBuf,
      74              :     /// Replace disk consistent Lsn
      75              :     disk_consistent_lsn: Option<Lsn>,
      76              :     /// Replace previous record Lsn
      77              :     prev_record_lsn: Option<Lsn>,
      78              :     /// Replace latest gc cuttoff
      79              :     latest_gc_cuttoff: Option<Lsn>,
      80              : }
      81              : 
      82            0 : #[derive(Parser)]
      83              : struct PrintLayerFileCmd {
      84              :     /// Pageserver data path
      85            0 :     path: Utf8PathBuf,
      86              : }
      87              : 
      88              : /// Roll back the time for the specified prefix using S3 history.
      89              : ///
      90              : /// The command is fairly low level and powerful. Validation is only very light,
      91              : /// so it is more powerful, and thus potentially more dangerous.
      92            0 : #[derive(Parser)]
      93              : struct TimeTravelRemotePrefixCmd {
      94              :     /// A configuration string for the remote_storage configuration.
      95              :     ///
      96              :     /// Example: `remote_storage = { bucket_name = "aws-storage-bucket-name", bucket_region = "us-east-2" }`
      97            0 :     config_toml_str: String,
      98              :     /// remote prefix to time travel recover. For safety reasons, we require it to contain
      99              :     /// a timeline or tenant ID in the prefix.
     100            0 :     prefix: String,
     101              :     /// Timestamp to travel to. Given in format like `2024-01-20T10:45:45Z`. Assumes UTC and second accuracy.
     102            0 :     travel_to: String,
     103              :     /// Timestamp of the start of the operation, must be after any changes we want to roll back and after.
     104              :     /// You can use a few seconds before invoking the command. Same format as `travel_to`.
     105              :     done_if_after: Option<String>,
     106              : }
     107              : 
     108            0 : #[derive(Parser)]
     109              : struct AnalyzeLayerMapCmd {
     110              :     /// Pageserver data path
     111            0 :     path: Utf8PathBuf,
     112              :     /// Max holes
     113              :     max_holes: Option<usize>,
     114              : }
     115              : 
     116              : #[tokio::main]
     117            0 : async fn main() -> anyhow::Result<()> {
     118            0 :     logging::init(
     119            0 :         LogFormat::Plain,
     120            0 :         TracingErrorLayerEnablement::EnableWithRustLogFilter,
     121            0 :         logging::Output::Stdout,
     122            0 :     )?;
     123            0 : 
     124            0 :     logging::replace_panic_hook_with_tracing_panic_hook().forget();
     125            0 : 
     126            0 :     let cli = CliOpts::parse();
     127            0 : 
     128            0 :     match cli.command {
     129            0 :         Commands::Layer(cmd) => {
     130            0 :             layers::main(&cmd).await?;
     131            0 :         }
     132            0 :         Commands::Metadata(cmd) => {
     133            0 :             handle_metadata(&cmd)?;
     134            0 :         }
     135            0 :         Commands::IndexPart(cmd) => {
     136            0 :             index_part::main(&cmd).await?;
     137            0 :         }
     138            0 :         Commands::DrawTimeline {} => {
     139            0 :             draw_timeline_dir::main()?;
     140            0 :         }
     141            0 :         Commands::AnalyzeLayerMap(cmd) => {
     142            0 :             layer_map_analyzer::main(&cmd).await?;
     143            0 :         }
     144            0 :         Commands::PrintLayerFile(cmd) => {
     145            0 :             if let Err(e) = read_pg_control_file(&cmd.path) {
     146            0 :                 println!(
     147            0 :                     "Failed to read input file as a pg control one: {e:#}\n\
     148            0 :                     Attempting to read it as layer file"
     149            0 :                 );
     150            0 :                 print_layerfile(&cmd.path).await?;
     151            0 :             }
     152            0 :         }
     153            0 :         Commands::TimeTravelRemotePrefix(cmd) => {
     154            0 :             let timestamp = humantime::parse_rfc3339(&cmd.travel_to)
     155            0 :                 .map_err(|_e| anyhow::anyhow!("Invalid time for travel_to: '{}'", cmd.travel_to))?;
     156            0 : 
     157            0 :             let done_if_after = if let Some(done_if_after) = &cmd.done_if_after {
     158            0 :                 humantime::parse_rfc3339(done_if_after).map_err(|_e| {
     159            0 :                     anyhow::anyhow!("Invalid time for done_if_after: '{}'", done_if_after)
     160            0 :                 })?
     161            0 :             } else {
     162            0 :                 const SAFETY_MARGIN: Duration = Duration::from_secs(3);
     163            0 :                 tokio::time::sleep(SAFETY_MARGIN).await;
     164            0 :                 // Convert to string representation and back to get rid of sub-second values
     165            0 :                 let done_if_after = SystemTime::now();
     166            0 :                 tokio::time::sleep(SAFETY_MARGIN).await;
     167            0 :                 done_if_after
     168            0 :             };
     169            0 : 
     170            0 :             let timestamp = strip_subsecond(timestamp);
     171            0 :             let done_if_after = strip_subsecond(done_if_after);
     172            0 : 
     173            0 :             let Some(prefix) = validate_prefix(&cmd.prefix) else {
     174            0 :                 println!("specified prefix '{}' failed validation", cmd.prefix);
     175            0 :                 return Ok(());
     176            0 :             };
     177            0 :             let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
     178            0 :             let toml_item = toml_document
     179            0 :                 .get("remote_storage")
     180            0 :                 .expect("need remote_storage");
     181            0 :             let config = RemoteStorageConfig::from_toml(toml_item)?.expect("incomplete config");
     182            0 :             let storage = remote_storage::GenericRemoteStorage::from_config(&config);
     183            0 :             let cancel = CancellationToken::new();
     184            0 :             storage
     185            0 :                 .unwrap()
     186            0 :                 .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel)
     187            0 :                 .await?;
     188            0 :         }
     189            0 :         Commands::Key(dkc) => dkc.execute(),
     190            0 :     };
     191            0 :     Ok(())
     192            0 : }
     193              : 
     194            0 : fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
     195            0 :     let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
     196            0 :     println!("{control_file:?}");
     197            0 :     let control_file_initdb = Lsn(control_file.checkPoint);
     198            0 :     println!(
     199            0 :         "pg_initdb_lsn: {}, aligned: {}",
     200            0 :         control_file_initdb,
     201            0 :         control_file_initdb.align()
     202            0 :     );
     203            0 :     Ok(())
     204            0 : }
     205              : 
     206            0 : async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
     207            0 :     // Basic initialization of things that don't change after startup
     208            0 :     virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
     209            0 :     page_cache::init(100);
     210            0 :     let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
     211            0 :     dump_layerfile_from_path(path, true, &ctx).await
     212            0 : }
     213              : 
     214            0 : fn handle_metadata(
     215            0 :     MetadataCmd {
     216            0 :         metadata_path: path,
     217            0 :         disk_consistent_lsn,
     218            0 :         prev_record_lsn,
     219            0 :         latest_gc_cuttoff,
     220            0 :     }: &MetadataCmd,
     221            0 : ) -> Result<(), anyhow::Error> {
     222            0 :     let metadata_bytes = std::fs::read(path)?;
     223            0 :     let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
     224            0 :     println!("Current metadata:\n{meta:?}");
     225            0 :     let mut update_meta = false;
     226              :     // TODO: simplify this part
     227            0 :     if let Some(disk_consistent_lsn) = disk_consistent_lsn {
     228            0 :         meta = TimelineMetadata::new(
     229            0 :             *disk_consistent_lsn,
     230            0 :             meta.prev_record_lsn(),
     231            0 :             meta.ancestor_timeline(),
     232            0 :             meta.ancestor_lsn(),
     233            0 :             meta.latest_gc_cutoff_lsn(),
     234            0 :             meta.initdb_lsn(),
     235            0 :             meta.pg_version(),
     236            0 :         );
     237            0 :         update_meta = true;
     238            0 :     }
     239            0 :     if let Some(prev_record_lsn) = prev_record_lsn {
     240            0 :         meta = TimelineMetadata::new(
     241            0 :             meta.disk_consistent_lsn(),
     242            0 :             Some(*prev_record_lsn),
     243            0 :             meta.ancestor_timeline(),
     244            0 :             meta.ancestor_lsn(),
     245            0 :             meta.latest_gc_cutoff_lsn(),
     246            0 :             meta.initdb_lsn(),
     247            0 :             meta.pg_version(),
     248            0 :         );
     249            0 :         update_meta = true;
     250            0 :     }
     251            0 :     if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {
     252            0 :         meta = TimelineMetadata::new(
     253            0 :             meta.disk_consistent_lsn(),
     254            0 :             meta.prev_record_lsn(),
     255            0 :             meta.ancestor_timeline(),
     256            0 :             meta.ancestor_lsn(),
     257            0 :             *latest_gc_cuttoff,
     258            0 :             meta.initdb_lsn(),
     259            0 :             meta.pg_version(),
     260            0 :         );
     261            0 :         update_meta = true;
     262            0 :     }
     263              : 
     264            0 :     if update_meta {
     265            0 :         let metadata_bytes = meta.to_bytes()?;
     266            0 :         std::fs::write(path, metadata_bytes)?;
     267            0 :     }
     268              : 
     269            0 :     Ok(())
     270            0 : }
     271              : 
     272              : /// Ensures that the given S3 prefix is sufficiently constrained.
     273              : /// The command is very risky already and we don't want to expose something
     274              : /// that allows usually unintentional and quite catastrophic time travel of
     275              : /// an entire bucket, which would be a major catastrophy and away
     276              : /// by only one character change (similar to "rm -r /home /username/foobar").
     277           30 : fn validate_prefix(prefix: &str) -> Option<RemotePath> {
     278           30 :     if prefix.is_empty() {
     279              :         // Empty prefix means we want to specify the *whole* bucket
     280            2 :         return None;
     281           28 :     }
     282           28 :     let components = prefix.split('/').collect::<Vec<_>>();
     283           28 :     let (last, components) = {
     284           28 :         let last = components.last()?;
     285           28 :         if last.is_empty() {
     286              :             (
     287           14 :                 components.iter().nth_back(1)?,
     288           14 :                 &components[..(components.len() - 1)],
     289              :             )
     290              :         } else {
     291           14 :             (last, &components[..])
     292              :         }
     293              :     };
     294              :     'valid: {
     295           28 :         if let Ok(_timeline_id) = TimelineId::from_str(last) {
     296              :             // Ends in either a tenant or timeline ID
     297           10 :             break 'valid;
     298           18 :         }
     299           18 :         if *last == "timelines" {
     300            6 :             if let Some(before_last) = components.iter().nth_back(1) {
     301            6 :                 if let Ok(_tenant_id) = TenantShardId::from_str(before_last) {
     302              :                     // Has a valid tenant id
     303            6 :                     break 'valid;
     304            0 :                 }
     305            0 :             }
     306           12 :         }
     307              : 
     308           12 :         return None;
     309              :     }
     310           16 :     RemotePath::from_string(prefix).ok()
     311           30 : }
     312              : 
     313            0 : fn strip_subsecond(timestamp: SystemTime) -> SystemTime {
     314            0 :     let ts_str = humantime::format_rfc3339_seconds(timestamp).to_string();
     315            0 :     humantime::parse_rfc3339(&ts_str).expect("can't parse just created timestamp")
     316            0 : }
     317              : 
     318              : #[cfg(test)]
     319              : mod tests {
     320              :     use super::*;
     321              : 
     322              :     #[test]
     323            2 :     fn test_validate_prefix() {
     324            2 :         assert_eq!(validate_prefix(""), None);
     325            2 :         assert_eq!(validate_prefix("/"), None);
     326              :         #[track_caller]
     327           14 :         fn assert_valid(prefix: &str) {
     328           14 :             let remote_path = RemotePath::from_string(prefix).unwrap();
     329           14 :             assert_eq!(validate_prefix(prefix), Some(remote_path));
     330           14 :         }
     331            2 :         assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/");
     332            2 :         // Path is not relative but absolute
     333            2 :         assert_eq!(
     334            2 :             validate_prefix(
     335            2 :                 "/wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/"
     336            2 :             ),
     337            2 :             None
     338            2 :         );
     339            2 :         assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/");
     340            2 :         // Partial tenant IDs should be invalid, S3 will match all tenants with the specific ID prefix
     341            2 :         assert_eq!(validate_prefix("wal/3aa8fcc61f6d357410b7d"), None);
     342            2 :         assert_eq!(validate_prefix("wal"), None);
     343            2 :         assert_eq!(validate_prefix("/wal/"), None);
     344            2 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001");
     345            2 :         // Partial tenant ID
     346            2 :         assert_eq!(
     347            2 :             validate_prefix("pageserver/v1/tenants/3aa8fcc61f6d357410b"),
     348            2 :             None
     349            2 :         );
     350            2 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines");
     351            2 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines");
     352            2 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/");
     353            2 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683");
     354            2 :         assert_eq!(validate_prefix("pageserver/v1/tenants/"), None);
     355            2 :     }
     356              : }
         |