|             Line data    Source code 
       1              : //! A helper tool to manage pageserver binary files.
       2              : //! Accepts a file as an argument, attempts to parse it with all ways possible
       3              : //! and prints its interpreted context.
       4              : //!
       5              : //! Separate, `metadata` subcommand allows to print and update pageserver's metadata file.
       6              : 
       7              : mod download_remote_object;
       8              : mod draw_timeline_dir;
       9              : mod index_part;
      10              : mod key;
      11              : mod layer_map_analyzer;
      12              : mod layers;
      13              : mod page_trace;
      14              : 
      15              : use std::str::FromStr;
      16              : use std::time::{Duration, SystemTime};
      17              : 
      18              : use camino::{Utf8Path, Utf8PathBuf};
      19              : use clap::{Parser, Subcommand};
      20              : use download_remote_object::DownloadRemoteObjectCmd;
      21              : use index_part::IndexPartCmd;
      22              : use layers::LayerCmd;
      23              : use page_trace::PageTraceCmd;
      24              : use pageserver::context::{DownloadBehavior, RequestContext};
      25              : use pageserver::page_cache;
      26              : use pageserver::task_mgr::TaskKind;
      27              : use pageserver::tenant::dump_layerfile_from_path;
      28              : use pageserver::tenant::metadata::TimelineMetadata;
      29              : use pageserver::virtual_file::api::IoMode;
      30              : use pageserver::virtual_file::{self};
      31              : use pageserver_api::shard::TenantShardId;
      32              : use postgres_ffi::ControlFileData;
      33              : use remote_storage::{RemotePath, RemoteStorageConfig};
      34              : use tokio_util::sync::CancellationToken;
      35              : use utils::id::TimelineId;
      36              : use utils::logging::{self, LogFormat, TracingErrorLayerEnablement};
      37              : use utils::lsn::Lsn;
      38              : use utils::project_git_version;
      39              : 
      40              : project_git_version!(GIT_VERSION);
      41              : 
      42              : #[derive(Parser)]
      43              : #[command(
      44              :     version = GIT_VERSION,
      45              :     about = "Neon Pageserver binutils",
      46              :     long_about = "Reads pageserver (and related) binary files management utility"
      47              : )]
      48              : #[command(propagate_version = true)]
      49              : struct CliOpts {
      50              :     #[command(subcommand)]
      51              :     command: Commands,
      52              : }
      53              : 
      54              : #[derive(Subcommand)]
      55              : enum Commands {
      56              :     Metadata(MetadataCmd),
      57              :     #[command(subcommand)]
      58              :     IndexPart(IndexPartCmd),
      59              :     PrintLayerFile(PrintLayerFileCmd),
      60              :     TimeTravelRemotePrefix(TimeTravelRemotePrefixCmd),
      61              :     DrawTimeline {},
      62              :     AnalyzeLayerMap(AnalyzeLayerMapCmd),
      63              :     #[command(subcommand)]
      64              :     Layer(LayerCmd),
      65              :     /// Debug print a hex key found from logs
      66              :     Key(key::DescribeKeyCommand),
      67              :     PageTrace(PageTraceCmd),
      68              :     DownloadRemoteObject(DownloadRemoteObjectCmd),
      69              : }
      70              : 
      71              : /// Read and update pageserver metadata file
      72              : #[derive(Parser)]
      73              : struct MetadataCmd {
      74              :     /// Input metadata file path
      75              :     metadata_path: Utf8PathBuf,
      76              :     /// Replace disk consistent Lsn
      77              :     disk_consistent_lsn: Option<Lsn>,
      78              :     /// Replace previous record Lsn
      79              :     prev_record_lsn: Option<Lsn>,
      80              :     /// Replace latest gc cuttoff
      81              :     latest_gc_cuttoff: Option<Lsn>,
      82              : }
      83              : 
      84              : #[derive(Parser)]
      85              : struct PrintLayerFileCmd {
      86              :     /// Pageserver data path
      87              :     path: Utf8PathBuf,
      88              : }
      89              : 
      90              : /// Roll back the time for the specified prefix using S3 history.
      91              : ///
      92              : /// The command is fairly low level and powerful. Validation is only very light,
      93              : /// so it is more powerful, and thus potentially more dangerous.
      94              : #[derive(Parser)]
      95              : struct TimeTravelRemotePrefixCmd {
      96              :     /// A configuration string for the remote_storage configuration.
      97              :     ///
      98              :     /// Example: `remote_storage = { bucket_name = "aws-storage-bucket-name", bucket_region = "us-east-2" }`
      99              :     config_toml_str: String,
     100              :     /// remote prefix to time travel recover. For safety reasons, we require it to contain
     101              :     /// a timeline or tenant ID in the prefix.
     102              :     prefix: String,
     103              :     /// Timestamp to travel to. Given in format like `2024-01-20T10:45:45Z`. Assumes UTC and second accuracy.
     104              :     travel_to: String,
     105              :     /// Timestamp of the start of the operation, must be after any changes we want to roll back and after.
     106              :     /// You can use a few seconds before invoking the command. Same format as `travel_to`.
     107              :     done_if_after: Option<String>,
     108              : }
     109              : 
     110              : #[derive(Parser)]
     111              : struct AnalyzeLayerMapCmd {
     112              :     /// Pageserver data path
     113              :     path: Utf8PathBuf,
     114              :     /// Max holes
     115              :     max_holes: Option<usize>,
     116              : }
     117              : 
     118              : #[tokio::main]
     119            0 : async fn main() -> anyhow::Result<()> {
     120            0 :     logging::init(
     121            0 :         LogFormat::Plain,
     122            0 :         TracingErrorLayerEnablement::EnableWithRustLogFilter,
     123            0 :         logging::Output::Stdout,
     124            0 :     )?;
     125              : 
     126            0 :     logging::replace_panic_hook_with_tracing_panic_hook().forget();
     127              : 
     128            0 :     let cli = CliOpts::parse();
     129              : 
     130            0 :     match cli.command {
     131            0 :         Commands::Layer(cmd) => {
     132            0 :             layers::main(&cmd).await?;
     133              :         }
     134            0 :         Commands::Metadata(cmd) => {
     135            0 :             handle_metadata(&cmd)?;
     136              :         }
     137            0 :         Commands::IndexPart(cmd) => {
     138            0 :             index_part::main(&cmd).await?;
     139              :         }
     140              :         Commands::DrawTimeline {} => {
     141            0 :             draw_timeline_dir::main()?;
     142              :         }
     143            0 :         Commands::AnalyzeLayerMap(cmd) => {
     144            0 :             layer_map_analyzer::main(&cmd).await?;
     145              :         }
     146            0 :         Commands::PrintLayerFile(cmd) => {
     147            0 :             if let Err(e) = read_pg_control_file(&cmd.path) {
     148            0 :                 println!(
     149            0 :                     "Failed to read input file as a pg control one: {e:#}\n\
     150            0 :                     Attempting to read it as layer file"
     151              :                 );
     152            0 :                 print_layerfile(&cmd.path).await?;
     153            0 :             }
     154              :         }
     155            0 :         Commands::TimeTravelRemotePrefix(cmd) => {
     156            0 :             let timestamp = humantime::parse_rfc3339(&cmd.travel_to)
     157            0 :                 .map_err(|_e| anyhow::anyhow!("Invalid time for travel_to: '{}'", cmd.travel_to))?;
     158              : 
     159            0 :             let done_if_after = if let Some(done_if_after) = &cmd.done_if_after {
     160            0 :                 humantime::parse_rfc3339(done_if_after).map_err(|_e| {
     161            0 :                     anyhow::anyhow!("Invalid time for done_if_after: '{}'", done_if_after)
     162            0 :                 })?
     163              :             } else {
     164              :                 const SAFETY_MARGIN: Duration = Duration::from_secs(3);
     165            0 :                 tokio::time::sleep(SAFETY_MARGIN).await;
     166              :                 // Convert to string representation and back to get rid of sub-second values
     167            0 :                 let done_if_after = SystemTime::now();
     168            0 :                 tokio::time::sleep(SAFETY_MARGIN).await;
     169            0 :                 done_if_after
     170              :             };
     171              : 
     172            0 :             let timestamp = strip_subsecond(timestamp);
     173            0 :             let done_if_after = strip_subsecond(done_if_after);
     174              : 
     175            0 :             let Some(prefix) = validate_prefix(&cmd.prefix) else {
     176            0 :                 println!("specified prefix '{}' failed validation", cmd.prefix);
     177            0 :                 return Ok(());
     178              :             };
     179            0 :             let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?;
     180            0 :             let storage = remote_storage::GenericRemoteStorage::from_config(&config).await;
     181            0 :             let cancel = CancellationToken::new();
     182              :             // Complexity limit: as we are running this command locally, we should have a lot of memory available, and we do not
     183              :             // need to limit the number of versions we are going to delete.
     184            0 :             storage
     185            0 :                 .unwrap()
     186            0 :                 .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel, None)
     187            0 :                 .await?;
     188              :         }
     189            0 :         Commands::Key(dkc) => dkc.execute(),
     190            0 :         Commands::PageTrace(cmd) => page_trace::main(&cmd)?,
     191            0 :         Commands::DownloadRemoteObject(cmd) => {
     192            0 :             download_remote_object::main(&cmd).await?;
     193              :         }
     194              :     };
     195            0 :     Ok(())
     196            0 : }
     197              : 
     198            0 : fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
     199            0 :     let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
     200            0 :     println!("{control_file:?}");
     201            0 :     let control_file_initdb = Lsn(control_file.checkPoint);
     202            0 :     println!(
     203            0 :         "pg_initdb_lsn: {}, aligned: {}",
     204              :         control_file_initdb,
     205            0 :         control_file_initdb.align()
     206              :     );
     207            0 :     Ok(())
     208            0 : }
     209              : 
     210            0 : async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
     211              :     // Basic initialization of things that don't change after startup
     212            0 :     virtual_file::init(
     213              :         10,
     214            0 :         virtual_file::api::IoEngineKind::StdFs,
     215            0 :         IoMode::preferred(),
     216            0 :         virtual_file::SyncMode::Sync,
     217              :     );
     218            0 :     page_cache::init(100);
     219            0 :     let ctx =
     220            0 :         RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error).with_scope_debug_tools();
     221            0 :     dump_layerfile_from_path(path, true, &ctx).await
     222            0 : }
     223              : 
     224            0 : fn handle_metadata(
     225            0 :     MetadataCmd {
     226            0 :         metadata_path: path,
     227            0 :         disk_consistent_lsn,
     228            0 :         prev_record_lsn,
     229            0 :         latest_gc_cuttoff,
     230            0 :     }: &MetadataCmd,
     231            0 : ) -> Result<(), anyhow::Error> {
     232            0 :     let metadata_bytes = std::fs::read(path)?;
     233            0 :     let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
     234            0 :     println!("Current metadata:\n{meta:?}");
     235            0 :     let mut update_meta = false;
     236              :     // TODO: simplify this part
     237            0 :     if let Some(disk_consistent_lsn) = disk_consistent_lsn {
     238            0 :         meta = TimelineMetadata::new(
     239            0 :             *disk_consistent_lsn,
     240            0 :             meta.prev_record_lsn(),
     241            0 :             meta.ancestor_timeline(),
     242            0 :             meta.ancestor_lsn(),
     243            0 :             meta.latest_gc_cutoff_lsn(),
     244            0 :             meta.initdb_lsn(),
     245            0 :             meta.pg_version(),
     246            0 :         );
     247            0 :         update_meta = true;
     248            0 :     }
     249            0 :     if let Some(prev_record_lsn) = prev_record_lsn {
     250            0 :         meta = TimelineMetadata::new(
     251            0 :             meta.disk_consistent_lsn(),
     252            0 :             Some(*prev_record_lsn),
     253            0 :             meta.ancestor_timeline(),
     254            0 :             meta.ancestor_lsn(),
     255            0 :             meta.latest_gc_cutoff_lsn(),
     256            0 :             meta.initdb_lsn(),
     257            0 :             meta.pg_version(),
     258            0 :         );
     259            0 :         update_meta = true;
     260            0 :     }
     261            0 :     if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {
     262            0 :         meta = TimelineMetadata::new(
     263            0 :             meta.disk_consistent_lsn(),
     264            0 :             meta.prev_record_lsn(),
     265            0 :             meta.ancestor_timeline(),
     266            0 :             meta.ancestor_lsn(),
     267            0 :             *latest_gc_cuttoff,
     268            0 :             meta.initdb_lsn(),
     269            0 :             meta.pg_version(),
     270            0 :         );
     271            0 :         update_meta = true;
     272            0 :     }
     273              : 
     274            0 :     if update_meta {
     275            0 :         let metadata_bytes = meta.to_bytes()?;
     276            0 :         std::fs::write(path, metadata_bytes)?;
     277            0 :     }
     278              : 
     279            0 :     Ok(())
     280            0 : }
     281              : 
     282              : /// Ensures that the given S3 prefix is sufficiently constrained.
     283              : /// The command is very risky already and we don't want to expose something
     284              : /// that allows usually unintentional and quite catastrophic time travel of
     285              : /// an entire bucket, which would be a major catastrophy and away
     286              : /// by only one character change (similar to "rm -r /home /username/foobar").
     287           15 : fn validate_prefix(prefix: &str) -> Option<RemotePath> {
     288           15 :     if prefix.is_empty() {
     289              :         // Empty prefix means we want to specify the *whole* bucket
     290            1 :         return None;
     291           14 :     }
     292           14 :     let components = prefix.split('/').collect::<Vec<_>>();
     293           14 :     let (last, components) = {
     294           14 :         let last = components.last()?;
     295           14 :         if last.is_empty() {
     296              :             (
     297            7 :                 components.iter().nth_back(1)?,
     298            7 :                 &components[..(components.len() - 1)],
     299              :             )
     300              :         } else {
     301            7 :             (last, &components[..])
     302              :         }
     303              :     };
     304              :     'valid: {
     305           14 :         if let Ok(_timeline_id) = TimelineId::from_str(last) {
     306              :             // Ends in either a tenant or timeline ID
     307            5 :             break 'valid;
     308            9 :         }
     309            9 :         if *last == "timelines" {
     310            3 :             if let Some(before_last) = components.iter().nth_back(1) {
     311            3 :                 if let Ok(_tenant_id) = TenantShardId::from_str(before_last) {
     312              :                     // Has a valid tenant id
     313            3 :                     break 'valid;
     314            0 :                 }
     315            0 :             }
     316            6 :         }
     317              : 
     318            6 :         return None;
     319              :     }
     320            8 :     RemotePath::from_string(prefix).ok()
     321           15 : }
     322              : 
     323            0 : fn strip_subsecond(timestamp: SystemTime) -> SystemTime {
     324            0 :     let ts_str = humantime::format_rfc3339_seconds(timestamp).to_string();
     325            0 :     humantime::parse_rfc3339(&ts_str).expect("can't parse just created timestamp")
     326            0 : }
     327              : 
     328              : #[cfg(test)]
     329              : mod tests {
     330              :     use super::*;
     331              : 
     332              :     #[test]
     333            1 :     fn test_validate_prefix() {
     334            1 :         assert_eq!(validate_prefix(""), None);
     335            1 :         assert_eq!(validate_prefix("/"), None);
     336              :         #[track_caller]
     337            7 :         fn assert_valid(prefix: &str) {
     338            7 :             let remote_path = RemotePath::from_string(prefix).unwrap();
     339            7 :             assert_eq!(validate_prefix(prefix), Some(remote_path));
     340            7 :         }
     341            1 :         assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/");
     342              :         // Path is not relative but absolute
     343            1 :         assert_eq!(
     344            1 :             validate_prefix(
     345            1 :                 "/wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/"
     346              :             ),
     347              :             None
     348              :         );
     349            1 :         assert_valid("wal/3aa8fcc61f6d357410b7de754b1d9001/");
     350              :         // Partial tenant IDs should be invalid, S3 will match all tenants with the specific ID prefix
     351            1 :         assert_eq!(validate_prefix("wal/3aa8fcc61f6d357410b7d"), None);
     352            1 :         assert_eq!(validate_prefix("wal"), None);
     353            1 :         assert_eq!(validate_prefix("/wal/"), None);
     354            1 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001");
     355              :         // Partial tenant ID
     356            1 :         assert_eq!(
     357            1 :             validate_prefix("pageserver/v1/tenants/3aa8fcc61f6d357410b"),
     358              :             None
     359              :         );
     360            1 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines");
     361            1 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines");
     362            1 :         assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/");
     363            1 :         assert_valid(
     364            1 :             "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683",
     365              :         );
     366            1 :         assert_eq!(validate_prefix("pageserver/v1/tenants/"), None);
     367            1 :     }
     368              : }
         |