Line data Source code
1 : //! A helper tool to manage pageserver binary files.
//! Accepts a file as an argument, attempts to parse it in every way possible
//! and prints its interpreted content.
//!
//! A separate `metadata` subcommand prints and updates the pageserver's metadata file.
6 :
7 : mod draw_timeline_dir;
8 : mod index_part;
9 : mod layer_map_analyzer;
10 : mod layers;
11 :
12 : use std::{
13 : str::FromStr,
14 : time::{Duration, SystemTime},
15 : };
16 :
17 : use camino::{Utf8Path, Utf8PathBuf};
18 : use clap::{Parser, Subcommand};
19 : use index_part::IndexPartCmd;
20 : use layers::LayerCmd;
21 : use pageserver::{
22 : context::{DownloadBehavior, RequestContext},
23 : page_cache,
24 : task_mgr::TaskKind,
25 : tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
26 : virtual_file,
27 : };
28 : use pageserver_api::shard::TenantShardId;
29 : use postgres_ffi::ControlFileData;
30 : use remote_storage::{RemotePath, RemoteStorageConfig};
31 : use tokio_util::sync::CancellationToken;
32 : use utils::{
33 : id::TimelineId,
34 : logging::{self, LogFormat, TracingErrorLayerEnablement},
35 : lsn::Lsn,
36 : project_git_version,
37 : };
38 :
39 : project_git_version!(GIT_VERSION);
40 :
// Top-level command-line options of the tool.
//
// Plain `//` comments are used on purpose: with clap's derive API, `///` doc
// comments are picked up as help text, which would change the program output.
#[derive(Parser)]
#[command(
    version = GIT_VERSION,
    about = "Neon Pageserver binutils",
    long_about = "Reads pageserver (and related) binary files management utility"
)]
// Make the version string declared above available on the subcommands as well.
#[command(propagate_version = true)]
struct CliOpts {
    // The subcommand selected on the command line; `main` dispatches on it.
    #[command(subcommand)]
    command: Commands,
}
52 :
// The subcommands understood by the tool; `main` matches on the parsed variant.
//
// Plain `//` comments are used on purpose: with clap's derive API, `///` doc
// comments on variants are picked up as help text.
#[derive(Subcommand)]
enum Commands {
    // Print and optionally rewrite a metadata file (see `handle_metadata`).
    Metadata(MetadataCmd),
    // Nested subcommands implemented in the `index_part` module.
    #[command(subcommand)]
    IndexPart(IndexPartCmd),
    // Try to print the file as a pg control file, falling back to a layer file dump.
    PrintLayerFile(PrintLayerFileCmd),
    // Time travel recovery of a remote storage prefix.
    TimeTravelRemotePrefix(TimeTravelRemotePrefixCmd),
    // Takes no arguments; implemented in the `draw_timeline_dir` module.
    DrawTimeline {},
    // Implemented in the `layer_map_analyzer` module.
    AnalyzeLayerMap(AnalyzeLayerMapCmd),
    // Nested subcommands implemented in the `layers` module.
    #[command(subcommand)]
    Layer(LayerCmd),
}
65 :
66 : /// Read and update pageserver metadata file
67 0 : #[derive(Parser)]
68 : struct MetadataCmd {
69 : /// Input metadata file path
70 0 : metadata_path: Utf8PathBuf,
71 : /// Replace disk consistent Lsn
72 : disk_consistent_lsn: Option<Lsn>,
73 : /// Replace previous record Lsn
74 : prev_record_lsn: Option<Lsn>,
75 : /// Replace latest gc cuttoff
76 : latest_gc_cuttoff: Option<Lsn>,
77 : }
78 :
// Arguments of the `print-layer-file` subcommand.
#[derive(Parser)]
struct PrintLayerFileCmd {
    /// Pageserver data path
    // `main` first tries to decode this file as a pg control file and, if that
    // fails, dumps it as a layer file.
    path: Utf8PathBuf,
}
84 :
/// Roll back the time for the specified prefix using S3 history.
///
/// The command is fairly low level and powerful. Validation is only very light,
/// so it is more powerful, and thus potentially more dangerous.
// All fields are positional arguments; the `///` comments are rendered by clap
// as help text, so they are intentionally left untouched here.
#[derive(Parser)]
struct TimeTravelRemotePrefixCmd {
    /// A configuration string for the remote_storage configuration.
    ///
    /// Example: `remote_storage = { bucket_name = "aws-storage-bucket-name", bucket_region = "us-east-2" }`
    // Parsed as a TOML document by `main`, which reads its `remote_storage` key.
    config_toml_str: String,
    /// remote prefix to time travel recover. For safety reasons, we require it to contain
    /// a timeline or tenant ID in the prefix.
    // Checked by `validate_prefix` before anything is sent to the storage.
    prefix: String,
    /// Timestamp to travel to. Given in format like `2024-01-20T10:45:45Z`. Assumes UTC and second accuracy.
    // Parsed with `humantime::parse_rfc3339` in `main`.
    travel_to: String,
    // NOTE(review): the first sentence of the help text below ends with a
    // dangling "and after" -- presumably "and after `travel_to`"; confirm and
    // fix the user-facing wording.
    // When omitted, `main` uses the current time, sampled between two short sleeps.
    /// Timestamp of the start of the operation, must be after any changes we want to roll back and after.
    /// You can use a few seconds before invoking the command. Same format as `travel_to`.
    done_if_after: Option<String>,
}
104 :
// Arguments of the `analyze-layer-map` subcommand; passed by reference to
// `layer_map_analyzer::main`.
#[derive(Parser)]
struct AnalyzeLayerMapCmd {
    /// Pageserver data path
    path: Utf8PathBuf,
    /// Max holes
    // Optional positional argument; how a missing value is handled is decided
    // by the analyzer, not here.
    max_holes: Option<usize>,
}
112 :
113 : #[tokio::main]
114 0 : async fn main() -> anyhow::Result<()> {
115 0 : logging::init(
116 0 : LogFormat::Plain,
117 0 : TracingErrorLayerEnablement::EnableWithRustLogFilter,
118 0 : logging::Output::Stdout,
119 0 : )?;
120 0 :
121 0 : logging::replace_panic_hook_with_tracing_panic_hook().forget();
122 0 :
123 0 : let cli = CliOpts::parse();
124 0 :
125 0 : match cli.command {
126 0 : Commands::Layer(cmd) => {
127 0 : layers::main(&cmd).await?;
128 0 : }
129 0 : Commands::Metadata(cmd) => {
130 0 : handle_metadata(&cmd)?;
131 0 : }
132 0 : Commands::IndexPart(cmd) => {
133 0 : index_part::main(&cmd).await?;
134 0 : }
135 0 : Commands::DrawTimeline {} => {
136 0 : draw_timeline_dir::main()?;
137 0 : }
138 0 : Commands::AnalyzeLayerMap(cmd) => {
139 0 : layer_map_analyzer::main(&cmd).await?;
140 0 : }
141 0 : Commands::PrintLayerFile(cmd) => {
142 0 : if let Err(e) = read_pg_control_file(&cmd.path) {
143 0 : println!(
144 0 : "Failed to read input file as a pg control one: {e:#}\n\
145 0 : Attempting to read it as layer file"
146 0 : );
147 0 : print_layerfile(&cmd.path).await?;
148 0 : }
149 0 : }
150 0 : Commands::TimeTravelRemotePrefix(cmd) => {
151 0 : let timestamp = humantime::parse_rfc3339(&cmd.travel_to)
152 0 : .map_err(|_e| anyhow::anyhow!("Invalid time for travel_to: '{}'", cmd.travel_to))?;
153 0 :
154 0 : let done_if_after = if let Some(done_if_after) = &cmd.done_if_after {
155 0 : humantime::parse_rfc3339(done_if_after).map_err(|_e| {
156 0 : anyhow::anyhow!("Invalid time for done_if_after: '{}'", done_if_after)
157 0 : })?
158 0 : } else {
159 0 : const SAFETY_MARGIN: Duration = Duration::from_secs(3);
160 0 : tokio::time::sleep(SAFETY_MARGIN).await;
161 0 : // Convert to string representation and back to get rid of sub-second values
162 0 : let done_if_after = SystemTime::now();
163 0 : tokio::time::sleep(SAFETY_MARGIN).await;
164 0 : done_if_after
165 0 : };
166 0 :
167 0 : let timestamp = strip_subsecond(timestamp);
168 0 : let done_if_after = strip_subsecond(done_if_after);
169 0 :
170 0 : let Some(prefix) = validate_prefix(&cmd.prefix) else {
171 0 : println!("specified prefix '{}' failed validation", cmd.prefix);
172 0 : return Ok(());
173 0 : };
174 0 : let toml_document = toml_edit::Document::from_str(&cmd.config_toml_str)?;
175 0 : let toml_item = toml_document
176 0 : .get("remote_storage")
177 0 : .expect("need remote_storage");
178 0 : let config = RemoteStorageConfig::from_toml(toml_item)?.expect("incomplete config");
179 0 : let storage = remote_storage::GenericRemoteStorage::from_config(&config);
180 0 : let cancel = CancellationToken::new();
181 0 : storage
182 0 : .unwrap()
183 0 : .time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel)
184 0 : .await?;
185 0 : }
186 0 : };
187 0 : Ok(())
188 0 : }
189 :
190 0 : fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
191 0 : let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
192 0 : println!("{control_file:?}");
193 0 : let control_file_initdb = Lsn(control_file.checkPoint);
194 0 : println!(
195 0 : "pg_initdb_lsn: {}, aligned: {}",
196 0 : control_file_initdb,
197 0 : control_file_initdb.align()
198 0 : );
199 0 : Ok(())
200 0 : }
201 :
202 0 : async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
203 0 : // Basic initialization of things that don't change after startup
204 0 : virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
205 0 : page_cache::init(100);
206 0 : let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
207 0 : dump_layerfile_from_path(path, true, &ctx).await
208 0 : }
209 :
210 0 : fn handle_metadata(
211 0 : MetadataCmd {
212 0 : metadata_path: path,
213 0 : disk_consistent_lsn,
214 0 : prev_record_lsn,
215 0 : latest_gc_cuttoff,
216 0 : }: &MetadataCmd,
217 0 : ) -> Result<(), anyhow::Error> {
218 0 : let metadata_bytes = std::fs::read(path)?;
219 0 : let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
220 0 : println!("Current metadata:\n{meta:?}");
221 0 : let mut update_meta = false;
222 0 : if let Some(disk_consistent_lsn) = disk_consistent_lsn {
223 0 : meta = TimelineMetadata::new(
224 0 : *disk_consistent_lsn,
225 0 : meta.prev_record_lsn(),
226 0 : meta.ancestor_timeline(),
227 0 : meta.ancestor_lsn(),
228 0 : meta.latest_gc_cutoff_lsn(),
229 0 : meta.initdb_lsn(),
230 0 : meta.pg_version(),
231 0 : );
232 0 : update_meta = true;
233 0 : }
234 0 : if let Some(prev_record_lsn) = prev_record_lsn {
235 0 : meta = TimelineMetadata::new(
236 0 : meta.disk_consistent_lsn(),
237 0 : Some(*prev_record_lsn),
238 0 : meta.ancestor_timeline(),
239 0 : meta.ancestor_lsn(),
240 0 : meta.latest_gc_cutoff_lsn(),
241 0 : meta.initdb_lsn(),
242 0 : meta.pg_version(),
243 0 : );
244 0 : update_meta = true;
245 0 : }
246 0 : if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {
247 0 : meta = TimelineMetadata::new(
248 0 : meta.disk_consistent_lsn(),
249 0 : meta.prev_record_lsn(),
250 0 : meta.ancestor_timeline(),
251 0 : meta.ancestor_lsn(),
252 0 : *latest_gc_cuttoff,
253 0 : meta.initdb_lsn(),
254 0 : meta.pg_version(),
255 0 : );
256 0 : update_meta = true;
257 0 : }
258 :
259 0 : if update_meta {
260 0 : let metadata_bytes = meta.to_bytes()?;
261 0 : std::fs::write(path, metadata_bytes)?;
262 0 : }
263 :
264 0 : Ok(())
265 0 : }
266 :
267 : /// Ensures that the given S3 prefix is sufficiently constrained.
268 : /// The command is very risky already and we don't want to expose something
269 : /// that allows usually unintentional and quite catastrophic time travel of
270 : /// an entire bucket, which would be a major catastrophy and away
271 : /// by only one character change (similar to "rm -r /home /username/foobar").
272 30 : fn validate_prefix(prefix: &str) -> Option<RemotePath> {
273 30 : if prefix.is_empty() {
274 : // Empty prefix means we want to specify the *whole* bucket
275 2 : return None;
276 28 : }
277 28 : let components = prefix.split('/').collect::<Vec<_>>();
278 28 : let (last, components) = {
279 28 : let last = components.last()?;
280 28 : if last.is_empty() {
281 : (
282 14 : components.iter().nth_back(1)?,
283 14 : &components[..(components.len() - 1)],
284 : )
285 : } else {
286 14 : (last, &components[..])
287 : }
288 : };
289 : 'valid: {
290 28 : if let Ok(_timeline_id) = TimelineId::from_str(last) {
291 : // Ends in either a tenant or timeline ID
292 10 : break 'valid;
293 18 : }
294 18 : if *last == "timelines" {
295 6 : if let Some(before_last) = components.iter().nth_back(1) {
296 6 : if let Ok(_tenant_id) = TenantShardId::from_str(before_last) {
297 : // Has a valid tenant id
298 6 : break 'valid;
299 0 : }
300 0 : }
301 12 : }
302 :
303 12 : return None;
304 : }
305 16 : RemotePath::from_string(prefix).ok()
306 30 : }
307 :
308 0 : fn strip_subsecond(timestamp: SystemTime) -> SystemTime {
309 0 : let ts_str = humantime::format_rfc3339_seconds(timestamp).to_string();
310 0 : humantime::parse_rfc3339(&ts_str).expect("can't parse just created timestamp")
311 0 : }
312 :
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of which prefixes `validate_prefix` accepts and rejects.
    #[test]
    fn test_validate_prefix() {
        // Accepted prefixes must come back as the equivalent `RemotePath`.
        let accepted = [
            "wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/",
            "wal/3aa8fcc61f6d357410b7de754b1d9001/",
            "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001",
            "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines",
            "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines",
            "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/",
            "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683",
        ];
        for &prefix in &accepted {
            let expected = RemotePath::from_string(prefix).unwrap();
            assert_eq!(validate_prefix(prefix), Some(expected), "prefix: {prefix:?}");
        }

        let rejected = [
            "",
            "/",
            // Path is not relative but absolute
            "/wal/3aa8fcc61f6d357410b7de754b1d9001/641e5342083b2235ee3deb8066819683/",
            // Partial tenant IDs should be invalid, S3 will match all tenants with the specific ID prefix
            "wal/3aa8fcc61f6d357410b7d",
            "wal",
            "/wal/",
            // Partial tenant ID
            "pageserver/v1/tenants/3aa8fcc61f6d357410b",
            "pageserver/v1/tenants/",
        ];
        for &prefix in &rejected {
            assert_eq!(validate_prefix(prefix), None, "prefix: {prefix:?}");
        }
    }
}
|