Line data Source code
1 : use camino::Utf8PathBuf;
2 : use clap::Parser;
3 : use tokio_util::sync::CancellationToken;
4 :
// NOTE(review): this struct derives `clap::Parser`; clap's derive normally renders the `///`
// comments below as the command's `--help` text, so treat their wording as user-facing output
// rather than as internal documentation — confirm before rewording them.
/// Download a specific object from remote storage to a local file.
///
/// The remote storage configuration is supplied via the `REMOTE_STORAGE_CONFIG` environment
/// variable, in the same TOML format that the pageserver itself understands. This allows the
/// command to work with any cloud supported by the `remote_storage` crate (currently AWS S3,
/// Azure Blob Storage and local files), as long as the credentials are available via the
/// standard environment variables expected by the underlying SDKs.
///
/// Examples for setting the environment variable:
///
/// ```bash
/// # AWS S3 (region can also be provided via AWS_REGION)
/// export REMOTE_STORAGE_CONFIG='remote_storage = { bucket_name = "my-bucket", bucket_region = "us-east-2" }'
///
/// # Azure Blob Storage (account key picked up from AZURE_STORAGE_ACCOUNT_KEY)
/// export REMOTE_STORAGE_CONFIG='remote_storage = { container = "my-container", account = "my-account" }'
/// ```
#[derive(Parser)]
pub(crate) struct DownloadRemoteObjectCmd {
    // First field of the struct. `main` strips any leading '/' characters from this value
    // before turning it into a `RemotePath`, so "/a/b" and "a/b" name the same object.
    /// Key / path of the object to download (relative to the remote storage prefix).
    ///
    /// Examples:
    /// "wal/3aa8f.../00000001000000000000000A"
    /// "pageserver/v1/tenants/<tenant_id>/timelines/<timeline_id>/layer_12345"
    pub remote_path: String,

    // Second field of the struct. `main` streams the object into a temporary file next to
    // this path and renames it into place once the data has been synced to disk.
    /// Path of the local file to create. Existing file will be overwritten.
    ///
    /// Examples:
    /// "./segment"
    /// "/tmp/layer_12345.parquet"
    pub output_file: Utf8PathBuf,
}
38 :
39 0 : pub(crate) async fn main(cmd: &DownloadRemoteObjectCmd) -> anyhow::Result<()> {
40 : use remote_storage::{DownloadOpts, GenericRemoteStorage, RemotePath, RemoteStorageConfig};
41 :
42 : // Fetch remote storage configuration from the environment
43 0 : let config_str = std::env::var("REMOTE_STORAGE_CONFIG").map_err(|_| {
44 0 : anyhow::anyhow!(
45 0 : "'REMOTE_STORAGE_CONFIG' environment variable must be set to a valid remote storage TOML config"
46 : )
47 0 : })?;
48 :
49 0 : let config = RemoteStorageConfig::from_toml_str(&config_str)?;
50 :
51 : // Initialise remote storage client
52 0 : let storage = GenericRemoteStorage::from_config(&config).await?;
53 :
54 : // RemotePath must be relative – leading slashes confuse the parser.
55 0 : let remote_path_str = cmd.remote_path.trim_start_matches('/');
56 0 : let remote_path = RemotePath::from_string(remote_path_str)?;
57 :
58 0 : let cancel = CancellationToken::new();
59 :
60 0 : println!(
61 0 : "Downloading '{remote_path}' from remote storage bucket {:?} ...",
62 0 : config.storage.bucket_name()
63 : );
64 :
65 : // Start the actual download
66 0 : let download = storage
67 0 : .download(&remote_path, &DownloadOpts::default(), &cancel)
68 0 : .await?;
69 :
70 : // Stream to file
71 0 : let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
72 0 : let tmp_path = cmd.output_file.with_extension("tmp");
73 0 : let mut file = tokio::fs::File::create(&tmp_path).await?;
74 0 : tokio::io::copy(&mut reader, &mut file).await?;
75 0 : file.sync_all().await?;
76 : // Atomically move into place
77 0 : tokio::fs::rename(&tmp_path, &cmd.output_file).await?;
78 :
79 0 : println!(
80 0 : "Downloaded to '{}'. Last modified: {:?}, etag: {}",
81 : cmd.output_file, download.last_modified, download.etag
82 : );
83 :
84 0 : Ok(())
85 0 : }
|