Line data Source code
1 : //! A set of generic storage abstractions for the page server to use when backing up and restoring its state from external storage.
2 : //! No other modules from this tree are supposed to be used directly by external code.
3 : //!
4 : //! The [`RemoteStorage`] trait is a CRUD-like generic abstraction for adapting external storages, with a few implementations:
5 : //! * [`local_fs`] uses the local file system as an external storage
6 : //! * [`s3_bucket`] uses an AWS S3 bucket as an external storage
7 : //! * [`azure_blob`] uses Azure Blob storage as an external storage
8 : //!
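//! A minimal usage sketch (assuming this crate is built as `remote_storage`; the local path in
//! the TOML snippet is hypothetical, and a real caller would read its configuration from a file):
//!
//! ```no_run
//! # async fn example() -> anyhow::Result<()> {
//! use remote_storage::{GenericRemoteStorage, ListingMode, RemoteStorageConfig};
//! use tokio_util::sync::CancellationToken;
//!
//! let toml = "local_path = '/tmp/pageserver-backup'".parse::<toml_edit::Document>()?;
//! let config = RemoteStorageConfig::from_toml(toml.as_item())?.expect("storage is configured");
//! let storage = GenericRemoteStorage::from_config(&config)?;
//!
//! let cancel = CancellationToken::new();
//! let listing = storage.list(None, ListingMode::NoDelimiter, None, &cancel).await?;
//! println!("remote storage currently holds {} keys", listing.keys.len());
//! # Ok(())
//! # }
//! ```
//!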
9 : #![deny(unsafe_code)]
10 : #![deny(clippy::undocumented_unsafe_blocks)]
11 :
12 : mod azure_blob;
13 : mod error;
14 : mod local_fs;
15 : mod s3_bucket;
16 : mod simulate_failures;
17 : mod support;
18 :
19 : use std::{
20 : collections::HashMap,
21 : fmt::Debug,
22 : num::{NonZeroU32, NonZeroUsize},
23 : pin::Pin,
24 : str::FromStr,
25 : sync::Arc,
26 : time::{Duration, SystemTime},
27 : };
28 :
29 : use anyhow::{bail, Context};
30 : use aws_sdk_s3::types::StorageClass;
31 : use camino::{Utf8Path, Utf8PathBuf};
32 :
33 : use bytes::Bytes;
34 : use futures::stream::Stream;
35 : use serde::{Deserialize, Serialize};
36 : use tokio::sync::Semaphore;
37 : use tokio_util::sync::CancellationToken;
38 : use toml_edit::Item;
39 : use tracing::info;
40 :
41 : pub use self::{
42 : azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
43 : simulate_failures::UnreliableWrapper,
44 : };
45 : use s3_bucket::RequestKind;
46 :
47 : /// The Azure SDK's ETag type is a simple String wrapper: we re-use it here instead of defining our own.
48 : pub use azure_core::Etag;
49 :
50 : pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
51 :
52 : /// Currently, sync happens with AWS S3, which has two limits on requests per second:
53 : /// ~200 RPS for IAM services
54 : /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
55 : /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
56 : /// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
57 : pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
58 : /// Set this limit analogously to the S3 limit.
59 : ///
60 : /// The link below notes a limit of at most 20k concurrent connections.
61 : /// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
62 : pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
63 : /// No limit is set on the client side, which currently means 1000 for AWS S3.
64 : /// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
65 : pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
66 :
67 : /// As defined in S3 docs
68 : pub const MAX_KEYS_PER_DELETE: usize = 1000;
69 :
70 : const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
71 :
72 : /// Path on the remote storage, relative to some inner prefix.
73 : /// The prefix is an implementation detail that allows representing local paths
74 : /// as remote ones, stripping the local storage prefix away.
75 : #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
76 : pub struct RemotePath(Utf8PathBuf);
77 :
78 : impl Serialize for RemotePath {
79 0 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
80 0 : where
81 0 : S: serde::Serializer,
82 0 : {
83 0 : serializer.collect_str(self)
84 0 : }
85 : }
86 :
87 : impl<'de> Deserialize<'de> for RemotePath {
88 0 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
89 0 : where
90 0 : D: serde::Deserializer<'de>,
91 0 : {
92 0 : let str = String::deserialize(deserializer)?;
93 0 : Ok(Self(Utf8PathBuf::from(&str)))
94 0 : }
95 : }
96 :
97 : impl std::fmt::Display for RemotePath {
98 196 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 196 : std::fmt::Display::fmt(&self.0, f)
100 196 : }
101 : }
102 :
103 : impl RemotePath {
104 3105 : pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
105 3105 : anyhow::ensure!(
106 3105 : relative_path.is_relative(),
107 4 : "Path {relative_path:?} is not relative"
108 : );
109 3101 : Ok(Self(relative_path.to_path_buf()))
110 3105 : }
111 :
112 2823 : pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
113 2823 : Self::new(Utf8Path::new(relative_path))
114 2823 : }
115 :
116 2926 : pub fn with_base(&self, base_path: &Utf8Path) -> Utf8PathBuf {
117 2926 : base_path.join(&self.0)
118 2926 : }
119 :
120 14 : pub fn object_name(&self) -> Option<&str> {
121 14 : self.0.file_name()
122 14 : }
123 :
124 99 : pub fn join(&self, segment: &Utf8Path) -> Self {
125 99 : Self(self.0.join(segment))
126 99 : }
127 :
128 494 : pub fn get_path(&self) -> &Utf8PathBuf {
129 494 : &self.0
130 494 : }
131 :
132 0 : pub fn extension(&self) -> Option<&str> {
133 0 : self.0.extension()
134 0 : }
135 :
136 56 : pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
137 56 : self.0.strip_prefix(&p.0)
138 56 : }
139 :
140 133 : pub fn add_trailing_slash(&self) -> Self {
141 133 : // Unwrap safety inputs are guararnteed to be valid UTF-8
142 133 : Self(format!("{}/", self.0).try_into().unwrap())
143 133 : }
144 : }
145 :
146 : /// We don't need callers to be able to pass arbitrary delimiters: just control
147 : /// whether listings will use a '/' separator or not.
148 : ///
149 : /// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The
150 : /// NoDelimiter mode will only populate `keys`.
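///
/// For example (with hypothetical keys), listing under the prefix `tenants/` when the storage
/// holds `tenants/1/a`, `tenants/1/b` and `tenants/2/c`: `WithDelimiter` reports the two
/// top-level "directories" in `prefixes` and no `keys`, while `NoDelimiter` reports all three
/// objects in `keys`.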
151 : pub enum ListingMode {
152 : WithDelimiter,
153 : NoDelimiter,
154 : }
155 :
156 : #[derive(Default)]
157 : pub struct Listing {
158 : pub prefixes: Vec<RemotePath>,
159 : pub keys: Vec<RemotePath>,
160 : }
161 :
162 : /// Storage (potentially remote) API to manage its state.
163 : /// This storage tries to be unaware of any layered repository context,
164 : /// providing basic CRUD operations for storage files.
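///
/// A minimal sketch of feeding an in-memory buffer to [`RemoteStorage::upload`], which requires
/// the content length up front (the helper name below is illustrative, not part of this crate):
///
/// ```no_run
/// # use bytes::Bytes;
/// # use remote_storage::{RemotePath, RemoteStorage};
/// # use tokio_util::sync::CancellationToken;
/// async fn upload_bytes<S: RemoteStorage>(
///     storage: &S,
///     data: Bytes,
///     to: &RemotePath,
///     cancel: &CancellationToken,
/// ) -> anyhow::Result<()> {
///     let len = data.len();
///     // Wrap the buffer into the single-item stream that `upload` expects.
///     let stream = futures::stream::once(futures::future::ready(Ok::<_, std::io::Error>(data)));
///     storage.upload(stream, len, to, None, cancel).await
/// }
/// ```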
165 : #[allow(async_fn_in_trait)]
166 : pub trait RemoteStorage: Send + Sync + 'static {
167 : /// List objects in remote storage, with semantics matching AWS S3's ListObjectsV2.
168 : /// (see `<https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>`)
169 : ///
170 : /// Note that the prefix is relative to any `prefix_in_bucket` configured for the client, not
171 : /// from the absolute root of the bucket.
172 : ///
173 : /// `mode` configures whether to use a delimiter. Without a delimiter all keys
174 : /// within the prefix are listed in the `keys` of the result. With a delimiter, any "directories" at the top level of
175 : /// the prefix are returned in the `prefixes` of the result, and keys in the top level of the prefix are
176 : /// returned in `keys`.
177 : ///
178 : /// `max_keys` controls the maximum number of keys that will be returned. If this is None, this function
179 : /// will iteratively call ListObjects until it runs out of keys. Note that this is not safe to use on
180 : /// unlimited-size buckets, as the full list of objects is allocated into a monolithic data structure.
181 : ///
182 : async fn list(
183 : &self,
184 : prefix: Option<&RemotePath>,
185 : _mode: ListingMode,
186 : max_keys: Option<NonZeroU32>,
187 : cancel: &CancellationToken,
188 : ) -> Result<Listing, DownloadError>;
189 :
190 : /// Streams the local file contents into the remote storage entry.
191 : ///
192 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
193 : /// set to `TimeoutOrCancel`.
194 : async fn upload(
195 : &self,
196 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
197 : // S3 PUT requests require the content length to be specified,
198 : // otherwise they start to fail as the concurrent connection count increases.
199 : data_size_bytes: usize,
200 : to: &RemotePath,
201 : metadata: Option<StorageMetadata>,
202 : cancel: &CancellationToken,
203 : ) -> anyhow::Result<()>;
204 :
205 : /// Streams the remote storage entry contents.
206 : ///
207 : /// The returned download stream will obey the initial timeout and the cancellation signal by erroring
208 : /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
209 : /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
210 : ///
211 : /// Returns the metadata, if any was stored with the file previously.
212 : async fn download(
213 : &self,
214 : from: &RemotePath,
215 : cancel: &CancellationToken,
216 : ) -> Result<Download, DownloadError>;
217 :
218 : /// Streams a given byte range of the remote storage entry contents.
219 : ///
220 : /// The returned download stream will obey the initial timeout and the cancellation signal by erroring
221 : /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
222 : /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
223 : ///
224 : /// Returns the metadata, if any was stored with the file previously.
225 : async fn download_byte_range(
226 : &self,
227 : from: &RemotePath,
228 : start_inclusive: u64,
229 : end_exclusive: Option<u64>,
230 : cancel: &CancellationToken,
231 : ) -> Result<Download, DownloadError>;
232 :
233 : /// Delete a single path from remote storage.
234 : ///
235 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
236 : /// set to `TimeoutOrCancel`. In such a situation it is unknown whether the deletion went through.
237 : async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;
238 :
239 : /// Delete multiple paths from remote storage.
240 : ///
241 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
242 : /// set to `TimeoutOrCancel`. In such a situation it is unknown which deletions, if any, went
243 : /// through.
244 : async fn delete_objects<'a>(
245 : &self,
246 : paths: &'a [RemotePath],
247 : cancel: &CancellationToken,
248 : ) -> anyhow::Result<()>;
249 :
250 : /// Copy a remote object inside a bucket from one path to another.
251 : async fn copy(
252 : &self,
253 : from: &RemotePath,
254 : to: &RemotePath,
255 : cancel: &CancellationToken,
256 : ) -> anyhow::Result<()>;
257 :
258 : /// Resets the content of everything with the given prefix to the given state
259 : async fn time_travel_recover(
260 : &self,
261 : prefix: Option<&RemotePath>,
262 : timestamp: SystemTime,
263 : done_if_after: SystemTime,
264 : cancel: &CancellationToken,
265 : ) -> Result<(), TimeTravelError>;
266 :
267 : /// Query how busy we currently are: may be used by callers which wish to politely
268 : /// back off if there are already a lot of operations underway.
269 : fn activity(&self) -> RemoteStorageActivity;
270 : }
271 :
272 : pub struct RemoteStorageActivity {
273 : pub read_available: usize,
274 : pub read_total: usize,
275 : pub write_available: usize,
276 : pub write_total: usize,
277 : }
278 :
279 : /// DownloadStream is sensitive to the timeout and cancellation used with the original
280 : /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
281 : /// with `tokio::io::copy_buf`.
282 : // This has 'static because safekeepers do not use cancellation tokens (yet)
283 : pub type DownloadStream =
284 : Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;
285 :
286 : pub struct Download {
287 : pub download_stream: DownloadStream,
288 : /// The last time the file was modified (`last-modified` HTTP header)
289 : pub last_modified: SystemTime,
290 : /// A way to identify this specific version of the resource (`etag` HTTP header)
291 : pub etag: Etag,
292 : /// Extra key-value data, associated with the current remote file.
293 : pub metadata: Option<StorageMetadata>,
294 : }
295 :
296 : impl Debug for Download {
297 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
298 0 : f.debug_struct("Download")
299 0 : .field("metadata", &self.metadata)
300 0 : .finish()
301 0 : }
302 : }
303 :
304 : /// Every storage kind currently supported.
305 : /// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
306 : #[derive(Clone)]
307 : // Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
308 : pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
309 : LocalFs(LocalFs),
310 : AwsS3(Arc<S3Bucket>),
311 : AzureBlob(Arc<AzureBlobStorage>),
312 : Unreliable(Other),
313 : }
314 :
315 : impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
316 136 : pub async fn list(
317 136 : &self,
318 136 : prefix: Option<&RemotePath>,
319 136 : mode: ListingMode,
320 136 : max_keys: Option<NonZeroU32>,
321 136 : cancel: &CancellationToken,
322 136 : ) -> anyhow::Result<Listing, DownloadError> {
323 136 : match self {
324 510 : Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
325 0 : Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
326 0 : Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
327 0 : Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
328 : }
329 136 : }
330 :
331 : /// See [`RemoteStorage::upload`]
332 2554 : pub async fn upload(
333 2554 : &self,
334 2554 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
335 2554 : data_size_bytes: usize,
336 2554 : to: &RemotePath,
337 2554 : metadata: Option<StorageMetadata>,
338 2554 : cancel: &CancellationToken,
339 2554 : ) -> anyhow::Result<()> {
340 2554 : match self {
341 30637 : Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
342 0 : Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
343 0 : Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
344 75 : Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
345 : }
346 2527 : }
347 :
348 42 : pub async fn download(
349 42 : &self,
350 42 : from: &RemotePath,
351 42 : cancel: &CancellationToken,
352 42 : ) -> Result<Download, DownloadError> {
353 42 : match self {
354 70 : Self::LocalFs(s) => s.download(from, cancel).await,
355 0 : Self::AwsS3(s) => s.download(from, cancel).await,
356 0 : Self::AzureBlob(s) => s.download(from, cancel).await,
357 0 : Self::Unreliable(s) => s.download(from, cancel).await,
358 : }
359 42 : }
360 :
361 0 : pub async fn download_byte_range(
362 0 : &self,
363 0 : from: &RemotePath,
364 0 : start_inclusive: u64,
365 0 : end_exclusive: Option<u64>,
366 0 : cancel: &CancellationToken,
367 0 : ) -> Result<Download, DownloadError> {
368 0 : match self {
369 0 : Self::LocalFs(s) => {
370 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
371 0 : .await
372 : }
373 0 : Self::AwsS3(s) => {
374 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
375 0 : .await
376 : }
377 0 : Self::AzureBlob(s) => {
378 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
379 0 : .await
380 : }
381 0 : Self::Unreliable(s) => {
382 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
383 0 : .await
384 : }
385 : }
386 0 : }
387 :
388 : /// See [`RemoteStorage::delete`]
389 2 : pub async fn delete(
390 2 : &self,
391 2 : path: &RemotePath,
392 2 : cancel: &CancellationToken,
393 2 : ) -> anyhow::Result<()> {
394 2 : match self {
395 2 : Self::LocalFs(s) => s.delete(path, cancel).await,
396 0 : Self::AwsS3(s) => s.delete(path, cancel).await,
397 0 : Self::AzureBlob(s) => s.delete(path, cancel).await,
398 0 : Self::Unreliable(s) => s.delete(path, cancel).await,
399 : }
400 2 : }
401 :
402 : /// See [`RemoteStorage::delete_objects`]
403 6 : pub async fn delete_objects(
404 6 : &self,
405 6 : paths: &[RemotePath],
406 6 : cancel: &CancellationToken,
407 6 : ) -> anyhow::Result<()> {
408 6 : match self {
409 6 : Self::LocalFs(s) => s.delete_objects(paths, cancel).await,
410 0 : Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
411 0 : Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
412 0 : Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
413 : }
414 6 : }
415 :
416 : /// See [`RemoteStorage::copy`]
417 0 : pub async fn copy_object(
418 0 : &self,
419 0 : from: &RemotePath,
420 0 : to: &RemotePath,
421 0 : cancel: &CancellationToken,
422 0 : ) -> anyhow::Result<()> {
423 0 : match self {
424 0 : Self::LocalFs(s) => s.copy(from, to, cancel).await,
425 0 : Self::AwsS3(s) => s.copy(from, to, cancel).await,
426 0 : Self::AzureBlob(s) => s.copy(from, to, cancel).await,
427 0 : Self::Unreliable(s) => s.copy(from, to, cancel).await,
428 : }
429 0 : }
430 :
431 : /// See [`RemoteStorage::time_travel_recover`].
432 0 : pub async fn time_travel_recover(
433 0 : &self,
434 0 : prefix: Option<&RemotePath>,
435 0 : timestamp: SystemTime,
436 0 : done_if_after: SystemTime,
437 0 : cancel: &CancellationToken,
438 0 : ) -> Result<(), TimeTravelError> {
439 0 : match self {
440 0 : Self::LocalFs(s) => {
441 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
442 0 : .await
443 : }
444 0 : Self::AwsS3(s) => {
445 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
446 0 : .await
447 : }
448 0 : Self::AzureBlob(s) => {
449 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
450 0 : .await
451 : }
452 0 : Self::Unreliable(s) => {
453 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
454 0 : .await
455 : }
456 : }
457 0 : }
458 :
459 0 : pub fn activity(&self) -> RemoteStorageActivity {
460 0 : match self {
461 0 : Self::LocalFs(s) => s.activity(),
462 0 : Self::AwsS3(s) => s.activity(),
463 0 : Self::AzureBlob(s) => s.activity(),
464 0 : Self::Unreliable(s) => s.activity(),
465 : }
466 0 : }
467 : }
468 :
469 : impl GenericRemoteStorage {
470 170 : pub fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
471 170 : let timeout = storage_config.timeout;
472 170 : Ok(match &storage_config.storage {
473 146 : RemoteStorageKind::LocalFs(path) => {
474 146 : info!("Using fs root '{path}' as a remote storage");
475 146 : Self::LocalFs(LocalFs::new(path.clone(), timeout)?)
476 : }
477 18 : RemoteStorageKind::AwsS3(s3_config) => {
478 18 : // The profile and access key id are only printed here for debugging purposes,
479 18 : // their values don't indicate which authentication method is eventually chosen.
480 18 : let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
481 18 : let access_key_id =
482 18 : std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
483 18 : info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
484 : s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
485 18 : Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout)?))
486 : }
487 6 : RemoteStorageKind::AzureContainer(azure_config) => {
488 6 : info!("Using azure container '{}' in region '{}' as a remote storage, prefix in container: '{:?}'",
489 : azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
490 6 : Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config, timeout)?))
491 : }
492 : })
493 170 : }
494 :
495 4 : pub fn unreliable_wrapper(s: Self, fail_first: u64) -> Self {
496 4 : Self::Unreliable(Arc::new(UnreliableWrapper::new(s, fail_first)))
497 4 : }
498 :
499 : /// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.
500 1265 : pub async fn upload_storage_object(
501 1265 : &self,
502 1265 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
503 1265 : from_size_bytes: usize,
504 1265 : to: &RemotePath,
505 1265 : cancel: &CancellationToken,
506 1265 : ) -> anyhow::Result<()> {
507 1265 : self.upload(from, from_size_bytes, to, None, cancel)
508 3569 : .await
509 1262 : .with_context(|| {
510 0 : format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
511 1262 : })
512 1262 : }
513 :
514 : /// Downloads the storage object from the `from` path, returning its contents as a stream.
515 : /// `byte_range` can be specified to download only a part of the file, if needed.
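    ///
    /// A minimal sketch of draining the returned stream into memory (real callers would stream
    /// to disk and handle errors according to their retry policy):
    ///
    /// ```no_run
    /// # async fn example(
    /// #     storage: &remote_storage::GenericRemoteStorage,
    /// #     from: &remote_storage::RemotePath,
    /// # ) -> anyhow::Result<()> {
    /// use futures::StreamExt;
    /// use tokio_util::sync::CancellationToken;
    ///
    /// let cancel = CancellationToken::new();
    /// let download = storage.download_storage_object(None, from, &cancel).await?;
    /// println!("last modified: {:?}", download.last_modified);
    ///
    /// let mut stream = download.download_stream;
    /// let mut bytes = Vec::new();
    /// while let Some(chunk) = stream.next().await {
    ///     bytes.extend_from_slice(&chunk?);
    /// }
    /// println!("downloaded {} bytes", bytes.len());
    /// # Ok(())
    /// # }
    /// ```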
516 0 : pub async fn download_storage_object(
517 0 : &self,
518 0 : byte_range: Option<(u64, Option<u64>)>,
519 0 : from: &RemotePath,
520 0 : cancel: &CancellationToken,
521 0 : ) -> Result<Download, DownloadError> {
522 0 : match byte_range {
523 0 : Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
524 0 : None => self.download(from, cancel).await,
525 : }
526 0 : }
527 : }
528 :
529 : /// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
530 : /// Immutable, cannot be changed once the file is created.
531 : #[derive(Debug, Clone, PartialEq, Eq)]
532 : pub struct StorageMetadata(HashMap<String, String>);
533 :
534 : impl<const N: usize> From<[(&str, &str); N]> for StorageMetadata {
535 0 : fn from(arr: [(&str, &str); N]) -> Self {
536 0 : let map: HashMap<String, String> = arr
537 0 : .iter()
538 0 : .map(|(k, v)| (k.to_string(), v.to_string()))
539 0 : .collect();
540 0 : Self(map)
541 0 : }
542 : }
543 :
544 : /// External backup storage configuration, enough for creating a client for that storage.
545 : #[derive(Debug, Clone, PartialEq, Eq)]
546 : pub struct RemoteStorageConfig {
547 : /// The storage connection configuration.
548 : pub storage: RemoteStorageKind,
549 : /// A common timeout enforced for all requests after concurrency limiter permit has been
550 : /// acquired.
551 : pub timeout: Duration,
552 : }
553 :
554 : /// A kind of a remote storage to connect to, with its connection configuration.
555 : #[derive(Debug, Clone, PartialEq, Eq)]
556 : pub enum RemoteStorageKind {
557 : /// Storage based on local file system.
558 : /// Specify a root folder to place all stored files into.
559 : LocalFs(Utf8PathBuf),
560 : /// AWS S3 based storage, storing all files in the S3 bucket
561 : /// specified by the config
562 : AwsS3(S3Config),
563 : /// Azure Blob based storage, storing all files in the container
564 : /// specified by the config
565 : AzureContainer(AzureConfig),
566 : }
567 :
568 : /// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
569 : #[derive(Clone, PartialEq, Eq)]
570 : pub struct S3Config {
571 : /// Name of the bucket to connect to.
572 : pub bucket_name: String,
573 : /// The region where the bucket is located at.
574 : pub bucket_region: String,
575 : /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
576 : pub prefix_in_bucket: Option<String>,
577 : /// A base URL to send S3 requests to.
578 : /// By default, the endpoint is derived from a region name, assuming it's
579 : /// an AWS S3 region name, erroring on wrong region name.
580 : /// Endpoint provides a way to support other S3 flavors and their regions.
581 : ///
582 : /// Example: `http://127.0.0.1:5000`
583 : pub endpoint: Option<String>,
584 : /// AWS S3 has various limits on its API calls, which we must not exceed.
585 : /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
586 : pub concurrency_limit: NonZeroUsize,
587 : pub max_keys_per_list_response: Option<i32>,
588 : pub upload_storage_class: Option<StorageClass>,
589 : }
590 :
591 : impl Debug for S3Config {
592 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
593 0 : f.debug_struct("S3Config")
594 0 : .field("bucket_name", &self.bucket_name)
595 0 : .field("bucket_region", &self.bucket_region)
596 0 : .field("prefix_in_bucket", &self.prefix_in_bucket)
597 0 : .field("concurrency_limit", &self.concurrency_limit)
598 0 : .field(
599 0 : "max_keys_per_list_response",
600 0 : &self.max_keys_per_list_response,
601 0 : )
602 0 : .finish()
603 0 : }
604 : }
605 :
606 : /// Azure container coordinates and access credentials to manage the container contents (read and write).
607 : #[derive(Clone, PartialEq, Eq)]
608 : pub struct AzureConfig {
609 : /// Name of the container to connect to.
610 : pub container_name: String,
611 : /// The region where the bucket is located at.
612 : pub container_region: String,
613 : /// A "subfolder" in the container, to use the same container separately by multiple remote storage users at once.
614 : pub prefix_in_container: Option<String>,
615 : /// Azure has various limits on its API calls, which we must not exceed.
616 : /// See [`DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT`] for more details.
617 : pub concurrency_limit: NonZeroUsize,
618 : pub max_keys_per_list_response: Option<i32>,
619 : }
620 :
621 : impl Debug for AzureConfig {
622 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
623 0 : f.debug_struct("AzureConfig")
624 0 : .field("bucket_name", &self.container_name)
625 0 : .field("bucket_region", &self.container_region)
626 0 : .field("prefix_in_bucket", &self.prefix_in_container)
627 0 : .field("concurrency_limit", &self.concurrency_limit)
628 0 : .field(
629 0 : "max_keys_per_list_response",
630 0 : &self.max_keys_per_list_response,
631 0 : )
632 0 : .finish()
633 0 : }
634 : }
635 :
636 : impl RemoteStorageConfig {
637 : pub const DEFAULT_TIMEOUT: Duration = std::time::Duration::from_secs(120);
638 :
639 34 : pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<Option<RemoteStorageConfig>> {
640 34 : let local_path = toml.get("local_path");
641 34 : let bucket_name = toml.get("bucket_name");
642 34 : let bucket_region = toml.get("bucket_region");
643 34 : let container_name = toml.get("container_name");
644 34 : let container_region = toml.get("container_region");
645 :
646 34 : let use_azure = container_name.is_some() && container_region.is_some();
647 :
648 34 : let default_concurrency_limit = if use_azure {
649 0 : DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT
650 : } else {
651 34 : DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT
652 : };
653 34 : let concurrency_limit = NonZeroUsize::new(
654 34 : parse_optional_integer("concurrency_limit", toml)?.unwrap_or(default_concurrency_limit),
655 34 : )
656 34 : .context("Failed to parse 'concurrency_limit' as a positive integer")?;
657 :
658 34 : let max_keys_per_list_response =
659 34 : parse_optional_integer::<i32, _>("max_keys_per_list_response", toml)
660 34 : .context("Failed to parse 'max_keys_per_list_response' as a positive integer")?
661 34 : .or(DEFAULT_MAX_KEYS_PER_LIST_RESPONSE);
662 :
663 34 : let endpoint = toml
664 34 : .get("endpoint")
665 34 : .map(|endpoint| parse_toml_string("endpoint", endpoint))
666 34 : .transpose()?;
667 :
668 34 : let timeout = toml
669 34 : .get("timeout")
670 34 : .map(|timeout| {
671 2 : timeout
672 2 : .as_str()
673 2 : .ok_or_else(|| anyhow::Error::msg("timeout was not a string"))
674 34 : })
675 34 : .transpose()
676 34 : .and_then(|timeout| {
677 34 : timeout
678 34 : .map(humantime::parse_duration)
679 34 : .transpose()
680 34 : .map_err(anyhow::Error::new)
681 34 : })
682 34 : .context("parse timeout")?
683 34 : .unwrap_or(Self::DEFAULT_TIMEOUT);
684 34 :
685 34 : if timeout < Duration::from_secs(1) {
686 0 : bail!("timeout was specified as {timeout:?} which is too low");
687 34 : }
688 :
689 12 : let storage = match (
690 34 : local_path,
691 34 : bucket_name,
692 34 : bucket_region,
693 34 : container_name,
694 34 : container_region,
695 : ) {
696 : // no 'local_path' nor 'bucket_name' options are provided, consider this remote storage disabled
697 22 : (None, None, None, None, None) => return Ok(None),
698 : (_, Some(_), None, ..) => {
699 0 : bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
700 : }
701 : (_, None, Some(_), ..) => {
702 0 : bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
703 : }
704 6 : (None, Some(bucket_name), Some(bucket_region), ..) => {
705 6 : RemoteStorageKind::AwsS3(S3Config {
706 6 : bucket_name: parse_toml_string("bucket_name", bucket_name)?,
707 6 : bucket_region: parse_toml_string("bucket_region", bucket_region)?,
708 6 : prefix_in_bucket: toml
709 6 : .get("prefix_in_bucket")
710 6 : .map(|prefix_in_bucket| {
711 6 : parse_toml_string("prefix_in_bucket", prefix_in_bucket)
712 6 : })
713 6 : .transpose()?,
714 6 : endpoint,
715 6 : concurrency_limit,
716 6 : max_keys_per_list_response,
717 6 : upload_storage_class: toml
718 6 : .get("upload_storage_class")
719 6 : .map(|prefix_in_bucket| -> anyhow::Result<_> {
720 0 : let s = parse_toml_string("upload_storage_class", prefix_in_bucket)?;
721 0 : let storage_class = StorageClass::from_str(&s).expect("infallible");
722 : #[allow(deprecated)]
723 0 : if matches!(storage_class, StorageClass::Unknown(_)) {
724 0 : bail!("Specified storage class unknown to SDK: '{s}'. Allowed values: {:?}", StorageClass::values());
725 0 : }
726 0 : Ok(storage_class)
727 6 : })
728 6 : .transpose()?,
729 : })
730 : }
731 : (_, _, _, Some(_), None) => {
732 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
733 : }
734 : (_, _, _, None, Some(_)) => {
735 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
736 : }
737 0 : (None, None, None, Some(container_name), Some(container_region)) => {
738 0 : RemoteStorageKind::AzureContainer(AzureConfig {
739 0 : container_name: parse_toml_string("container_name", container_name)?,
740 0 : container_region: parse_toml_string("container_region", container_region)?,
741 0 : prefix_in_container: toml
742 0 : .get("prefix_in_container")
743 0 : .map(|prefix_in_container| {
744 0 : parse_toml_string("prefix_in_container", prefix_in_container)
745 0 : })
746 0 : .transpose()?,
747 0 : concurrency_limit,
748 0 : max_keys_per_list_response,
749 : })
750 : }
751 6 : (Some(local_path), None, None, None, None) => RemoteStorageKind::LocalFs(
752 6 : Utf8PathBuf::from(parse_toml_string("local_path", local_path)?),
753 : ),
754 : (Some(_), Some(_), ..) => {
755 0 : bail!("'local_path' and 'bucket_name' are mutually exclusive")
756 : }
757 : (Some(_), _, _, Some(_), Some(_)) => {
758 0 : bail!("local_path and 'container_name' are mutually exclusive")
759 : }
760 : };
761 :
762 12 : Ok(Some(RemoteStorageConfig { storage, timeout }))
763 34 : }
764 : }
765 :
766 : // Helper functions to parse a toml Item
767 68 : fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
768 68 : where
769 68 : I: TryFrom<i64, Error = E>,
770 68 : E: std::error::Error + Send + Sync + 'static,
771 68 : {
772 68 : let toml_integer = match item.get(name) {
773 4 : Some(item) => item
774 4 : .as_integer()
775 4 : .with_context(|| format!("configure option {name} is not an integer"))?,
776 64 : None => return Ok(None),
777 : };
778 :
779 4 : I::try_from(toml_integer)
780 4 : .map(Some)
781 4 : .with_context(|| format!("configure option {name} is too large"))
782 68 : }
783 :
784 30 : fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
785 30 : let s = item
786 30 : .as_str()
787 30 : .with_context(|| format!("configure option {name} is not a string"))?;
788 30 : Ok(s.to_string())
789 30 : }
790 :
791 : struct ConcurrencyLimiter {
792 : // Every request to S3 can be throttled or cancelled if a certain number of requests per second is exceeded.
793 : // The same goes for IAM, which is queried before every S3 request, if enabled. IAM has an even lower RPS threshold.
794 : // This limiter helps to ensure we don't exceed those thresholds.
795 : write: Arc<Semaphore>,
796 : read: Arc<Semaphore>,
797 :
798 : write_total: usize,
799 : read_total: usize,
800 : }
801 :
802 : impl ConcurrencyLimiter {
803 374 : fn for_kind(&self, kind: RequestKind) -> &Arc<Semaphore> {
804 374 : match kind {
805 32 : RequestKind::Get => &self.read,
806 154 : RequestKind::Put => &self.write,
807 30 : RequestKind::List => &self.read,
808 149 : RequestKind::Delete => &self.write,
809 3 : RequestKind::Copy => &self.write,
810 6 : RequestKind::TimeTravel => &self.write,
811 : }
812 374 : }
813 :
814 349 : async fn acquire(
815 349 : &self,
816 349 : kind: RequestKind,
817 349 : ) -> Result<tokio::sync::SemaphorePermit<'_>, tokio::sync::AcquireError> {
818 0 : self.for_kind(kind).acquire().await
819 0 : }
820 :
821 25 : async fn acquire_owned(
822 25 : &self,
823 25 : kind: RequestKind,
824 25 : ) -> Result<tokio::sync::OwnedSemaphorePermit, tokio::sync::AcquireError> {
825 0 : Arc::clone(self.for_kind(kind)).acquire_owned().await
826 0 : }
827 :
828 0 : fn activity(&self) -> RemoteStorageActivity {
829 0 : RemoteStorageActivity {
830 0 : read_available: self.read.available_permits(),
831 0 : read_total: self.read_total,
832 0 : write_available: self.write.available_permits(),
833 0 : write_total: self.write_total,
834 0 : }
835 0 : }
836 :
837 34 : fn new(limit: usize) -> ConcurrencyLimiter {
838 34 : Self {
839 34 : read: Arc::new(Semaphore::new(limit)),
840 34 : write: Arc::new(Semaphore::new(limit)),
841 34 : read_total: limit,
842 34 : write_total: limit,
843 34 : }
844 34 : }
845 : }
846 :
847 : #[cfg(test)]
848 : mod tests {
849 : use super::*;
850 :
851 : #[test]
852 2 : fn test_object_name() {
853 2 : let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
854 2 : assert_eq!(k.object_name(), Some("c"));
855 :
856 2 : let k = RemotePath::new(Utf8Path::new("a/b/c/")).unwrap();
857 2 : assert_eq!(k.object_name(), Some("c"));
858 :
859 2 : let k = RemotePath::new(Utf8Path::new("a/")).unwrap();
860 2 : assert_eq!(k.object_name(), Some("a"));
861 :
862 : // XXX is it impossible to have an empty key?
863 2 : let k = RemotePath::new(Utf8Path::new("")).unwrap();
864 2 : assert_eq!(k.object_name(), None);
865 2 : }
866 :
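    // A small illustration of the remaining `RemotePath` helpers, using hypothetical
    // tenant/timeline-style paths.
    #[test]
    fn remote_path_helpers() {
        let path = RemotePath::new(Utf8Path::new("tenants/42/timelines")).unwrap();

        // `with_base` re-roots the relative remote path under a local directory.
        assert_eq!(
            path.with_base(Utf8Path::new("/local/workdir")),
            Utf8PathBuf::from("/local/workdir/tenants/42/timelines")
        );

        // `strip_prefix` and `join` operate on the remote path itself.
        let prefix = RemotePath::new(Utf8Path::new("tenants")).unwrap();
        assert_eq!(
            path.strip_prefix(&prefix).unwrap(),
            Utf8Path::new("42/timelines")
        );
        assert_eq!(
            prefix.join(Utf8Path::new("42")).get_path(),
            &Utf8PathBuf::from("tenants/42")
        );

        // A trailing slash matters when a path is used as a listing prefix.
        assert_eq!(
            prefix.add_trailing_slash().get_path(),
            &Utf8PathBuf::from("tenants/")
        );
    }
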
867 : #[test]
868 2 : fn remote_path_cannot_be_created_from_absolute_ones() {
869 2 : let err = RemotePath::new(Utf8Path::new("/")).expect_err("Should fail on absolute paths");
870 2 : assert_eq!(err.to_string(), "Path \"/\" is not relative");
871 2 : }
872 :
873 : #[test]
874 2 : fn parse_localfs_config_with_timeout() {
875 2 : let input = "local_path = '.'
876 2 : timeout = '5s'";
877 2 :
878 2 : let toml = input.parse::<toml_edit::Document>().unwrap();
879 2 :
880 2 : let config = RemoteStorageConfig::from_toml(toml.as_item())
881 2 : .unwrap()
882 2 : .expect("it exists");
883 2 :
884 2 : assert_eq!(
885 2 : config,
886 2 : RemoteStorageConfig {
887 2 : storage: RemoteStorageKind::LocalFs(Utf8PathBuf::from(".")),
888 2 : timeout: Duration::from_secs(5)
889 2 : }
890 2 : );
891 2 : }
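
    // A companion to the test above: a minimal S3 configuration (bucket name, region, prefix and
    // endpoint below are hypothetical) should pick up the documented defaults for everything else.
    #[test]
    fn parse_s3_config_with_defaults() {
        let input = "bucket_name = 'some-bucket'
bucket_region = 'eu-central-1'
prefix_in_bucket = 'pageserver/'
endpoint = 'http://127.0.0.1:5000'";

        let toml = input.parse::<toml_edit::Document>().unwrap();

        let config = RemoteStorageConfig::from_toml(toml.as_item())
            .unwrap()
            .expect("it exists");

        assert_eq!(
            config,
            RemoteStorageConfig {
                storage: RemoteStorageKind::AwsS3(S3Config {
                    bucket_name: "some-bucket".to_string(),
                    bucket_region: "eu-central-1".to_string(),
                    prefix_in_bucket: Some("pageserver/".to_string()),
                    endpoint: Some("http://127.0.0.1:5000".to_string()),
                    concurrency_limit: NonZeroUsize::new(
                        DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT
                    )
                    .unwrap(),
                    max_keys_per_list_response: None,
                    upload_storage_class: None,
                }),
                timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
            }
        );
    }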
892 : }
|