Line data Source code
1 : //! A set of generic storage abstractions for the page server to use when backing up and restoring its state from external storage.
2 : //! No other modules from this tree are supposed to be used directly by external code.
3 : //!
4 : //! The [`RemoteStorage`] trait is a CRUD-like generic abstraction used to adapt external storages, with a few implementations:
5 : //! * [`local_fs`] allows using the local file system as an external storage
6 : //! * [`s3_bucket`] uses an AWS S3 bucket as an external storage
7 : //! * [`azure_blob`] uses Azure Blob Storage as an external storage
8 : //!
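//! A minimal end-to-end sketch of how this crate is typically used (hedged: it assumes the
//! crate is available as `remote_storage`, uses the `LocalFs` backend, and elides real error
//! handling; the root directory, object path and payload are made up for illustration):
//!
//! ```no_run
//! # async fn example() -> anyhow::Result<()> {
//! use camino::Utf8PathBuf;
//! use remote_storage::{GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind};
//! use tokio_util::sync::CancellationToken;
//!
//! let config = RemoteStorageConfig {
//!     storage: RemoteStorageKind::LocalFs(Utf8PathBuf::from("/tmp/remote-storage-root")),
//!     timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
//! };
//! let storage = GenericRemoteStorage::from_config(&config)?;
//!
//! let cancel = CancellationToken::new();
//! let path = RemotePath::from_string("tenant/timeline/layer_file")?;
//! let data = bytes::Bytes::from_static(b"payload");
//! let stream = futures::stream::once(futures::future::ready(Ok::<_, std::io::Error>(data.clone())));
//! storage.upload_storage_object(stream, data.len(), &path, &cancel).await?;
//! # Ok(())
//! # }
//! ```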
9 : #![deny(unsafe_code)]
10 : #![deny(clippy::undocumented_unsafe_blocks)]
11 :
12 : mod azure_blob;
13 : mod error;
14 : mod local_fs;
15 : mod metrics;
16 : mod s3_bucket;
17 : mod simulate_failures;
18 : mod support;
19 :
20 : use std::{
21 : collections::HashMap,
22 : fmt::Debug,
23 : num::{NonZeroU32, NonZeroUsize},
24 : pin::Pin,
25 : str::FromStr,
26 : sync::Arc,
27 : time::{Duration, SystemTime},
28 : };
29 :
30 : use anyhow::{bail, Context};
31 : use aws_sdk_s3::types::StorageClass;
32 : use camino::{Utf8Path, Utf8PathBuf};
33 :
34 : use bytes::Bytes;
35 : use futures::stream::Stream;
36 : use serde::{Deserialize, Serialize};
37 : use tokio::sync::Semaphore;
38 : use tokio_util::sync::CancellationToken;
39 : use toml_edit::Item;
40 : use tracing::info;
41 :
42 : pub use self::{
43 : azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
44 : simulate_failures::UnreliableWrapper,
45 : };
46 : use s3_bucket::RequestKind;
47 :
48 : /// The Azure SDK's ETag type is a simple String wrapper: we re-export it here instead of duplicating it.
49 : pub use azure_core::Etag;
50 :
51 : pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
52 :
53 : /// Currently, the sync happens with AWS S3, which has two limits on requests per second:
54 : /// ~200 RPS for IAM services
55 : /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
56 : /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
57 : /// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
58 : pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
59 : /// Set this limit analogously to the S3 limit.
60 : ///
61 : /// The discussion linked below notes a limit of at most 20k concurrent connections.
62 : /// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
63 : pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
64 : /// No limit on the client side, which currently means the AWS S3 default of 1000 keys per response.
65 : /// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
66 : pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
67 :
68 : /// The maximum number of keys per delete request, as defined in the S3 docs.
69 : pub const MAX_KEYS_PER_DELETE: usize = 1000;
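// A hedged sketch of splitting a large deletion into batches that respect this limit
// (illustrative only; backend implementations may already chunk internally, and `storage`,
// `paths` and `cancel` are assumed to exist in the caller's scope):
//
//     for chunk in paths.chunks(MAX_KEYS_PER_DELETE) {
//         storage.delete_objects(chunk, &cancel).await?;
//     }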
70 :
71 : const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
72 :
73 : /// A path on the remote storage, relative to some inner prefix.
74 : /// The prefix is an implementation detail that allows representing local paths
75 : /// as remote ones, stripping the local storage prefix away.
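///
/// A hedged sketch of the expected semantics (illustrative; it assumes the crate is
/// available as `remote_storage`):
///
/// ```
/// # use camino::Utf8Path;
/// # use remote_storage::RemotePath;
/// let path = RemotePath::new(Utf8Path::new("tenants/123/timelines")).unwrap();
/// assert_eq!(path.object_name(), Some("timelines"));
/// assert_eq!(path.join("layer").get_path().as_str(), "tenants/123/timelines/layer");
/// // Absolute paths are rejected: the remote prefix is added by the storage implementation.
/// assert!(RemotePath::new(Utf8Path::new("/absolute")).is_err());
/// ```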
76 : #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
77 : pub struct RemotePath(Utf8PathBuf);
78 :
79 : impl Serialize for RemotePath {
80 0 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
81 0 : where
82 0 : S: serde::Serializer,
83 0 : {
84 0 : serializer.collect_str(self)
85 0 : }
86 : }
87 :
88 : impl<'de> Deserialize<'de> for RemotePath {
89 0 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
90 0 : where
91 0 : D: serde::Deserializer<'de>,
92 0 : {
93 0 : let str = String::deserialize(deserializer)?;
94 0 : Ok(Self(Utf8PathBuf::from(&str)))
95 0 : }
96 : }
97 :
98 : impl std::fmt::Display for RemotePath {
99 251 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
100 251 : std::fmt::Display::fmt(&self.0, f)
101 251 : }
102 : }
103 :
104 : impl RemotePath {
105 3528 : pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
106 3528 : anyhow::ensure!(
107 3528 : relative_path.is_relative(),
108 6 : "Path {relative_path:?} is not relative"
109 : );
110 3522 : Ok(Self(relative_path.to_path_buf()))
111 3528 : }
112 :
113 3170 : pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
114 3170 : Self::new(Utf8Path::new(relative_path))
115 3170 : }
116 :
117 3262 : pub fn with_base(&self, base_path: &Utf8Path) -> Utf8PathBuf {
118 3262 : base_path.join(&self.0)
119 3262 : }
120 :
121 22 : pub fn object_name(&self) -> Option<&str> {
122 22 : self.0.file_name()
123 22 : }
124 :
125 99 : pub fn join(&self, path: impl AsRef<Utf8Path>) -> Self {
126 99 : Self(self.0.join(path))
127 99 : }
128 :
129 523 : pub fn get_path(&self) -> &Utf8PathBuf {
130 523 : &self.0
131 523 : }
132 :
133 0 : pub fn extension(&self) -> Option<&str> {
134 0 : self.0.extension()
135 0 : }
136 :
137 78 : pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
138 78 : self.0.strip_prefix(&p.0)
139 78 : }
140 :
141 160 : pub fn add_trailing_slash(&self) -> Self {
142 160 : // Unwrap safety: inputs are guaranteed to be valid UTF-8
143 160 : Self(format!("{}/", self.0).try_into().unwrap())
144 160 : }
145 : }
146 :
147 : /// We don't need callers to be able to pass arbitrary delimiters: just control
148 : /// whether listings will use a '/' separator or not.
149 : ///
150 : /// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The
151 : /// NoDelimiter mode will only populate `keys`.
152 : pub enum ListingMode {
153 : WithDelimiter,
154 : NoDelimiter,
155 : }
156 :
157 : #[derive(Default)]
158 : pub struct Listing {
159 : pub prefixes: Vec<RemotePath>,
160 : pub keys: Vec<RemotePath>,
161 : }
162 :
163 : /// Storage (potentially remote) API used to manage the page server's state.
164 : /// This storage tries to be unaware of any layered repository context,
165 : /// providing basic CRUD operations for storage files.
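///
/// A hedged, compile-only sketch of calling this trait generically (illustrative only;
/// it assumes the crate is named `remote_storage`, and the listing semantics are described
/// on [`RemoteStorage::list`]):
///
/// ```no_run
/// # use remote_storage::{DownloadError, Listing, ListingMode, RemoteStorage};
/// # use tokio_util::sync::CancellationToken;
/// async fn list_top_level<S: RemoteStorage>(
///     storage: &S,
///     cancel: &CancellationToken,
/// ) -> Result<Listing, DownloadError> {
///     // With a delimiter, "directories" go into `prefixes` and plain keys into `keys`.
///     storage.list(None, ListingMode::WithDelimiter, None, cancel).await
/// }
/// ```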
166 : #[allow(async_fn_in_trait)]
167 : pub trait RemoteStorage: Send + Sync + 'static {
168 : /// List objects in remote storage, with semantics matching AWS S3's ListObjectsV2.
169 : /// (see `<https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>`)
170 : ///
171 : /// Note that the prefix is relative to any `prefix_in_bucket` configured for the client, not
172 : /// from the absolute root of the bucket.
173 : ///
174 : /// `mode` configures whether to use a delimiter. Without a delimiter all keys
175 : /// within the prefix are listed in the `keys` of the result. With a delimiter, any "directories" at the top level of
176 : /// the prefix are returned in the `prefixes` of the result, and keys in the top level of the prefix are
177 : /// returned in `keys`.
178 : ///
179 : /// `max_keys` controls the maximum number of keys that will be returned. If this is None, this function
180 : /// will iteratively call ListObjectsV2 until it runs out of keys. Note that this is not safe to use on
181 : /// unlimited-size buckets, as the full list of objects is allocated into a monolithic data structure.
182 : ///
183 : async fn list(
184 : &self,
185 : prefix: Option<&RemotePath>,
186 : _mode: ListingMode,
187 : max_keys: Option<NonZeroU32>,
188 : cancel: &CancellationToken,
189 : ) -> Result<Listing, DownloadError>;
190 :
191 : /// Streams the local file contents into the remote storage entry.
192 : ///
193 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
194 : /// set to `TimeoutOrCancel`.
195 : async fn upload(
196 : &self,
197 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
198 : // S3 PUT requests require the content length to be specified;
199 : // otherwise they start to fail as the concurrent connection count increases.
200 : data_size_bytes: usize,
201 : to: &RemotePath,
202 : metadata: Option<StorageMetadata>,
203 : cancel: &CancellationToken,
204 : ) -> anyhow::Result<()>;
205 :
206 : /// Streams the remote storage entry contents.
207 : ///
208 : /// The returned download stream will obey initial timeout and cancellation signal by erroring
209 : /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
210 : /// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
211 : ///
212 : /// Returns the metadata, if any was stored with the file previously.
213 : async fn download(
214 : &self,
215 : from: &RemotePath,
216 : cancel: &CancellationToken,
217 : ) -> Result<Download, DownloadError>;
218 :
219 : /// Streams a given byte range of the remote storage entry contents.
220 : ///
221 : /// The returned download stream will obey initial timeout and cancellation signal by erroring
222 : /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
223 : /// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
224 : ///
225 : /// Returns the metadata, if any was stored with the file previously.
226 : async fn download_byte_range(
227 : &self,
228 : from: &RemotePath,
229 : start_inclusive: u64,
230 : end_exclusive: Option<u64>,
231 : cancel: &CancellationToken,
232 : ) -> Result<Download, DownloadError>;
233 :
234 : /// Delete a single path from remote storage.
235 : ///
236 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
237 : /// set to `TimeoutOrCancel`. In that situation it is unknown whether the deletion went through.
238 : async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;
239 :
240 : /// Delete multiple paths from remote storage.
241 : ///
242 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
243 : /// set to `TimeoutOrCancel`. In that situation it is unknown which deletions, if any, went
244 : /// through.
245 : async fn delete_objects<'a>(
246 : &self,
247 : paths: &'a [RemotePath],
248 : cancel: &CancellationToken,
249 : ) -> anyhow::Result<()>;
250 :
251 : /// Copy a remote object inside a bucket from one path to another.
252 : async fn copy(
253 : &self,
254 : from: &RemotePath,
255 : to: &RemotePath,
256 : cancel: &CancellationToken,
257 : ) -> anyhow::Result<()>;
258 :
259 : /// Resets the content of everything under the given prefix to its state at the given timestamp.
260 : async fn time_travel_recover(
261 : &self,
262 : prefix: Option<&RemotePath>,
263 : timestamp: SystemTime,
264 : done_if_after: SystemTime,
265 : cancel: &CancellationToken,
266 : ) -> Result<(), TimeTravelError>;
267 : }
268 :
269 : /// DownloadStream is sensitive to the timeout and cancellation used with the original
270 : /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
271 : /// with `tokio::io::copy_buf`.
272 : // This has 'static because safekeepers do not use cancellation tokens (yet)
273 : pub type DownloadStream =
274 : Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;
275 :
276 : pub struct Download {
277 : pub download_stream: DownloadStream,
278 : /// The last time the file was modified (`last-modified` HTTP header)
279 : pub last_modified: SystemTime,
280 : /// A way to identify this specific version of the resource (`etag` HTTP header)
281 : pub etag: Etag,
282 : /// Extra key-value data, associated with the current remote file.
283 : pub metadata: Option<StorageMetadata>,
284 : }
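// A hedged sketch of consuming a `Download` (illustrative only; it assumes `tokio_util`'s
// `io` feature for `StreamReader` and a `download` value obtained from one of the download
// methods above):
//
//     let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
//     let mut file = tokio::fs::File::create("layer_file").await?;
//     tokio::io::copy_buf(&mut reader, &mut file).await?;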
285 :
286 : impl Debug for Download {
287 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
288 0 : f.debug_struct("Download")
289 0 : .field("metadata", &self.metadata)
290 0 : .finish()
291 0 : }
292 : }
293 :
294 : /// Every storage kind currently supported.
295 : /// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
296 : #[derive(Clone)]
297 : // Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
298 : pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
299 : LocalFs(LocalFs),
300 : AwsS3(Arc<S3Bucket>),
301 : AzureBlob(Arc<AzureBlobStorage>),
302 : Unreliable(Other),
303 : }
304 :
305 : impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
306 193 : pub async fn list(
307 193 : &self,
308 193 : prefix: Option<&RemotePath>,
309 193 : mode: ListingMode,
310 193 : max_keys: Option<NonZeroU32>,
311 193 : cancel: &CancellationToken,
312 193 : ) -> anyhow::Result<Listing, DownloadError> {
313 193 : match self {
314 612 : Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
315 120 : Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
316 60 : Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
317 0 : Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
318 : }
319 193 : }
320 :
321 : /// See [`RemoteStorage::upload`]
322 2850 : pub async fn upload(
323 2850 : &self,
324 2850 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
325 2850 : data_size_bytes: usize,
326 2850 : to: &RemotePath,
327 2850 : metadata: Option<StorageMetadata>,
328 2850 : cancel: &CancellationToken,
329 2850 : ) -> anyhow::Result<()> {
330 2850 : match self {
331 31314 : Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
332 578 : Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
333 267 : Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
334 76 : Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
335 : }
336 2818 : }
337 :
338 59 : pub async fn download(
339 59 : &self,
340 59 : from: &RemotePath,
341 59 : cancel: &CancellationToken,
342 59 : ) -> Result<Download, DownloadError> {
343 59 : match self {
344 67 : Self::LocalFs(s) => s.download(from, cancel).await,
345 27 : Self::AwsS3(s) => s.download(from, cancel).await,
346 10 : Self::AzureBlob(s) => s.download(from, cancel).await,
347 0 : Self::Unreliable(s) => s.download(from, cancel).await,
348 : }
349 59 : }
350 :
351 15 : pub async fn download_byte_range(
352 15 : &self,
353 15 : from: &RemotePath,
354 15 : start_inclusive: u64,
355 15 : end_exclusive: Option<u64>,
356 15 : cancel: &CancellationToken,
357 15 : ) -> Result<Download, DownloadError> {
358 15 : match self {
359 0 : Self::LocalFs(s) => {
360 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
361 0 : .await
362 : }
363 10 : Self::AwsS3(s) => {
364 10 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
365 10 : .await
366 : }
367 5 : Self::AzureBlob(s) => {
368 5 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
369 26 : .await
370 : }
371 0 : Self::Unreliable(s) => {
372 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
373 0 : .await
374 : }
375 : }
376 15 : }
377 :
378 : /// See [`RemoteStorage::delete`]
379 136 : pub async fn delete(
380 136 : &self,
381 136 : path: &RemotePath,
382 136 : cancel: &CancellationToken,
383 136 : ) -> anyhow::Result<()> {
384 136 : match self {
385 2 : Self::LocalFs(s) => s.delete(path, cancel).await,
386 281 : Self::AwsS3(s) => s.delete(path, cancel).await,
387 221 : Self::AzureBlob(s) => s.delete(path, cancel).await,
388 0 : Self::Unreliable(s) => s.delete(path, cancel).await,
389 : }
390 136 : }
391 :
392 : /// See [`RemoteStorage::delete_objects`]
393 21 : pub async fn delete_objects(
394 21 : &self,
395 21 : paths: &[RemotePath],
396 21 : cancel: &CancellationToken,
397 21 : ) -> anyhow::Result<()> {
398 21 : match self {
399 6 : Self::LocalFs(s) => s.delete_objects(paths, cancel).await,
400 54 : Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
401 25 : Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
402 0 : Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
403 : }
404 21 : }
405 :
406 : /// See [`RemoteStorage::copy`]
407 3 : pub async fn copy_object(
408 3 : &self,
409 3 : from: &RemotePath,
410 3 : to: &RemotePath,
411 3 : cancel: &CancellationToken,
412 3 : ) -> anyhow::Result<()> {
413 3 : match self {
414 0 : Self::LocalFs(s) => s.copy(from, to, cancel).await,
415 2 : Self::AwsS3(s) => s.copy(from, to, cancel).await,
416 5 : Self::AzureBlob(s) => s.copy(from, to, cancel).await,
417 0 : Self::Unreliable(s) => s.copy(from, to, cancel).await,
418 : }
419 3 : }
420 :
421 : /// See [`RemoteStorage::time_travel_recover`].
422 6 : pub async fn time_travel_recover(
423 6 : &self,
424 6 : prefix: Option<&RemotePath>,
425 6 : timestamp: SystemTime,
426 6 : done_if_after: SystemTime,
427 6 : cancel: &CancellationToken,
428 6 : ) -> Result<(), TimeTravelError> {
429 6 : match self {
430 0 : Self::LocalFs(s) => {
431 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
432 0 : .await
433 : }
434 6 : Self::AwsS3(s) => {
435 6 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
436 67 : .await
437 : }
438 0 : Self::AzureBlob(s) => {
439 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
440 0 : .await
441 : }
442 0 : Self::Unreliable(s) => {
443 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
444 0 : .await
445 : }
446 : }
447 6 : }
448 : }
449 :
450 : impl GenericRemoteStorage {
451 197 : pub fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
452 197 : let timeout = storage_config.timeout;
453 197 : Ok(match &storage_config.storage {
454 173 : RemoteStorageKind::LocalFs(path) => {
455 173 : info!("Using fs root '{path}' as a remote storage");
456 173 : Self::LocalFs(LocalFs::new(path.clone(), timeout)?)
457 : }
458 18 : RemoteStorageKind::AwsS3(s3_config) => {
459 18 : // The profile and access key id are only printed here for debugging purposes,
460 18 : // their values don't indicate the eventually taken choice for auth.
461 18 : let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
462 18 : let access_key_id =
463 18 : std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
464 18 : info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
465 : s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
466 18 : Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout)?))
467 : }
468 6 : RemoteStorageKind::AzureContainer(azure_config) => {
469 6 : let storage_account = azure_config
470 6 : .storage_account
471 6 : .as_deref()
472 6 : .unwrap_or("<AZURE_STORAGE_ACCOUNT>");
473 6 : info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
474 : azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
475 6 : Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config, timeout)?))
476 : }
477 : })
478 197 : }
479 :
480 4 : pub fn unreliable_wrapper(s: Self, fail_first: u64) -> Self {
481 4 : Self::Unreliable(Arc::new(UnreliableWrapper::new(s, fail_first)))
482 4 : }
483 :
484 : /// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.
485 1357 : pub async fn upload_storage_object(
486 1357 : &self,
487 1357 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
488 1357 : from_size_bytes: usize,
489 1357 : to: &RemotePath,
490 1357 : cancel: &CancellationToken,
491 1357 : ) -> anyhow::Result<()> {
492 1357 : self.upload(from, from_size_bytes, to, None, cancel)
493 3724 : .await
494 1346 : .with_context(|| {
495 0 : format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
496 1346 : })
497 1346 : }
498 :
499 : /// Downloads the storage object from the `from` path provided.
500 : /// `byte_range` can be specified to download only a part of the file, if needed.
501 0 : pub async fn download_storage_object(
502 0 : &self,
503 0 : byte_range: Option<(u64, Option<u64>)>,
504 0 : from: &RemotePath,
505 0 : cancel: &CancellationToken,
506 0 : ) -> Result<Download, DownloadError> {
507 0 : match byte_range {
508 0 : Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
509 0 : None => self.download(from, cancel).await,
510 : }
511 0 : }
512 : }
513 :
514 : /// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
515 : /// Immutable, cannot be changed once the file is created.
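///
/// A hedged construction sketch (illustrative only; the crate name `remote_storage` is
/// assumed, and the keys and values are made up):
///
/// ```
/// # use remote_storage::StorageMetadata;
/// let _metadata = StorageMetadata::from([("checksum", "abc123"), ("generation", "7")]);
/// ```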
516 : #[derive(Debug, Clone, PartialEq, Eq)]
517 : pub struct StorageMetadata(HashMap<String, String>);
518 :
519 : impl<const N: usize> From<[(&str, &str); N]> for StorageMetadata {
520 0 : fn from(arr: [(&str, &str); N]) -> Self {
521 0 : let map: HashMap<String, String> = arr
522 0 : .iter()
523 0 : .map(|(k, v)| (k.to_string(), v.to_string()))
524 0 : .collect();
525 0 : Self(map)
526 0 : }
527 : }
528 :
529 : /// External backup storage configuration, enough for creating a client for that storage.
530 : #[derive(Debug, Clone, PartialEq, Eq)]
531 : pub struct RemoteStorageConfig {
532 : /// The storage connection configuration.
533 : pub storage: RemoteStorageKind,
534 : /// A common timeout enforced for all requests after a concurrency limiter permit has been
535 : /// acquired.
536 : pub timeout: Duration,
537 : }
538 :
539 : /// A kind of a remote storage to connect to, with its connection configuration.
540 : #[derive(Debug, Clone, PartialEq, Eq)]
541 : pub enum RemoteStorageKind {
542 : /// Storage based on local file system.
543 : /// Specify a root folder to place all stored files into.
544 : LocalFs(Utf8PathBuf),
545 : /// AWS S3 based storage, storing all files in the S3 bucket
546 : /// specified by the config
547 : AwsS3(S3Config),
548 : /// Azure Blob based storage, storing all files in the container
549 : /// specified by the config
550 : AzureContainer(AzureConfig),
551 : }
552 :
553 : /// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
554 : #[derive(Clone, PartialEq, Eq)]
555 : pub struct S3Config {
556 : /// Name of the bucket to connect to.
557 : pub bucket_name: String,
558 : /// The region where the bucket is located at.
559 : pub bucket_region: String,
560 : /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
561 : pub prefix_in_bucket: Option<String>,
562 : /// A base URL to send S3 requests to.
563 : /// By default, the endpoint is derived from the region name, assuming it is
564 : /// an AWS S3 region name; an invalid region name results in an error.
565 : /// Endpoint provides a way to support other S3 flavors and their regions.
566 : ///
567 : /// Example: `http://127.0.0.1:5000`
568 : pub endpoint: Option<String>,
569 : /// AWS S3 has various limits on its API calls that we must not exceed.
570 : /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
571 : pub concurrency_limit: NonZeroUsize,
572 : pub max_keys_per_list_response: Option<i32>,
573 : pub upload_storage_class: Option<StorageClass>,
574 : }
575 :
576 : impl Debug for S3Config {
577 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
578 0 : f.debug_struct("S3Config")
579 0 : .field("bucket_name", &self.bucket_name)
580 0 : .field("bucket_region", &self.bucket_region)
581 0 : .field("prefix_in_bucket", &self.prefix_in_bucket)
582 0 : .field("concurrency_limit", &self.concurrency_limit)
583 0 : .field(
584 0 : "max_keys_per_list_response",
585 0 : &self.max_keys_per_list_response,
586 0 : )
587 0 : .finish()
588 0 : }
589 : }
590 :
591 : /// Azure container coordinates and access credentials to manage the container contents (read and write).
592 : #[derive(Clone, PartialEq, Eq)]
593 : pub struct AzureConfig {
594 : /// Name of the container to connect to.
595 : pub container_name: String,
596 : /// Name of the storage account the container is inside of
597 : pub storage_account: Option<String>,
598 : /// The region where the bucket is located at.
599 : pub container_region: String,
600 : /// A "subfolder" in the container, to use the same container separately by multiple remote storage users at once.
601 : pub prefix_in_container: Option<String>,
602 : /// Azure has various limits on its API calls that we must not exceed.
603 : /// See [`DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT`] for more details.
604 : pub concurrency_limit: NonZeroUsize,
605 : pub max_keys_per_list_response: Option<i32>,
606 : }
607 :
608 : impl Debug for AzureConfig {
609 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
610 0 : f.debug_struct("AzureConfig")
611 0 : .field("container_name", &self.container_name)
612 0 : .field("storage_account", &self.storage_account)
613 0 : .field("container_region", &self.container_region)
614 0 : .field("prefix_in_container", &self.prefix_in_container)
615 0 : .field("concurrency_limit", &self.concurrency_limit)
616 0 : .field(
617 0 : "max_keys_per_list_response",
618 0 : &self.max_keys_per_list_response,
619 0 : )
620 0 : .finish()
621 0 : }
622 : }
623 :
624 : impl RemoteStorageConfig {
625 : pub const DEFAULT_TIMEOUT: Duration = std::time::Duration::from_secs(120);
626 :
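/// Parses a remote storage configuration from a TOML item.
///
/// A hedged sketch of parsing an S3 configuration (the field names come from this parser;
/// the values are illustrative only, and the crate name `remote_storage` is assumed):
///
/// ```
/// # use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
/// let input = "bucket_name = 'some-bucket'
/// bucket_region = 'eu-central-1'
/// prefix_in_bucket = 'pageserver/'
/// timeout = '120s'";
/// let toml = input.parse::<toml_edit::Document>().unwrap();
/// let config = RemoteStorageConfig::from_toml(toml.as_item())
///     .unwrap()
///     .expect("config is present");
/// assert!(matches!(config.storage, RemoteStorageKind::AwsS3(_)));
/// ```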
627 36 : pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<Option<RemoteStorageConfig>> {
628 36 : let local_path = toml.get("local_path");
629 36 : let bucket_name = toml.get("bucket_name");
630 36 : let bucket_region = toml.get("bucket_region");
631 36 : let container_name = toml.get("container_name");
632 36 : let container_region = toml.get("container_region");
633 :
634 36 : let use_azure = container_name.is_some() && container_region.is_some();
635 :
636 36 : let default_concurrency_limit = if use_azure {
637 0 : DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT
638 : } else {
639 36 : DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT
640 : };
641 36 : let concurrency_limit = NonZeroUsize::new(
642 36 : parse_optional_integer("concurrency_limit", toml)?.unwrap_or(default_concurrency_limit),
643 36 : )
644 36 : .context("Failed to parse 'concurrency_limit' as a positive integer")?;
645 :
646 36 : let max_keys_per_list_response =
647 36 : parse_optional_integer::<i32, _>("max_keys_per_list_response", toml)
648 36 : .context("Failed to parse 'max_keys_per_list_response' as a positive integer")?
649 36 : .or(DEFAULT_MAX_KEYS_PER_LIST_RESPONSE);
650 :
651 36 : let endpoint = toml
652 36 : .get("endpoint")
653 36 : .map(|endpoint| parse_toml_string("endpoint", endpoint))
654 36 : .transpose()?;
655 :
656 36 : let timeout = toml
657 36 : .get("timeout")
658 36 : .map(|timeout| {
659 4 : timeout
660 4 : .as_str()
661 4 : .ok_or_else(|| anyhow::Error::msg("timeout was not a string"))
662 36 : })
663 36 : .transpose()
664 36 : .and_then(|timeout| {
665 36 : timeout
666 36 : .map(humantime::parse_duration)
667 36 : .transpose()
668 36 : .map_err(anyhow::Error::new)
669 36 : })
670 36 : .context("parse timeout")?
671 36 : .unwrap_or(Self::DEFAULT_TIMEOUT);
672 36 :
673 36 : if timeout < Duration::from_secs(1) {
674 0 : bail!("timeout was specified as {timeout:?} which is too low");
675 36 : }
676 :
677 14 : let storage = match (
678 36 : local_path,
679 36 : bucket_name,
680 36 : bucket_region,
681 36 : container_name,
682 36 : container_region,
683 : ) {
684 : // no 'local_path' nor 'bucket_name' options are provided, consider this remote storage disabled
685 22 : (None, None, None, None, None) => return Ok(None),
686 : (_, Some(_), None, ..) => {
687 0 : bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
688 : }
689 : (_, None, Some(_), ..) => {
690 0 : bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
691 : }
692 6 : (None, Some(bucket_name), Some(bucket_region), ..) => {
693 6 : RemoteStorageKind::AwsS3(S3Config {
694 6 : bucket_name: parse_toml_string("bucket_name", bucket_name)?,
695 6 : bucket_region: parse_toml_string("bucket_region", bucket_region)?,
696 6 : prefix_in_bucket: toml
697 6 : .get("prefix_in_bucket")
698 6 : .map(|prefix_in_bucket| {
699 6 : parse_toml_string("prefix_in_bucket", prefix_in_bucket)
700 6 : })
701 6 : .transpose()?,
702 6 : endpoint,
703 6 : concurrency_limit,
704 6 : max_keys_per_list_response,
705 6 : upload_storage_class: toml
706 6 : .get("upload_storage_class")
707 6 : .map(|prefix_in_bucket| -> anyhow::Result<_> {
708 0 : let s = parse_toml_string("upload_storage_class", prefix_in_bucket)?;
709 0 : let storage_class = StorageClass::from_str(&s).expect("infallible");
710 : #[allow(deprecated)]
711 0 : if matches!(storage_class, StorageClass::Unknown(_)) {
712 0 : bail!("Specified storage class unknown to SDK: '{s}'. Allowed values: {:?}", StorageClass::values());
713 0 : }
714 0 : Ok(storage_class)
715 6 : })
716 6 : .transpose()?,
717 : })
718 : }
719 : (_, _, _, Some(_), None) => {
720 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
721 : }
722 : (_, _, _, None, Some(_)) => {
723 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
724 : }
725 0 : (None, None, None, Some(container_name), Some(container_region)) => {
726 0 : RemoteStorageKind::AzureContainer(AzureConfig {
727 0 : container_name: parse_toml_string("container_name", container_name)?,
728 0 : storage_account: toml
729 0 : .get("storage_account")
730 0 : .map(|storage_account| {
731 0 : parse_toml_string("storage_account", storage_account)
732 0 : })
733 0 : .transpose()?,
734 0 : container_region: parse_toml_string("container_region", container_region)?,
735 0 : prefix_in_container: toml
736 0 : .get("prefix_in_container")
737 0 : .map(|prefix_in_container| {
738 0 : parse_toml_string("prefix_in_container", prefix_in_container)
739 0 : })
740 0 : .transpose()?,
741 0 : concurrency_limit,
742 0 : max_keys_per_list_response,
743 : })
744 : }
745 8 : (Some(local_path), None, None, None, None) => RemoteStorageKind::LocalFs(
746 8 : Utf8PathBuf::from(parse_toml_string("local_path", local_path)?),
747 : ),
748 : (Some(_), Some(_), ..) => {
749 0 : bail!("'local_path' and 'bucket_name' are mutually exclusive")
750 : }
751 : (Some(_), _, _, Some(_), Some(_)) => {
752 0 : bail!("local_path and 'container_name' are mutually exclusive")
753 : }
754 : };
755 :
756 14 : Ok(Some(RemoteStorageConfig { storage, timeout }))
757 36 : }
758 : }
759 :
760 : // Helper functions to parse a toml Item
761 72 : fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
762 72 : where
763 72 : I: TryFrom<i64, Error = E>,
764 72 : E: std::error::Error + Send + Sync + 'static,
765 72 : {
766 72 : let toml_integer = match item.get(name) {
767 4 : Some(item) => item
768 4 : .as_integer()
769 4 : .with_context(|| format!("configure option {name} is not an integer"))?,
770 68 : None => return Ok(None),
771 : };
772 :
773 4 : I::try_from(toml_integer)
774 4 : .map(Some)
775 4 : .with_context(|| format!("configure option {name} is too large"))
776 72 : }
777 :
778 32 : fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
779 32 : let s = item
780 32 : .as_str()
781 32 : .with_context(|| format!("configure option {name} is not a string"))?;
782 32 : Ok(s.to_string())
783 32 : }
784 :
785 : struct ConcurrencyLimiter {
786 : // Every request to S3 can be throttled or cancelled if a certain number of requests per second is exceeded.
787 : // The same goes for IAM, which is queried before every S3 request if enabled; IAM has an even lower RPS threshold.
788 : // This limiter helps to ensure we don't exceed those thresholds.
789 : write: Arc<Semaphore>,
790 : read: Arc<Semaphore>,
791 : }
792 :
793 : impl ConcurrencyLimiter {
794 373 : fn for_kind(&self, kind: RequestKind) -> &Arc<Semaphore> {
795 373 : match kind {
796 32 : RequestKind::Get => &self.read,
797 153 : RequestKind::Put => &self.write,
798 30 : RequestKind::List => &self.read,
799 149 : RequestKind::Delete => &self.write,
800 3 : RequestKind::Copy => &self.write,
801 6 : RequestKind::TimeTravel => &self.write,
802 : }
803 373 : }
804 :
805 348 : async fn acquire(
806 348 : &self,
807 348 : kind: RequestKind,
808 348 : ) -> Result<tokio::sync::SemaphorePermit<'_>, tokio::sync::AcquireError> {
809 348 : self.for_kind(kind).acquire().await
810 348 : }
811 :
812 25 : async fn acquire_owned(
813 25 : &self,
814 25 : kind: RequestKind,
815 25 : ) -> Result<tokio::sync::OwnedSemaphorePermit, tokio::sync::AcquireError> {
816 25 : Arc::clone(self.for_kind(kind)).acquire_owned().await
817 25 : }
818 :
819 44 : fn new(limit: usize) -> ConcurrencyLimiter {
820 44 : Self {
821 44 : read: Arc::new(Semaphore::new(limit)),
822 44 : write: Arc::new(Semaphore::new(limit)),
823 44 : }
824 44 : }
825 : }
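// A hedged usage sketch (illustrative only; `limiter` is a `ConcurrencyLimiter` and the
// permit is held for the duration of the remote request):
//
//     let _permit = limiter.acquire(RequestKind::Put).await?;
//     // ... issue the S3 PUT while the permit is held ...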
826 :
827 : #[cfg(test)]
828 : mod tests {
829 : use super::*;
830 :
831 : #[test]
832 4 : fn test_object_name() {
833 4 : let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
834 4 : assert_eq!(k.object_name(), Some("c"));
835 :
836 4 : let k = RemotePath::new(Utf8Path::new("a/b/c/")).unwrap();
837 4 : assert_eq!(k.object_name(), Some("c"));
838 :
839 4 : let k = RemotePath::new(Utf8Path::new("a/")).unwrap();
840 4 : assert_eq!(k.object_name(), Some("a"));
841 :
842 : // XXX is it impossible to have an empty key?
843 4 : let k = RemotePath::new(Utf8Path::new("")).unwrap();
844 4 : assert_eq!(k.object_name(), None);
845 4 : }
846 :
847 : #[test]
848 4 : fn remote_path_cannot_be_created_from_absolute_ones() {
849 4 : let err = RemotePath::new(Utf8Path::new("/")).expect_err("Should fail on absolute paths");
850 4 : assert_eq!(err.to_string(), "Path \"/\" is not relative");
851 4 : }
852 :
853 : #[test]
854 4 : fn parse_localfs_config_with_timeout() {
855 4 : let input = "local_path = '.'
856 4 : timeout = '5s'";
857 4 :
858 4 : let toml = input.parse::<toml_edit::Document>().unwrap();
859 4 :
860 4 : let config = RemoteStorageConfig::from_toml(toml.as_item())
861 4 : .unwrap()
862 4 : .expect("it exists");
863 4 :
864 4 : assert_eq!(
865 4 : config,
866 4 : RemoteStorageConfig {
867 4 : storage: RemoteStorageKind::LocalFs(Utf8PathBuf::from(".")),
868 4 : timeout: Duration::from_secs(5)
869 4 : }
870 4 : );
871 4 : }
872 : }
|