//! A set of generic storage abstractions for the page server to use when backing up and restoring its state from the external storage.
//! No other modules from this tree are supposed to be used directly by external code.
//!
//! The [`RemoteStorage`] trait is a CRUD-like generic abstraction for adapting external storages, with a few implementations:
//! * [`local_fs`] uses the local file system as an external storage
//! * [`s3_bucket`] uses an AWS S3 bucket as an external storage
//! * [`azure_blob`] uses Azure Blob storage as an external storage
//!
#![deny(unsafe_code)]
#![deny(clippy::undocumented_unsafe_blocks)]

mod azure_blob;
mod config;
mod error;
mod local_fs;
mod metrics;
mod s3_bucket;
mod simulate_failures;
mod support;

use std::{
    collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
};

use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};

use bytes::Bytes;
use futures::{stream::Stream, StreamExt};
use serde::{Deserialize, Serialize};
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use tracing::info;

pub use self::{
    azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
    simulate_failures::UnreliableWrapper,
};
use s3_bucket::RequestKind;

pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config};

/// The Azure SDK's ETag type is a simple `String` wrapper: we use it internally instead of repeating it here.
pub use azure_core::Etag;

pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};

/// Default concurrency limit for S3 operations
///
/// Currently, sync happens with AWS S3, which has two limits on requests per second:
/// ~200 RPS for IAM services
/// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
/// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
/// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
/// Set this limit analogously to the S3 limit.
///
/// Here, a limit of at most 20k concurrent connections was noted.
/// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
/// No limit on the client side, which currently means 1000 keys per response for AWS S3.
/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;

/// As defined in S3 docs
pub const MAX_KEYS_PER_DELETE: usize = 1000;

const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';

/// Path on the remote storage, relative to some inner prefix.
/// The prefix is an implementation detail that allows representing local paths
/// as remote ones, stripping the local storage prefix away.
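///
/// A minimal usage sketch (illustrative; assumes this crate is in scope as `remote_storage`):
///
/// ```ignore
/// use camino::Utf8Path;
/// use remote_storage::RemotePath;
///
/// // Only relative paths are accepted; absolute paths are rejected by `new`.
/// let path = RemotePath::new(Utf8Path::new("tenants/some_tenant/timelines")).unwrap();
/// assert_eq!(path.object_name(), Some("timelines"));
/// assert_eq!(path.join("000000").get_path(), "tenants/some_tenant/timelines/000000");
/// ```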
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RemotePath(Utf8PathBuf);

impl Serialize for RemotePath {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.collect_str(self)
    }
}

impl<'de> Deserialize<'de> for RemotePath {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let str = String::deserialize(deserializer)?;
        Ok(Self(Utf8PathBuf::from(&str)))
    }
}

impl std::fmt::Display for RemotePath {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        std::fmt::Display::fmt(&self.0, f)
    }
}

impl RemotePath {
    pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
        anyhow::ensure!(
            relative_path.is_relative(),
            "Path {relative_path:?} is not relative"
        );
        Ok(Self(relative_path.to_path_buf()))
    }

    pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
        Self::new(Utf8Path::new(relative_path))
    }

    pub fn with_base(&self, base_path: &Utf8Path) -> Utf8PathBuf {
        base_path.join(&self.0)
    }

    pub fn object_name(&self) -> Option<&str> {
        self.0.file_name()
    }

    pub fn join(&self, path: impl AsRef<Utf8Path>) -> Self {
        Self(self.0.join(path))
    }

    pub fn get_path(&self) -> &Utf8PathBuf {
        &self.0
    }

    pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
        self.0.strip_prefix(&p.0)
    }

    pub fn add_trailing_slash(&self) -> Self {
        // Unwrap safety: inputs are guaranteed to be valid UTF-8
        Self(format!("{}/", self.0).try_into().unwrap())
    }
}

/// We don't need callers to be able to pass arbitrary delimiters: just control
/// whether listings will use a '/' separator or not.
///
/// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The
/// NoDelimiter mode will only populate `keys`.
#[derive(Copy, Clone)]
pub enum ListingMode {
    WithDelimiter,
    NoDelimiter,
}

#[derive(PartialEq, Eq, Debug, Clone)]
pub struct ListingObject {
    pub key: RemotePath,
    pub last_modified: SystemTime,
    pub size: u64,
}

#[derive(Default)]
pub struct Listing {
    pub prefixes: Vec<RemotePath>,
    pub keys: Vec<ListingObject>,
}

/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
#[allow(async_fn_in_trait)]
pub trait RemoteStorage: Send + Sync + 'static {
    /// List objects in remote storage, with semantics matching AWS S3's [`ListObjectsV2`].
    ///
    /// The stream is guaranteed to return at least one element, even in the case of errors
    /// (in that case it's an `Err()`) or of an empty `Listing`.
    ///
    /// The stream does not end after yielding an error, as long as [`is_permanent`] returns false on that error.
    /// The `next` function can be retried, and a future retry may succeed.
    ///
    /// Note that the prefix is relative to any `prefix_in_bucket` configured for the client, not
    /// to the absolute root of the bucket.
    ///
    /// `mode` configures whether to use a delimiter. Without a delimiter, all keys
    /// within the prefix are listed in the `keys` of the result. With a delimiter, any "directories" at the top level of
    /// the prefix are returned in the `prefixes` of the result, and keys in the top level of the prefix are
    /// returned in `keys`.
    ///
    /// `max_keys` controls the maximum number of keys that will be returned. If this is `None`, this function
    /// will iteratively call `ListObjectsV2` until it runs out of keys. Note that this is not safe to use on
    /// buckets of unlimited size, as the full list of objects is allocated into a monolithic data structure.
    ///
    /// [`ListObjectsV2`]: <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>
    /// [`is_permanent`]: DownloadError::is_permanent
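    ///
    /// A consumption sketch (illustrative; `storage` and `cancel` are assumed to be in scope):
    ///
    /// ```ignore
    /// use futures::StreamExt;
    ///
    /// let mut stream =
    ///     std::pin::pin!(storage.list_streaming(None, ListingMode::NoDelimiter, None, &cancel));
    /// while let Some(batch) = stream.next().await {
    ///     // For brevity this aborts on any error; per the docs above, transient
    ///     // errors could instead be retried by calling `next` again.
    ///     let listing = batch?;
    ///     for object in listing.keys {
    ///         println!("{} ({} bytes)", object.key, object.size);
    ///     }
    /// }
    /// ```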
    fn list_streaming(
        &self,
        prefix: Option<&RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &CancellationToken,
    ) -> impl Stream<Item = Result<Listing, DownloadError>> + Send;

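    /// Collects the entire [`Self::list_streaming`] output into a single [`Listing`].
    ///
    /// A usage sketch (illustrative; `storage`, `prefix`, and `cancel` are assumed to be in scope):
    ///
    /// ```ignore
    /// let listing = storage
    ///     .list(Some(&prefix), ListingMode::WithDelimiter, None, &cancel)
    ///     .await?;
    /// println!("{} keys, {} prefixes", listing.keys.len(), listing.prefixes.len());
    /// ```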
    async fn list(
        &self,
        prefix: Option<&RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &CancellationToken,
    ) -> Result<Listing, DownloadError> {
        let mut stream = std::pin::pin!(self.list_streaming(prefix, mode, max_keys, cancel));
        let mut combined = stream.next().await.expect("At least one item required")?;
        while let Some(list) = stream.next().await {
            let list = list?;
            combined.keys.extend(list.keys);
            combined.prefixes.extend_from_slice(&list.prefixes);
        }
        Ok(combined)
    }

    /// Obtain metadata information about an object.
    async fn head_object(
        &self,
        key: &RemotePath,
        cancel: &CancellationToken,
    ) -> Result<ListingObject, DownloadError>;

    /// Streams the local file contents into the remote storage entry.
    ///
    /// If the operation fails because of timeout or cancellation, the root cause of the error will be
    /// set to `TimeoutOrCancel`.
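    ///
    /// An upload sketch for an in-memory buffer (illustrative; `storage`, `path`, and
    /// `cancel` are assumed to be in scope):
    ///
    /// ```ignore
    /// let data = bytes::Bytes::from_static(b"payload");
    /// let len = data.len();
    /// // S3 needs the exact content length up front, hence `len` alongside the stream.
    /// let stream = futures::stream::once(futures::future::ready(Ok::<_, std::io::Error>(data)));
    /// storage.upload(stream, len, &path, None, &cancel).await?;
    /// ```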
    async fn upload(
        &self,
        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
        // An S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail as the concurrent connection count increases.
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()>;

    /// Streams the remote storage entry contents.
    ///
    /// The returned download stream will obey the initial timeout and the cancellation signal by erroring
    /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
    /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
    ///
    /// Returns the metadata, if any was stored with the file previously.
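    ///
    /// A download sketch draining the stream into memory (illustrative; `storage`, `path`,
    /// and `cancel` are assumed to be in scope):
    ///
    /// ```ignore
    /// let download = storage.download(&path, &cancel).await?;
    /// // `DownloadStream` yields `std::io::Result<Bytes>`, so it can be adapted to
    /// // an `AsyncBufRead` and used with `tokio::io::copy_buf`.
    /// let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
    /// let mut buffer = Vec::new();
    /// tokio::io::copy_buf(&mut reader, &mut buffer).await?;
    /// ```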
    async fn download(
        &self,
        from: &RemotePath,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError>;

    /// Streams a given byte range of the remote storage entry contents.
    ///
    /// The returned download stream will obey the initial timeout and the cancellation signal by erroring
    /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
    /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
    ///
    /// Returns the metadata, if any was stored with the file previously.
    async fn download_byte_range(
        &self,
        from: &RemotePath,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError>;

    /// Delete a single path from remote storage.
    ///
    /// If the operation fails because of timeout or cancellation, the root cause of the error will be
    /// set to `TimeoutOrCancel`. In that situation it is unknown whether the deletion went through.
    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;

    /// Delete multiple paths from remote storage.
    ///
    /// If the operation fails because of timeout or cancellation, the root cause of the error will be
    /// set to `TimeoutOrCancel`. In that situation it is unknown which deletions, if any, went
    /// through.
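    ///
    /// A chunked-deletion sketch for callers that want to stay within [`MAX_KEYS_PER_DELETE`]
    /// per request (whether chunking is required depends on the implementation; illustrative,
    /// with `storage`, `paths`, and `cancel` assumed to be in scope):
    ///
    /// ```ignore
    /// for chunk in paths.chunks(MAX_KEYS_PER_DELETE) {
    ///     storage.delete_objects(chunk, &cancel).await?;
    /// }
    /// ```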
    async fn delete_objects<'a>(
        &self,
        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()>;

    /// Copy a remote object inside a bucket from one path to another.
    async fn copy(
        &self,
        from: &RemotePath,
        to: &RemotePath,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()>;

    /// Resets the content of everything with the given prefix to its state at the given timestamp.
    async fn time_travel_recover(
        &self,
        prefix: Option<&RemotePath>,
        timestamp: SystemTime,
        done_if_after: SystemTime,
        cancel: &CancellationToken,
    ) -> Result<(), TimeTravelError>;
}

/// Data part of an ongoing [`Download`].
///
/// `DownloadStream` is sensitive to the timeout and cancellation used with the original
/// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
/// with `tokio::io::copy_buf`.
// This has 'static because safekeepers do not use cancellation tokens (yet)
pub type DownloadStream =
    Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;

pub struct Download {
    pub download_stream: DownloadStream,
    /// The last time the file was modified (`last-modified` HTTP header)
    pub last_modified: SystemTime,
    /// A way to identify this specific version of the resource (`etag` HTTP header)
    pub etag: Etag,
    /// Extra key-value data, associated with the current remote file.
    pub metadata: Option<StorageMetadata>,
}

impl Debug for Download {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Download")
            .field("metadata", &self.metadata)
            .finish()
    }
}

/// Every currently supported storage kind.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
// Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
#[derive(Clone)]
pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
    LocalFs(LocalFs),
    AwsS3(Arc<S3Bucket>),
    AzureBlob(Arc<AzureBlobStorage>),
    Unreliable(Other),
}

impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
    /// See [`RemoteStorage::list`].
    pub async fn list(
        &self,
        prefix: Option<&RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &CancellationToken,
    ) -> Result<Listing, DownloadError> {
        match self {
            Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
            Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
            Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
            Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
        }
    }

    /// See [`RemoteStorage::list_streaming`].
    pub fn list_streaming<'a>(
        &'a self,
        prefix: Option<&'a RemotePath>,
        mode: ListingMode,
        max_keys: Option<NonZeroU32>,
        cancel: &'a CancellationToken,
    ) -> impl Stream<Item = Result<Listing, DownloadError>> + 'a + Send {
        match self {
            Self::LocalFs(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel))
                as Pin<Box<dyn Stream<Item = Result<Listing, DownloadError>> + Send>>,
            Self::AwsS3(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
            Self::AzureBlob(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
            Self::Unreliable(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)),
        }
    }

    /// See [`RemoteStorage::head_object`].
    pub async fn head_object(
        &self,
        key: &RemotePath,
        cancel: &CancellationToken,
    ) -> Result<ListingObject, DownloadError> {
        match self {
            Self::LocalFs(s) => s.head_object(key, cancel).await,
            Self::AwsS3(s) => s.head_object(key, cancel).await,
            Self::AzureBlob(s) => s.head_object(key, cancel).await,
            Self::Unreliable(s) => s.head_object(key, cancel).await,
        }
    }

    /// See [`RemoteStorage::upload`]
    pub async fn upload(
        &self,
        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        match self {
            Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
            Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
            Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
            Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
        }
    }

    /// See [`RemoteStorage::download`]
    pub async fn download(
        &self,
        from: &RemotePath,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError> {
        match self {
            Self::LocalFs(s) => s.download(from, cancel).await,
            Self::AwsS3(s) => s.download(from, cancel).await,
            Self::AzureBlob(s) => s.download(from, cancel).await,
            Self::Unreliable(s) => s.download(from, cancel).await,
        }
    }

    /// See [`RemoteStorage::download_byte_range`]
    pub async fn download_byte_range(
        &self,
        from: &RemotePath,
        start_inclusive: u64,
        end_exclusive: Option<u64>,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError> {
        match self {
            Self::LocalFs(s) => {
                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
                    .await
            }
            Self::AwsS3(s) => {
                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
                    .await
            }
            Self::AzureBlob(s) => {
                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
                    .await
            }
            Self::Unreliable(s) => {
                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
                    .await
            }
        }
    }

    /// See [`RemoteStorage::delete`]
    pub async fn delete(
        &self,
        path: &RemotePath,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        match self {
            Self::LocalFs(s) => s.delete(path, cancel).await,
            Self::AwsS3(s) => s.delete(path, cancel).await,
            Self::AzureBlob(s) => s.delete(path, cancel).await,
            Self::Unreliable(s) => s.delete(path, cancel).await,
        }
    }

    /// See [`RemoteStorage::delete_objects`]
    pub async fn delete_objects(
        &self,
        paths: &[RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        match self {
            Self::LocalFs(s) => s.delete_objects(paths, cancel).await,
            Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
            Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
            Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
        }
    }

    /// See [`RemoteStorage::copy`]
    pub async fn copy_object(
        &self,
        from: &RemotePath,
        to: &RemotePath,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        match self {
            Self::LocalFs(s) => s.copy(from, to, cancel).await,
            Self::AwsS3(s) => s.copy(from, to, cancel).await,
            Self::AzureBlob(s) => s.copy(from, to, cancel).await,
            Self::Unreliable(s) => s.copy(from, to, cancel).await,
        }
    }

    /// See [`RemoteStorage::time_travel_recover`].
    pub async fn time_travel_recover(
        &self,
        prefix: Option<&RemotePath>,
        timestamp: SystemTime,
        done_if_after: SystemTime,
        cancel: &CancellationToken,
    ) -> Result<(), TimeTravelError> {
        match self {
            Self::LocalFs(s) => {
                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
                    .await
            }
            Self::AwsS3(s) => {
                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
                    .await
            }
            Self::AzureBlob(s) => {
                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
                    .await
            }
            Self::Unreliable(s) => {
                s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
                    .await
            }
        }
    }
}

impl GenericRemoteStorage {
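    /// Constructs the concrete storage client described by the configuration.
    ///
    /// A construction sketch (illustrative; the exact field set of
    /// [`RemoteStorageConfig`] lives in the `config` module):
    ///
    /// ```ignore
    /// let config: RemoteStorageConfig = /* parsed from the pageserver config */;
    /// let storage = GenericRemoteStorage::from_config(&config).await?;
    /// let cancel = tokio_util::sync::CancellationToken::new();
    /// let listing = storage.list(None, ListingMode::NoDelimiter, None, &cancel).await?;
    /// ```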
    pub async fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
        let timeout = storage_config.timeout;
        Ok(match &storage_config.storage {
            RemoteStorageKind::LocalFs { local_path: path } => {
                info!("Using fs root '{path}' as a remote storage");
                Self::LocalFs(LocalFs::new(path.clone(), timeout)?)
            }
            RemoteStorageKind::AwsS3(s3_config) => {
                // The profile and access key id are only printed here for debugging purposes;
                // their values don't indicate the auth method that is eventually chosen.
                let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
                let access_key_id =
                    std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
                info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
                      s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
                Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?))
            }
            RemoteStorageKind::AzureContainer(azure_config) => {
                let storage_account = azure_config
                    .storage_account
                    .as_deref()
                    .unwrap_or("<AZURE_STORAGE_ACCOUNT>");
                info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'",
                      azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
                Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config, timeout)?))
            }
        })
    }

    pub fn unreliable_wrapper(s: Self, fail_first: u64) -> Self {
        Self::Unreliable(Arc::new(UnreliableWrapper::new(s, fail_first)))
    }

    /// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.
    pub async fn upload_storage_object(
        &self,
        from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
        from_size_bytes: usize,
        to: &RemotePath,
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        self.upload(from, from_size_bytes, to, None, cancel)
            .await
            .with_context(|| {
                format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
            })
    }

    /// Downloads the storage object.
    /// `byte_range` can be specified to download only a part of the file, if needed.
    pub async fn download_storage_object(
        &self,
        byte_range: Option<(u64, Option<u64>)>,
        from: &RemotePath,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError> {
        match byte_range {
            Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
            None => self.download(from, cancel).await,
        }
    }

    /// The name of the bucket/container/etc.
    pub fn bucket_name(&self) -> Option<&str> {
        match self {
            Self::LocalFs(_s) => None,
            Self::AwsS3(s) => Some(s.bucket_name()),
            Self::AzureBlob(s) => Some(s.container_name()),
            Self::Unreliable(_s) => None,
        }
    }
}

/// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
/// Immutable; cannot be changed once the file is created.
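///
/// Construction sketch (using the `From` impl below): `StorageMetadata::from([("kind", "index"), ("version", "1")])`.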
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StorageMetadata(HashMap<String, String>);

impl<const N: usize> From<[(&str, &str); N]> for StorageMetadata {
    fn from(arr: [(&str, &str); N]) -> Self {
        let map: HashMap<String, String> = arr
            .iter()
            .map(|(k, v)| (k.to_string(), v.to_string()))
            .collect();
        Self(map)
    }
}

struct ConcurrencyLimiter {
    // Every request to S3 can be throttled or cancelled if a certain number of requests per second is exceeded.
    // The same goes for IAM, which is queried before every S3 request, if enabled. IAM has an even lower RPS threshold.
    // This helps to ensure we don't exceed those thresholds.
    write: Arc<Semaphore>,
    read: Arc<Semaphore>,
}

impl ConcurrencyLimiter {
    fn for_kind(&self, kind: RequestKind) -> &Arc<Semaphore> {
        match kind {
            RequestKind::Get => &self.read,
            RequestKind::Put => &self.write,
            RequestKind::List => &self.read,
            RequestKind::Delete => &self.write,
            RequestKind::Copy => &self.write,
            RequestKind::TimeTravel => &self.write,
            RequestKind::Head => &self.read,
        }
    }

    async fn acquire(
        &self,
        kind: RequestKind,
    ) -> Result<tokio::sync::SemaphorePermit<'_>, tokio::sync::AcquireError> {
        self.for_kind(kind).acquire().await
    }

    async fn acquire_owned(
        &self,
        kind: RequestKind,
    ) -> Result<tokio::sync::OwnedSemaphorePermit, tokio::sync::AcquireError> {
        Arc::clone(self.for_kind(kind)).acquire_owned().await
    }

    fn new(limit: usize) -> ConcurrencyLimiter {
        Self {
            read: Arc::new(Semaphore::new(limit)),
            write: Arc::new(Semaphore::new(limit)),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_object_name() {
        let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
        assert_eq!(k.object_name(), Some("c"));

        let k = RemotePath::new(Utf8Path::new("a/b/c/")).unwrap();
        assert_eq!(k.object_name(), Some("c"));

        let k = RemotePath::new(Utf8Path::new("a/")).unwrap();
        assert_eq!(k.object_name(), Some("a"));

        // XXX is it impossible to have an empty key?
        let k = RemotePath::new(Utf8Path::new("")).unwrap();
        assert_eq!(k.object_name(), None);
    }

    #[test]
    fn remote_path_cannot_be_created_from_absolute_ones() {
        let err = RemotePath::new(Utf8Path::new("/")).expect_err("Should fail on absolute paths");
        assert_eq!(err.to_string(), "Path \"/\" is not relative");
    }
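
    #[test]
    fn add_trailing_slash_appends_separator() {
        // An additional check (a sketch, not part of the original suite):
        // `add_trailing_slash` must keep the textual trailing '/', which plain
        // path equality would otherwise ignore, so compare via `as_str`.
        let p = RemotePath::new(Utf8Path::new("a/b")).unwrap();
        assert_eq!(p.add_trailing_slash().get_path().as_str(), "a/b/");
    }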
}