Line data Source code
1 : //! A set of generic storage abstractions for the page server to use when backing up and restoring its state to and from external storage.
2 : //! No other modules from this tree are supposed to be used directly by external code.
3 : //!
4 : //! The [`RemoteStorage`] trait is a CRUD-like generic abstraction for adapting external storages, with a few implementations:
5 : //! * [`local_fs`] uses the local file system as external storage
6 : //! * [`s3_bucket`] uses an AWS S3 bucket as external storage
7 : //! * [`azure_blob`] uses Azure Blob storage as external storage
8 : //!
9 : #![deny(unsafe_code)]
10 : #![deny(clippy::undocumented_unsafe_blocks)]
11 :
12 : mod azure_blob;
13 : mod error;
14 : mod local_fs;
15 : mod s3_bucket;
16 : mod simulate_failures;
17 : mod support;
18 :
19 : use std::{
20 : collections::HashMap,
21 : fmt::Debug,
22 : num::{NonZeroU32, NonZeroUsize},
23 : pin::Pin,
24 : sync::Arc,
25 : time::{Duration, SystemTime},
26 : };
27 :
28 : use anyhow::{bail, Context};
29 : use camino::{Utf8Path, Utf8PathBuf};
30 :
31 : use bytes::Bytes;
32 : use futures::stream::Stream;
33 : use serde::{Deserialize, Serialize};
34 : use tokio::sync::Semaphore;
35 : use tokio_util::sync::CancellationToken;
36 : use toml_edit::Item;
37 : use tracing::info;
38 :
39 : pub use self::{
40 : azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket,
41 : simulate_failures::UnreliableWrapper,
42 : };
43 : use s3_bucket::RequestKind;
44 :
45 : pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
46 :
47 : /// Currently, sync happens with AWS S3, which has two limits on requests per second:
48 : /// ~200 RPS for IAM services
49 : /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
50 : /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
51 : /// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
52 : pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
53 : /// We set this a little low, as we currently buffer the entire file in RAM.
54 : ///
55 : /// A limit of at most 20k concurrent connections was noted here:
56 : /// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
57 : pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 30;
58 : /// No limit on the client side, which currently means 1000 for AWS S3.
59 : /// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
60 : pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
61 :
62 : /// As defined in S3 docs
63 : pub const MAX_KEYS_PER_DELETE: usize = 1000;
64 :
65 : const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
66 :
67 : /// A path on the remote storage, relative to some inner prefix.
68 : /// The prefix is an implementation detail that allows representing local paths
69 : /// as remote ones, with the local storage prefix stripped away.
70 663 : #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
71 : pub struct RemotePath(Utf8PathBuf);
72 :
73 : impl Serialize for RemotePath {
74 0 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
75 0 : where
76 0 : S: serde::Serializer,
77 0 : {
78 0 : serializer.collect_str(self)
79 0 : }
80 : }
81 :
82 : impl<'de> Deserialize<'de> for RemotePath {
83 0 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
84 0 : where
85 0 : D: serde::Deserializer<'de>,
86 0 : {
87 0 : let str = String::deserialize(deserializer)?;
88 0 : Ok(Self(Utf8PathBuf::from(&str)))
89 0 : }
90 : }
91 :
92 : impl std::fmt::Display for RemotePath {
93 16 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94 16 : std::fmt::Display::fmt(&self.0, f)
95 16 : }
96 : }
97 :
98 : impl RemotePath {
99 1807 : pub fn new(relative_path: &Utf8Path) -> anyhow::Result<Self> {
100 1807 : anyhow::ensure!(
101 1807 : relative_path.is_relative(),
102 2 : "Path {relative_path:?} is not relative"
103 : );
104 1805 : Ok(Self(relative_path.to_path_buf()))
105 1807 : }
106 :
107 1009 : pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
108 1009 : Self::new(Utf8Path::new(relative_path))
109 1009 : }
110 :
111 1594 : pub fn with_base(&self, base_path: &Utf8Path) -> Utf8PathBuf {
112 1594 : base_path.join(&self.0)
113 1594 : }
114 :
115 14 : pub fn object_name(&self) -> Option<&str> {
116 14 : self.0.file_name()
117 14 : }
118 :
119 102 : pub fn join(&self, segment: &Utf8Path) -> Self {
120 102 : Self(self.0.join(segment))
121 102 : }
122 :
123 496 : pub fn get_path(&self) -> &Utf8PathBuf {
124 496 : &self.0
125 496 : }
126 :
127 0 : pub fn extension(&self) -> Option<&str> {
128 0 : self.0.extension()
129 0 : }
130 :
131 13 : pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
132 13 : self.0.strip_prefix(&p.0)
133 13 : }
134 : }
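// A minimal illustrative sketch of the `RemotePath` semantics described above: rebase a
// relative remote path onto a local root and read back its object name. The root directory
// used here is a hypothetical placeholder.
#[cfg(test)]
#[test]
fn remote_path_round_trip_example() {
    let remote = RemotePath::new(Utf8Path::new("tenants/some_tenant/timelines")).unwrap();
    // Rebasing onto a local root reconstructs the on-disk location...
    let local = remote.with_base(Utf8Path::new("/var/lib/pageserver"));
    assert_eq!(
        local,
        Utf8PathBuf::from("/var/lib/pageserver/tenants/some_tenant/timelines")
    );
    // ...and the object name is the last path segment.
    assert_eq!(remote.object_name(), Some("timelines"));
}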
135 :
136 : /// We don't need callers to be able to pass arbitrary delimiters: just control
137 : /// whether listings will use a '/' separator or not.
138 : ///
139 : /// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The
140 : /// NoDelimiter mode will only populate `keys`.
141 : pub enum ListingMode {
142 : WithDelimiter,
143 : NoDelimiter,
144 : }
145 :
146 100 : #[derive(Default)]
147 : pub struct Listing {
148 : pub prefixes: Vec<RemotePath>,
149 : pub keys: Vec<RemotePath>,
150 : }
151 :
152 : /// Storage (potentially remote) API used by the page server to manage its state.
153 : /// This storage tries to be unaware of any layered repository context,
154 : /// providing basic CRUD operations for storage files.
155 : #[allow(async_fn_in_trait)]
156 : pub trait RemoteStorage: Send + Sync + 'static {
157 : /// Lists all top-level subdirectories for a given prefix.
158 : /// Note: here we assume that if a prefix is passed, it was obtained via remote_object_id,
159 : /// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS),
160 : /// so this method doesn't need to.
161 9 : async fn list_prefixes(
162 9 : &self,
163 9 : prefix: Option<&RemotePath>,
164 9 : cancel: &CancellationToken,
165 9 : ) -> Result<Vec<RemotePath>, DownloadError> {
166 0 : let result = self
167 0 : .list(prefix, ListingMode::WithDelimiter, None, cancel)
168 0 : .await?
169 : .prefixes;
170 0 : Ok(result)
171 0 : }
172 : /// Lists all files in a directory "recursively"
173 : /// (not really recursively, because AWS has a flat namespace).
174 : /// Note: this is subtly different from list_prefixes,
175 : /// because it lists files instead of listing
176 : /// names sharing common prefixes.
177 : /// For example,
178 : /// list_files("foo/bar") = ["foo/bar/cat123.txt",
179 : /// "foo/bar/cat567.txt", "foo/bar/dog123.txt", "foo/bar/dog456.txt"]
180 : /// whereas,
181 : /// list_prefixes("foo/bar/") = ["cat", "dog"]
182 : /// See `test_real_s3.rs` for more details.
183 : ///
184 : /// max_keys limits max number of keys returned; None means unlimited.
185 27 : async fn list_files(
186 27 : &self,
187 27 : prefix: Option<&RemotePath>,
188 27 : max_keys: Option<NonZeroU32>,
189 27 : cancel: &CancellationToken,
190 27 : ) -> Result<Vec<RemotePath>, DownloadError> {
191 6 : let result = self
192 6 : .list(prefix, ListingMode::NoDelimiter, max_keys, cancel)
193 12 : .await?
194 : .keys;
195 6 : Ok(result)
196 6 : }
197 :
198 : async fn list(
199 : &self,
200 : prefix: Option<&RemotePath>,
201 : _mode: ListingMode,
202 : max_keys: Option<NonZeroU32>,
203 : cancel: &CancellationToken,
204 : ) -> Result<Listing, DownloadError>;
205 :
206 : /// Streams the local file contents into the remote storage entry.
207 : ///
208 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
209 : /// set to `TimeoutOrCancel`.
210 : async fn upload(
211 : &self,
212 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
213 : // The S3 PUT request requires the content length to be specified;
214 : // otherwise it starts to fail as the concurrent connection count increases.
215 : data_size_bytes: usize,
216 : to: &RemotePath,
217 : metadata: Option<StorageMetadata>,
218 : cancel: &CancellationToken,
219 : ) -> anyhow::Result<()>;
220 :
221 : /// Streams the remote storage entry contents.
222 : ///
223 : /// The returned download stream will obey the initial timeout and cancellation signal, erroring
224 : /// on whichever happens first. Only one of the two reasons will fail the stream, which is usually
225 : /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
226 : ///
227 : /// Returns the metadata, if any was stored with the file previously.
228 : async fn download(
229 : &self,
230 : from: &RemotePath,
231 : cancel: &CancellationToken,
232 : ) -> Result<Download, DownloadError>;
233 :
234 : /// Streams a given byte range of the remote storage entry contents.
235 : ///
236 : /// The returned download stream will obey the initial timeout and cancellation signal, erroring
237 : /// on whichever happens first. Only one of the two reasons will fail the stream, which is usually
238 : /// enough for `tokio::io::copy_buf` usage. If needed, the error can be filtered out.
239 : ///
240 : /// Returns the metadata, if any was stored with the file previously.
241 : async fn download_byte_range(
242 : &self,
243 : from: &RemotePath,
244 : start_inclusive: u64,
245 : end_exclusive: Option<u64>,
246 : cancel: &CancellationToken,
247 : ) -> Result<Download, DownloadError>;
248 :
249 : /// Delete a single path from remote storage.
250 : ///
251 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
252 : /// set to `TimeoutOrCancel`. In such a situation it is unknown whether the deletion went through.
253 : async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()>;
254 :
255 : /// Delete multiple paths from remote storage.
256 : ///
257 : /// If the operation fails because of timeout or cancellation, the root cause of the error will be
258 : /// set to `TimeoutOrCancel`. In such a situation it is unknown which deletions, if any, went
259 : /// through.
260 : async fn delete_objects<'a>(
261 : &self,
262 : paths: &'a [RemotePath],
263 : cancel: &CancellationToken,
264 : ) -> anyhow::Result<()>;
265 :
266 : /// Copy a remote object inside a bucket from one path to another.
267 : async fn copy(
268 : &self,
269 : from: &RemotePath,
270 : to: &RemotePath,
271 : cancel: &CancellationToken,
272 : ) -> anyhow::Result<()>;
273 :
274 : /// Resets the content of everything with the given prefix to its state at the given timestamp.
275 : async fn time_travel_recover(
276 : &self,
277 : prefix: Option<&RemotePath>,
278 : timestamp: SystemTime,
279 : done_if_after: SystemTime,
280 : cancel: &CancellationToken,
281 : ) -> Result<(), TimeTravelError>;
282 : }
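// A minimal illustrative sketch (not a method of the trait above): wrapping an in-memory
// buffer into the `Stream<Item = std::io::Result<Bytes>>` that `upload` expects, together
// with the exact content length S3 PUT requires. The helper name is hypothetical.
#[allow(dead_code)]
async fn upload_bytes_example<S: RemoteStorage>(
    storage: &S,
    data: Vec<u8>,
    to: &RemotePath,
    cancel: &CancellationToken,
) -> anyhow::Result<()> {
    let len = data.len();
    // A one-shot stream yielding the whole buffer; larger uploads would chunk it instead.
    let body = futures::stream::once(futures::future::ready(Ok::<_, std::io::Error>(
        Bytes::from(data),
    )));
    storage.upload(body, len, to, None, cancel).await
}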
283 :
284 : /// DownloadStream is sensitive to the timeout and cancellation used with the original
285 : /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
286 : /// with `tokio::io::copy_buf`.
287 : // This has 'static because safekeepers do not use cancellation tokens (yet)
288 : pub type DownloadStream =
289 : Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static>>;
290 :
291 : pub struct Download {
292 : pub download_stream: DownloadStream,
293 : /// The last time the file was modified (`last-modified` HTTP header)
294 : pub last_modified: Option<SystemTime>,
295 : /// A way to identify this specific version of the resource (`etag` HTTP header)
296 : pub etag: Option<String>,
297 : /// Extra key-value data, associated with the current remote file.
298 : pub metadata: Option<StorageMetadata>,
299 : }
300 :
301 : impl Debug for Download {
302 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
303 0 : f.debug_struct("Download")
304 0 : .field("metadata", &self.metadata)
305 0 : .finish()
306 0 : }
307 : }
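// A minimal illustrative sketch: consuming a `Download` with `tokio::io::copy_buf`, as the
// `download` docs suggest. Assumes the `tokio-util` crate's "io" feature for `StreamReader`;
// the destination writer is arbitrary.
#[allow(dead_code)]
async fn save_download_example(
    download: Download,
    dest: &mut (impl tokio::io::AsyncWrite + Unpin),
) -> std::io::Result<u64> {
    // `StreamReader` adapts a `Stream<Item = io::Result<Bytes>>` into a buffered `AsyncRead`.
    let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
    tokio::io::copy_buf(&mut reader, dest).await
}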
308 :
309 : /// Every storage kind currently supported.
310 : /// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
311 492 : #[derive(Clone)]
312 : // Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
313 : pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
314 : LocalFs(LocalFs),
315 : AwsS3(Arc<S3Bucket>),
316 : AzureBlob(Arc<AzureBlobStorage>),
317 : Unreliable(Other),
318 : }
319 :
320 : impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
321 88 : pub async fn list(
322 88 : &self,
323 88 : prefix: Option<&RemotePath>,
324 88 : mode: ListingMode,
325 88 : max_keys: Option<NonZeroU32>,
326 88 : cancel: &CancellationToken,
327 88 : ) -> anyhow::Result<Listing, DownloadError> {
328 88 : match self {
329 88 : Self::LocalFs(s) => s.list(prefix, mode, max_keys, cancel).await,
330 0 : Self::AwsS3(s) => s.list(prefix, mode, max_keys, cancel).await,
331 0 : Self::AzureBlob(s) => s.list(prefix, mode, max_keys, cancel).await,
332 0 : Self::Unreliable(s) => s.list(prefix, mode, max_keys, cancel).await,
333 : }
334 88 : }
335 :
336 : // A function for listing all the files in a "directory"
337 : // Example:
338 : // list_files("foo/bar") = ["foo/bar/a.txt", "foo/bar/b.txt"]
339 : //
340 : // max_keys limits max number of keys returned; None means unlimited.
341 6 : pub async fn list_files(
342 6 : &self,
343 6 : folder: Option<&RemotePath>,
344 6 : max_keys: Option<NonZeroU32>,
345 6 : cancel: &CancellationToken,
346 6 : ) -> Result<Vec<RemotePath>, DownloadError> {
347 6 : match self {
348 12 : Self::LocalFs(s) => s.list_files(folder, max_keys, cancel).await,
349 0 : Self::AwsS3(s) => s.list_files(folder, max_keys, cancel).await,
350 0 : Self::AzureBlob(s) => s.list_files(folder, max_keys, cancel).await,
351 0 : Self::Unreliable(s) => s.list_files(folder, max_keys, cancel).await,
352 : }
353 6 : }
354 :
355 : // Lists the common *prefixes* of files, if any.
356 : // Example:
357 : // list_prefixes("foo123","foo567","bar123","bar432") = ["foo", "bar"]
358 0 : pub async fn list_prefixes(
359 0 : &self,
360 0 : prefix: Option<&RemotePath>,
361 0 : cancel: &CancellationToken,
362 0 : ) -> Result<Vec<RemotePath>, DownloadError> {
363 0 : match self {
364 0 : Self::LocalFs(s) => s.list_prefixes(prefix, cancel).await,
365 0 : Self::AwsS3(s) => s.list_prefixes(prefix, cancel).await,
366 0 : Self::AzureBlob(s) => s.list_prefixes(prefix, cancel).await,
367 0 : Self::Unreliable(s) => s.list_prefixes(prefix, cancel).await,
368 : }
369 0 : }
370 :
371 : /// See [`RemoteStorage::upload`]
372 1410 : pub async fn upload(
373 1410 : &self,
374 1410 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
375 1410 : data_size_bytes: usize,
376 1410 : to: &RemotePath,
377 1410 : metadata: Option<StorageMetadata>,
378 1410 : cancel: &CancellationToken,
379 1410 : ) -> anyhow::Result<()> {
380 1410 : match self {
381 19855 : Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
382 0 : Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
383 0 : Self::AzureBlob(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
384 76 : Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata, cancel).await,
385 : }
386 1398 : }
387 :
388 36 : pub async fn download(
389 36 : &self,
390 36 : from: &RemotePath,
391 36 : cancel: &CancellationToken,
392 36 : ) -> Result<Download, DownloadError> {
393 36 : match self {
394 36 : Self::LocalFs(s) => s.download(from, cancel).await,
395 0 : Self::AwsS3(s) => s.download(from, cancel).await,
396 0 : Self::AzureBlob(s) => s.download(from, cancel).await,
397 0 : Self::Unreliable(s) => s.download(from, cancel).await,
398 : }
399 36 : }
400 :
401 0 : pub async fn download_byte_range(
402 0 : &self,
403 0 : from: &RemotePath,
404 0 : start_inclusive: u64,
405 0 : end_exclusive: Option<u64>,
406 0 : cancel: &CancellationToken,
407 0 : ) -> Result<Download, DownloadError> {
408 0 : match self {
409 0 : Self::LocalFs(s) => {
410 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
411 0 : .await
412 : }
413 0 : Self::AwsS3(s) => {
414 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
415 0 : .await
416 : }
417 0 : Self::AzureBlob(s) => {
418 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
419 0 : .await
420 : }
421 0 : Self::Unreliable(s) => {
422 0 : s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
423 0 : .await
424 : }
425 : }
426 0 : }
427 :
428 : /// See [`RemoteStorage::delete`]
429 2 : pub async fn delete(
430 2 : &self,
431 2 : path: &RemotePath,
432 2 : cancel: &CancellationToken,
433 2 : ) -> anyhow::Result<()> {
434 2 : match self {
435 2 : Self::LocalFs(s) => s.delete(path, cancel).await,
436 0 : Self::AwsS3(s) => s.delete(path, cancel).await,
437 0 : Self::AzureBlob(s) => s.delete(path, cancel).await,
438 0 : Self::Unreliable(s) => s.delete(path, cancel).await,
439 : }
440 2 : }
441 :
442 : /// See [`RemoteStorage::delete_objects`]
443 6 : pub async fn delete_objects(
444 6 : &self,
445 6 : paths: &[RemotePath],
446 6 : cancel: &CancellationToken,
447 6 : ) -> anyhow::Result<()> {
448 6 : match self {
449 6 : Self::LocalFs(s) => s.delete_objects(paths, cancel).await,
450 0 : Self::AwsS3(s) => s.delete_objects(paths, cancel).await,
451 0 : Self::AzureBlob(s) => s.delete_objects(paths, cancel).await,
452 0 : Self::Unreliable(s) => s.delete_objects(paths, cancel).await,
453 : }
454 6 : }
455 :
456 : /// See [`RemoteStorage::copy`]
457 0 : pub async fn copy_object(
458 0 : &self,
459 0 : from: &RemotePath,
460 0 : to: &RemotePath,
461 0 : cancel: &CancellationToken,
462 0 : ) -> anyhow::Result<()> {
463 0 : match self {
464 0 : Self::LocalFs(s) => s.copy(from, to, cancel).await,
465 0 : Self::AwsS3(s) => s.copy(from, to, cancel).await,
466 0 : Self::AzureBlob(s) => s.copy(from, to, cancel).await,
467 0 : Self::Unreliable(s) => s.copy(from, to, cancel).await,
468 : }
469 0 : }
470 :
471 : /// See [`RemoteStorage::time_travel_recover`].
472 0 : pub async fn time_travel_recover(
473 0 : &self,
474 0 : prefix: Option<&RemotePath>,
475 0 : timestamp: SystemTime,
476 0 : done_if_after: SystemTime,
477 0 : cancel: &CancellationToken,
478 0 : ) -> Result<(), TimeTravelError> {
479 0 : match self {
480 0 : Self::LocalFs(s) => {
481 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
482 0 : .await
483 : }
484 0 : Self::AwsS3(s) => {
485 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
486 0 : .await
487 : }
488 0 : Self::AzureBlob(s) => {
489 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
490 0 : .await
491 : }
492 0 : Self::Unreliable(s) => {
493 0 : s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
494 0 : .await
495 : }
496 : }
497 0 : }
498 : }
499 :
500 : impl GenericRemoteStorage {
501 130 : pub fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
502 130 : let timeout = storage_config.timeout;
503 130 : Ok(match &storage_config.storage {
504 106 : RemoteStorageKind::LocalFs(path) => {
505 106 : info!("Using fs root '{path}' as a remote storage");
506 106 : Self::LocalFs(LocalFs::new(path.clone(), timeout)?)
507 : }
508 18 : RemoteStorageKind::AwsS3(s3_config) => {
509 18 : // The profile and access key ID are only printed here for debugging purposes;
510 18 : // their values don't indicate which auth option is eventually chosen.
511 18 : let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "<none>".into());
512 18 : let access_key_id =
513 18 : std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "<none>".into());
514 18 : info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}",
515 18 : s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
516 18 : Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout)?))
517 : }
518 6 : RemoteStorageKind::AzureContainer(azure_config) => {
519 6 : info!("Using azure container '{}' in region '{}' as a remote storage, prefix in container: '{:?}'",
520 6 : azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container);
521 6 : Self::AzureBlob(Arc::new(AzureBlobStorage::new(azure_config, timeout)?))
522 : }
523 : })
524 130 : }
525 :
526 4 : pub fn unreliable_wrapper(s: Self, fail_first: u64) -> Self {
527 4 : Self::Unreliable(Arc::new(UnreliableWrapper::new(s, fail_first)))
528 4 : }
529 :
530 : /// See [`RemoteStorage::upload`], which this method calls with `None` as metadata.
531 747 : pub async fn upload_storage_object(
532 747 : &self,
533 747 : from: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
534 747 : from_size_bytes: usize,
535 747 : to: &RemotePath,
536 747 : cancel: &CancellationToken,
537 747 : ) -> anyhow::Result<()> {
538 747 : self.upload(from, from_size_bytes, to, None, cancel)
539 2429 : .await
540 745 : .with_context(|| {
541 0 : format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
542 745 : })
543 745 : }
544 :
545 : /// Downloads the storage object at the `from` path provided.
546 : /// `byte_range` can be specified to download only a part of the file, if needed.
547 0 : pub async fn download_storage_object(
548 0 : &self,
549 0 : byte_range: Option<(u64, Option<u64>)>,
550 0 : from: &RemotePath,
551 0 : cancel: &CancellationToken,
552 0 : ) -> Result<Download, DownloadError> {
553 0 : match byte_range {
554 0 : Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
555 0 : None => self.download(from, cancel).await,
556 : }
557 0 : }
558 : }
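// A minimal illustrative sketch of the typical wiring: parse a TOML snippet with
// `RemoteStorageConfig::from_toml` (defined below) and hand the result to
// `GenericRemoteStorage::from_config`. The path and timeout values are placeholders.
#[allow(dead_code)]
fn build_client_example() -> anyhow::Result<Option<GenericRemoteStorage>> {
    let toml: toml_edit::Document = "local_path = '/tmp/remote_storage'\ntimeout = '60s'".parse()?;
    let config = RemoteStorageConfig::from_toml(toml.as_item())?;
    // `from_toml` returns `None` when remote storage is not configured at all.
    config.as_ref().map(GenericRemoteStorage::from_config).transpose()
}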
559 :
560 : /// Extra set of key-value pairs that contain arbitrary metadata about the storage entry.
561 : /// Immutable, cannot be changed once the file is created.
562 4 : #[derive(Debug, Clone, PartialEq, Eq)]
563 : pub struct StorageMetadata(HashMap<String, String>);
564 :
565 : /// External backup storage configuration, enough for creating a client for that storage.
566 12 : #[derive(Debug, Clone, PartialEq, Eq)]
567 : pub struct RemoteStorageConfig {
568 : /// The storage connection configuration.
569 : pub storage: RemoteStorageKind,
570 : /// A common timeout enforced for all requests after the concurrency limiter permit has been
571 : /// acquired.
572 : pub timeout: Duration,
573 : }
574 :
575 : /// A kind of a remote storage to connect to, with its connection configuration.
576 12 : #[derive(Debug, Clone, PartialEq, Eq)]
577 : pub enum RemoteStorageKind {
578 : /// Storage based on local file system.
579 : /// Specify a root folder to place all stored files into.
580 : LocalFs(Utf8PathBuf),
581 : /// AWS S3 based storage, storing all files in the S3 bucket
582 : /// specified by the config
583 : AwsS3(S3Config),
584 : /// Azure Blob based storage, storing all files in the container
585 : /// specified by the config
586 : AzureContainer(AzureConfig),
587 : }
588 :
589 : /// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
590 6 : #[derive(Clone, PartialEq, Eq)]
591 : pub struct S3Config {
592 : /// Name of the bucket to connect to.
593 : pub bucket_name: String,
594 : /// The region where the bucket is located.
595 : pub bucket_region: String,
596 : /// A "subfolder" in the bucket, to use the same bucket separately by multiple remote storage users at once.
597 : pub prefix_in_bucket: Option<String>,
598 : /// A base URL to send S3 requests to.
599 : /// By default, the endpoint is derived from a region name, assuming it's
600 : /// an AWS S3 region name, erroring on wrong region name.
601 : /// Endpoint provides a way to support other S3 flavors and their regions.
602 : ///
603 : /// Example: `http://127.0.0.1:5000`
604 : pub endpoint: Option<String>,
605 : /// AWS S3 has various limits on its API calls; we must not exceed them.
606 : /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
607 : pub concurrency_limit: NonZeroUsize,
608 : pub max_keys_per_list_response: Option<i32>,
609 : }
610 :
611 : impl Debug for S3Config {
612 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
613 0 : f.debug_struct("S3Config")
614 0 : .field("bucket_name", &self.bucket_name)
615 0 : .field("bucket_region", &self.bucket_region)
616 0 : .field("prefix_in_bucket", &self.prefix_in_bucket)
617 0 : .field("concurrency_limit", &self.concurrency_limit)
618 0 : .field(
619 0 : "max_keys_per_list_response",
620 0 : &self.max_keys_per_list_response,
621 0 : )
622 0 : .finish()
623 0 : }
624 : }
625 :
626 : /// Azure Blob container coordinates and access credentials to manage the container contents (read and write).
627 0 : #[derive(Clone, PartialEq, Eq)]
628 : pub struct AzureConfig {
629 : /// Name of the container to connect to.
630 : pub container_name: String,
631 : /// The region where the container is located.
632 : pub container_region: String,
633 : /// A "subfolder" in the container, to use the same container separately by multiple remote storage users at once.
634 : pub prefix_in_container: Option<String>,
635 : /// Azure has various limits on its API calls; we must not exceed them.
636 : /// See [`DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT`] for more details.
637 : pub concurrency_limit: NonZeroUsize,
638 : pub max_keys_per_list_response: Option<i32>,
639 : }
640 :
641 : impl Debug for AzureConfig {
642 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
643 0 : f.debug_struct("AzureConfig")
644 0 : .field("container_name", &self.container_name)
645 0 : .field("container_region", &self.container_region)
646 0 : .field("prefix_in_container", &self.prefix_in_container)
647 0 : .field("concurrency_limit", &self.concurrency_limit)
648 0 : .field(
649 0 : "max_keys_per_list_response",
650 0 : &self.max_keys_per_list_response,
651 0 : )
652 0 : .finish()
653 0 : }
654 : }
655 :
656 : impl RemoteStorageConfig {
657 : pub const DEFAULT_TIMEOUT: Duration = std::time::Duration::from_secs(120);
658 :
659 22 : pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<Option<RemoteStorageConfig>> {
660 22 : let local_path = toml.get("local_path");
661 22 : let bucket_name = toml.get("bucket_name");
662 22 : let bucket_region = toml.get("bucket_region");
663 22 : let container_name = toml.get("container_name");
664 22 : let container_region = toml.get("container_region");
665 :
666 22 : let use_azure = container_name.is_some() && container_region.is_some();
667 :
668 22 : let default_concurrency_limit = if use_azure {
669 0 : DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT
670 : } else {
671 22 : DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT
672 : };
673 22 : let concurrency_limit = NonZeroUsize::new(
674 22 : parse_optional_integer("concurrency_limit", toml)?.unwrap_or(default_concurrency_limit),
675 22 : )
676 22 : .context("Failed to parse 'concurrency_limit' as a positive integer")?;
677 :
678 22 : let max_keys_per_list_response =
679 22 : parse_optional_integer::<i32, _>("max_keys_per_list_response", toml)
680 22 : .context("Failed to parse 'max_keys_per_list_response' as a positive integer")?
681 22 : .or(DEFAULT_MAX_KEYS_PER_LIST_RESPONSE);
682 :
683 22 : let endpoint = toml
684 22 : .get("endpoint")
685 22 : .map(|endpoint| parse_toml_string("endpoint", endpoint))
686 22 : .transpose()?;
687 :
688 22 : let timeout = toml
689 22 : .get("timeout")
690 22 : .map(|timeout| {
691 2 : timeout
692 2 : .as_str()
693 2 : .ok_or_else(|| anyhow::Error::msg("timeout was not a string"))
694 22 : })
695 22 : .transpose()
696 22 : .and_then(|timeout| {
697 22 : timeout
698 22 : .map(humantime::parse_duration)
699 22 : .transpose()
700 22 : .map_err(anyhow::Error::new)
701 22 : })
702 22 : .context("parse timeout")?
703 22 : .unwrap_or(Self::DEFAULT_TIMEOUT);
704 22 :
705 22 : if timeout < Duration::from_secs(1) {
706 0 : bail!("timeout was specified as {timeout:?} which is too low");
707 22 : }
708 :
709 12 : let storage = match (
710 22 : local_path,
711 22 : bucket_name,
712 22 : bucket_region,
713 22 : container_name,
714 22 : container_region,
715 : ) {
716 : // Neither 'local_path' nor 'bucket_name' is provided; consider this remote storage disabled.
717 10 : (None, None, None, None, None) => return Ok(None),
718 : (_, Some(_), None, ..) => {
719 0 : bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
720 : }
721 : (_, None, Some(_), ..) => {
722 0 : bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
723 : }
724 6 : (None, Some(bucket_name), Some(bucket_region), ..) => {
725 6 : RemoteStorageKind::AwsS3(S3Config {
726 6 : bucket_name: parse_toml_string("bucket_name", bucket_name)?,
727 6 : bucket_region: parse_toml_string("bucket_region", bucket_region)?,
728 6 : prefix_in_bucket: toml
729 6 : .get("prefix_in_bucket")
730 6 : .map(|prefix_in_bucket| {
731 6 : parse_toml_string("prefix_in_bucket", prefix_in_bucket)
732 6 : })
733 6 : .transpose()?,
734 6 : endpoint,
735 6 : concurrency_limit,
736 6 : max_keys_per_list_response,
737 : })
738 : }
739 : (_, _, _, Some(_), None) => {
740 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
741 : }
742 : (_, _, _, None, Some(_)) => {
743 0 : bail!("'container_name' option is mandatory if 'container_region' is given ")
744 : }
745 0 : (None, None, None, Some(container_name), Some(container_region)) => {
746 0 : RemoteStorageKind::AzureContainer(AzureConfig {
747 0 : container_name: parse_toml_string("container_name", container_name)?,
748 0 : container_region: parse_toml_string("container_region", container_region)?,
749 0 : prefix_in_container: toml
750 0 : .get("prefix_in_container")
751 0 : .map(|prefix_in_container| {
752 0 : parse_toml_string("prefix_in_container", prefix_in_container)
753 0 : })
754 0 : .transpose()?,
755 0 : concurrency_limit,
756 0 : max_keys_per_list_response,
757 : })
758 : }
759 6 : (Some(local_path), None, None, None, None) => RemoteStorageKind::LocalFs(
760 6 : Utf8PathBuf::from(parse_toml_string("local_path", local_path)?),
761 : ),
762 : (Some(_), Some(_), ..) => {
763 0 : bail!("'local_path' and 'bucket_name' are mutually exclusive")
764 : }
765 : (Some(_), _, _, Some(_), Some(_)) => {
766 0 : bail!("local_path and 'container_name' are mutually exclusive")
767 : }
768 : };
769 :
770 12 : Ok(Some(RemoteStorageConfig { storage, timeout }))
771 22 : }
772 : }
773 :
774 : // Helper functions to parse a toml Item
775 44 : fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
776 44 : where
777 44 : I: TryFrom<i64, Error = E>,
778 44 : E: std::error::Error + Send + Sync + 'static,
779 44 : {
780 44 : let toml_integer = match item.get(name) {
781 4 : Some(item) => item
782 4 : .as_integer()
783 4 : .with_context(|| format!("configure option {name} is not an integer"))?,
784 40 : None => return Ok(None),
785 : };
786 :
787 4 : I::try_from(toml_integer)
788 4 : .map(Some)
789 4 : .with_context(|| format!("configure option {name} is too large"))
790 44 : }
791 :
792 30 : fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
793 30 : let s = item
794 30 : .as_str()
795 30 : .with_context(|| format!("configure option {name} is not a string"))?;
796 30 : Ok(s.to_string())
797 30 : }
798 :
799 : struct ConcurrencyLimiter {
800 : // Every request to S3 can be throttled or cancelled if a certain number of requests per second is exceeded.
801 : // The same goes for IAM, which is queried before every S3 request, if enabled. IAM has an even lower RPS threshold.
802 : // This helps ensure we don't exceed those thresholds.
803 : write: Arc<Semaphore>,
804 : read: Arc<Semaphore>,
805 : }
806 :
807 : impl ConcurrencyLimiter {
808 374 : fn for_kind(&self, kind: RequestKind) -> &Arc<Semaphore> {
809 374 : match kind {
810 32 : RequestKind::Get => &self.read,
811 154 : RequestKind::Put => &self.write,
812 30 : RequestKind::List => &self.read,
813 149 : RequestKind::Delete => &self.write,
814 3 : RequestKind::Copy => &self.write,
815 6 : RequestKind::TimeTravel => &self.write,
816 : }
817 374 : }
818 :
819 349 : async fn acquire(
820 349 : &self,
821 349 : kind: RequestKind,
822 349 : ) -> Result<tokio::sync::SemaphorePermit<'_>, tokio::sync::AcquireError> {
823 0 : self.for_kind(kind).acquire().await
824 0 : }
825 :
826 25 : async fn acquire_owned(
827 25 : &self,
828 25 : kind: RequestKind,
829 25 : ) -> Result<tokio::sync::OwnedSemaphorePermit, tokio::sync::AcquireError> {
830 0 : Arc::clone(self.for_kind(kind)).acquire_owned().await
831 0 : }
832 :
833 34 : fn new(limit: usize) -> ConcurrencyLimiter {
834 34 : Self {
835 34 : read: Arc::new(Semaphore::new(limit)),
836 34 : write: Arc::new(Semaphore::new(limit)),
837 34 : }
838 34 : }
839 : }
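// A minimal illustrative sketch of how a backend holds a limiter permit for the duration of
// one request, per the struct comment above. The request kind chosen here is arbitrary.
#[allow(dead_code)]
async fn limited_request_example(limiter: &ConcurrencyLimiter) -> anyhow::Result<()> {
    let _permit = limiter
        .acquire(RequestKind::Get)
        .await
        .context("concurrency limiter semaphore was closed")?;
    // ... perform the actual GET request here while `_permit` is held ...
    Ok(())
}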
840 :
841 : #[cfg(test)]
842 : mod tests {
843 : use super::*;
844 :
845 2 : #[test]
846 2 : fn test_object_name() {
847 2 : let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
848 2 : assert_eq!(k.object_name(), Some("c"));
849 :
850 2 : let k = RemotePath::new(Utf8Path::new("a/b/c/")).unwrap();
851 2 : assert_eq!(k.object_name(), Some("c"));
852 :
853 2 : let k = RemotePath::new(Utf8Path::new("a/")).unwrap();
854 2 : assert_eq!(k.object_name(), Some("a"));
855 :
856 : // XXX is it impossible to have an empty key?
857 2 : let k = RemotePath::new(Utf8Path::new("")).unwrap();
858 2 : assert_eq!(k.object_name(), None);
859 2 : }
860 :
861 2 : #[test]
862 2 : fn rempte_path_cannot_be_created_from_absolute_ones() {
863 2 : let err = RemotePath::new(Utf8Path::new("/")).expect_err("Should fail on absolute paths");
864 2 : assert_eq!(err.to_string(), "Path \"/\" is not relative");
865 2 : }
866 :
867 2 : #[test]
868 2 : fn parse_localfs_config_with_timeout() {
869 2 : let input = "local_path = '.'
870 2 : timeout = '5s'";
871 2 :
872 2 : let toml = input.parse::<toml_edit::Document>().unwrap();
873 2 :
874 2 : let config = RemoteStorageConfig::from_toml(toml.as_item())
875 2 : .unwrap()
876 2 : .expect("it exists");
877 2 :
878 2 : assert_eq!(
879 2 : config,
880 2 : RemoteStorageConfig {
881 2 : storage: RemoteStorageKind::LocalFs(Utf8PathBuf::from(".")),
882 2 : timeout: Duration::from_secs(5)
883 2 : }
884 2 : );
885 2 : }
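
// A minimal illustrative sketch mirroring the test above for the S3 branch of `from_toml`;
// the bucket name, region and prefix are hypothetical placeholders.
#[test]
fn parse_s3_config_example() {
    let input = "bucket_name = 'some-bucket'
bucket_region = 'eu-central-1'
prefix_in_bucket = 'pageserver/'";

    let toml = input.parse::<toml_edit::Document>().unwrap();
    let config = RemoteStorageConfig::from_toml(toml.as_item())
        .unwrap()
        .expect("config is not disabled");

    // No explicit timeout in the snippet, so the default applies.
    assert_eq!(config.timeout, RemoteStorageConfig::DEFAULT_TIMEOUT);
    match config.storage {
        RemoteStorageKind::AwsS3(s3) => {
            assert_eq!(s3.bucket_name, "some-bucket");
            assert_eq!(s3.bucket_region, "eu-central-1");
            assert_eq!(s3.prefix_in_bucket.as_deref(), Some("pageserver/"));
        }
        other => panic!("expected an AwsS3 config, got {other:?}"),
    }
}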
886 : }