Line data Source code
1 : //! Local filesystem acting as a remote storage.
2 : //! Multiple API users can use the same "storage" of this kind by using different storage roots.
3 : //!
4 : //! This storage is used in tests, but can also be used in cases when a certain persistent
5 : //! volume is mounted to the local FS.
6 :
7 : use std::collections::HashSet;
8 : use std::io::ErrorKind;
9 : use std::num::NonZeroU32;
10 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
11 :
12 : use anyhow::{Context, bail, ensure};
13 : use bytes::Bytes;
14 : use camino::{Utf8Path, Utf8PathBuf};
15 : use futures::stream::Stream;
16 : use tokio::fs;
17 : use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt};
18 : use tokio_util::io::ReaderStream;
19 : use tokio_util::sync::CancellationToken;
20 : use utils::crashsafe::path_with_suffix_extension;
21 :
22 : use super::{RemoteStorage, StorageMetadata};
23 : use crate::{
24 : Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject,
25 : REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel,
26 : };
27 :
/// Suffix passed to `path_with_suffix_extension` to derive the temporary file an upload
/// is written to before it is renamed onto its final destination.
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
29 :
/// [`RemoteStorage`] implementation that keeps its objects as files under a directory
/// of the local filesystem.
#[derive(Debug, Clone)]
pub struct LocalFs {
    /// Directory all remote paths are resolved against; [`LocalFs::new`] makes it absolute.
    storage_root: Utf8PathBuf,
    /// Time limit applied to list, upload and download operations.
    timeout: Duration,
}
35 :
36 : impl LocalFs {
37 : /// Attempts to create local FS storage, along with its root directory.
38 : /// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).
39 195 : pub fn new(mut storage_root: Utf8PathBuf, timeout: Duration) -> anyhow::Result<Self> {
40 195 : if !storage_root.exists() {
41 33 : std::fs::create_dir_all(&storage_root).with_context(|| {
42 0 : format!("Failed to create all directories in the given root path {storage_root:?}")
43 0 : })?;
44 162 : }
45 195 : if !storage_root.is_absolute() {
46 119 : storage_root = storage_root.canonicalize_utf8().with_context(|| {
47 0 : format!("Failed to represent path {storage_root:?} as an absolute path")
48 0 : })?;
49 76 : }
50 :
51 195 : Ok(Self {
52 195 : storage_root,
53 195 : timeout,
54 195 : })
55 195 : }
56 :
57 : // mirrors S3Bucket::s3_object_to_relative_path
58 152 : fn local_file_to_relative_path(&self, key: Utf8PathBuf) -> RemotePath {
59 152 : let relative_path = key
60 152 : .strip_prefix(&self.storage_root)
61 152 : .expect("relative path must contain storage_root as prefix");
62 152 : RemotePath(relative_path.into())
63 152 : }
64 :
65 95 : async fn read_storage_metadata(
66 95 : &self,
67 95 : file_path: &Utf8Path,
68 95 : ) -> anyhow::Result<Option<StorageMetadata>> {
69 95 : let metadata_path = storage_metadata_path(file_path);
70 95 : if metadata_path.exists() && metadata_path.is_file() {
71 6 : let metadata_string = fs::read_to_string(&metadata_path).await.with_context(|| {
72 0 : format!("Failed to read metadata from the local storage at '{metadata_path}'")
73 0 : })?;
74 :
75 6 : serde_json::from_str(&metadata_string)
76 6 : .with_context(|| {
77 0 : format!(
78 0 : "Failed to deserialize metadata from the local storage at '{metadata_path}'",
79 : )
80 0 : })
81 6 : .map(|metadata| Some(StorageMetadata(metadata)))
82 : } else {
83 89 : Ok(None)
84 : }
85 95 : }
86 :
/// Test helper: lists every regular file under the storage root, as paths relative to it.
///
/// Symlinks are skipped; directories are descended into but not reported themselves.
#[cfg(test)]
async fn list_all(&self) -> anyhow::Result<Vec<RemotePath>> {
    use std::future::Future;
    use std::pin::Pin;

    /// Recursively collects the files below `directory_path`.
    ///
    /// Boxed because an `async fn` cannot call itself recursively.
    fn get_all_files<'a, P>(
        directory_path: P,
    ) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Utf8PathBuf>>> + Send + Sync + 'a>>
    where
        P: AsRef<Utf8Path> + Send + Sync + 'a,
    {
        Box::pin(async move {
            let directory_path = directory_path.as_ref();
            if !directory_path.exists() {
                return Ok(Vec::new());
            }
            if !directory_path.is_dir() {
                bail!("Path {directory_path:?} is not a directory")
            }

            let mut paths = Vec::new();
            let mut dir_contents = fs::read_dir(directory_path).await?;
            while let Some(dir_entry) = dir_contents.next_entry().await? {
                let file_type = dir_entry.file_type().await?;
                let entry_path = Utf8PathBuf::from_path_buf(dir_entry.path()).map_err(|pb| {
                    anyhow::Error::msg(format!("non-Unicode path: {}", pb.to_string_lossy()))
                })?;

                if file_type.is_symlink() {
                    tracing::debug!("{entry_path:?} is a symlink, skipping")
                } else if file_type.is_dir() {
                    paths.extend(get_all_files(&entry_path).await?)
                } else {
                    paths.push(entry_path);
                }
            }
            Ok(paths)
        })
    }

    let local_files = get_all_files(&self.storage_root).await?;
    let mut remote_paths = Vec::with_capacity(local_files.len());
    for path in local_files {
        let remote_path = path
            .strip_prefix(&self.storage_root)
            .context("Failed to strip storage root prefix")
            .and_then(RemotePath::new)
            .expect("We list files for storage root, hence should be able to remote the prefix");
        remote_paths.push(remote_path);
    }
    Ok(remote_paths)
}
143 :
144 : // recursively lists all files in a directory,
145 : // mirroring the `list_files` for `s3_bucket`
146 262 : async fn list_recursive(&self, folder: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
147 262 : let full_path = match folder {
148 256 : Some(folder) => folder.with_base(&self.storage_root),
149 6 : None => self.storage_root.clone(),
150 : };
151 :
152 : // If we were given a directory, we may use it as our starting point.
153 : // Otherwise, we must go up to the first ancestor dir that exists. This is because
154 : // S3 object list prefixes can be arbitrary strings, but when reading
155 : // the local filesystem we need a directory to start calling read_dir on.
156 262 : let mut initial_dir = full_path.clone();
157 :
158 : // If there's no trailing slash, we have to start looking from one above: even if
159 : // `initial_dir` is a directory, we should still list any prefixes in the parent
160 : // that start with the same string.
161 262 : if !full_path.to_string().ends_with('/') {
162 137 : initial_dir.pop();
163 137 : }
164 :
165 : loop {
166 : // Did we make it to the root?
167 843 : if initial_dir.parent().is_none() {
168 0 : anyhow::bail!("list_files: failed to find valid ancestor dir for {full_path}");
169 843 : }
170 :
171 843 : match fs::metadata(initial_dir.clone()).await {
172 262 : Ok(meta) if meta.is_dir() => {
173 : // We found a directory, break
174 262 : break;
175 : }
176 0 : Ok(_meta) => {
177 0 : // It's not a directory: strip back to the parent
178 0 : initial_dir.pop();
179 0 : }
180 581 : Err(e) if e.kind() == ErrorKind::NotFound => {
181 581 : // It's not a file that exists: strip the prefix back to the parent directory
182 581 : initial_dir.pop();
183 581 : }
184 0 : Err(e) => {
185 : // Unexpected I/O error
186 0 : anyhow::bail!(e)
187 : }
188 : }
189 : }
190 : // Note that Utf8PathBuf starts_with only considers full path segments, but
191 : // object prefixes are arbitrary strings, so we need the strings for doing
192 : // starts_with later.
193 262 : let prefix = full_path.as_str();
194 :
195 262 : let mut files = vec![];
196 262 : let mut directory_queue = vec![initial_dir];
197 595 : while let Some(cur_folder) = directory_queue.pop() {
198 333 : let mut entries = cur_folder.read_dir_utf8()?;
199 667 : while let Some(Ok(entry)) = entries.next() {
200 334 : let file_name = entry.file_name();
201 334 : let full_file_name = cur_folder.join(file_name);
202 334 : if full_file_name.as_str().starts_with(prefix) {
203 152 : let file_remote_path = self.local_file_to_relative_path(full_file_name.clone());
204 152 : files.push(file_remote_path);
205 152 : if full_file_name.is_dir() {
206 71 : directory_queue.push(full_file_name);
207 81 : }
208 182 : }
209 : }
210 : }
211 :
212 262 : Ok(files)
213 262 : }
214 :
/// Writes `data` to the local file corresponding to `to`, without applying a timeout.
///
/// The bytes are first written to a temporary path derived from the destination and
/// then renamed onto the destination, so a partially written file never appears under
/// the final name. If `metadata` is given, it is serialized as JSON into a separate
/// file next to the destination after the rename.
///
/// `data_size_bytes` must match the stream length exactly.
///
/// # Errors
///
/// - [`TimeoutOrCancel::Cancel`] when `cancel` fires before the copy completes; the
///   temporary file is removed in that case.
/// - An error when the stream yields fewer or more bytes than `data_size_bytes`.
/// - Any I/O error from creating directories, writing, flushing, renaming or writing
///   the metadata file.
async fn upload0(
    &self,
    data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
    data_size_bytes: usize,
    to: &RemotePath,
    metadata: Option<StorageMetadata>,
    cancel: &CancellationToken,
) -> anyhow::Result<()> {
    let target_file_path = to.with_base(&self.storage_root);
    create_target_directory(&target_file_path).await?;
    // We need this dance with sort of durable rename (without fsyncs)
    // to prevent partial uploads. This was really hit when pageserver shutdown
    // cancelled the upload and partial file was left on the fs
    // NOTE: Because temp file suffix always the same this operation is racy.
    // Two concurrent operations can lead to the following sequence:
    // T1: write(temp)
    // T2: write(temp) -> overwrites the content
    // T1: rename(temp, dst) -> succeeds
    // T2: rename(temp, dst) -> fails, temp no longer exists
    // This can be solved by supplying unique temp suffix every time, but this situation
    // is not normal in the first place, the error can help (and helped at least once)
    // to discover bugs in upper level synchronization.
    let temp_file_path =
        path_with_suffix_extension(&target_file_path, LOCAL_FS_TEMP_FILE_SUFFIX);
    let mut destination = io::BufWriter::new(
        fs::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(&temp_file_path)
            .await
            .with_context(|| {
                format!("Failed to open target fs destination at '{target_file_path}'")
            })?,
    );

    // Read at most the announced number of bytes; anything beyond is detected below.
    let from_size_bytes = data_size_bytes as u64;
    let data = tokio_util::io::StreamReader::new(data);
    let data = std::pin::pin!(data);
    let mut buffer_to_read = data.take(from_size_bytes);

    // alternatively we could just write the bytes to a file, but local_fs is a testing utility
    let copy = io::copy_buf(&mut buffer_to_read, &mut destination);

    let bytes_read = tokio::select! {
        // `biased` polls the branches in order, so cancellation is checked first.
        biased;
        _ = cancel.cancelled() => {
            let file = destination.into_inner();
            // wait for the inflight operation(s) to complete so that there could be a next
            // attempt right away and our writes are not directed to their file.
            file.into_std().await;

            // TODO: leave the temp or not? leaving is probably less racy. enabled truncate at
            // least.
            fs::remove_file(temp_file_path).await.context("remove temp_file_path after cancellation or timeout")?;
            return Err(TimeoutOrCancel::Cancel.into());
        }
        read = copy => read,
    };

    let bytes_read =
        bytes_read.with_context(|| {
            format!(
                "Failed to upload file (write temp) to the local storage at '{temp_file_path}'",
            )
        })?;

    if bytes_read < from_size_bytes {
        bail!(
            "Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"
        );
    }
    // Check if there is any extra data after the given size.
    let mut from = buffer_to_read.into_inner();
    let extra_read = from.read(&mut [1]).await?;
    ensure!(
        extra_read == 0,
        "Provided stream was larger than expected: expected {from_size_bytes} bytes",
    );

    // Push the buffered bytes out to the file before renaming it into place.
    destination.flush().await.with_context(|| {
        format!(
            "Failed to upload (flush temp) file to the local storage at '{temp_file_path}'",
        )
    })?;

    fs::rename(temp_file_path, &target_file_path)
        .await
        .with_context(|| {
            format!(
                "Failed to upload (rename) file to the local storage at '{target_file_path}'",
            )
        })?;

    if let Some(storage_metadata) = metadata {
        // FIXME: we must not be using metadata much, since this would forget the old metadata
        // for new writes? or perhaps metadata is sticky; could consider removing if it's never
        // used.
        let storage_metadata_path = storage_metadata_path(&target_file_path);
        fs::write(
            &storage_metadata_path,
            serde_json::to_string(&storage_metadata.0)
                .context("Failed to serialize storage metadata as json")?,
        )
        .await
        .with_context(|| {
            format!(
                "Failed to write metadata to the local storage at '{storage_metadata_path}'",
            )
        })?;
    }

    Ok(())
}
329 : }
330 :
331 : impl RemoteStorage for LocalFs {
332 6 : fn list_streaming(
333 6 : &self,
334 6 : prefix: Option<&RemotePath>,
335 6 : mode: ListingMode,
336 6 : max_keys: Option<NonZeroU32>,
337 6 : cancel: &CancellationToken,
338 6 : ) -> impl Stream<Item = Result<Listing, DownloadError>> {
339 6 : let listing = self.list(prefix, mode, max_keys, cancel);
340 6 : futures::stream::once(listing)
341 6 : }
342 :
343 262 : async fn list(
344 262 : &self,
345 262 : prefix: Option<&RemotePath>,
346 262 : mode: ListingMode,
347 262 : max_keys: Option<NonZeroU32>,
348 262 : cancel: &CancellationToken,
349 262 : ) -> Result<Listing, DownloadError> {
350 262 : let op = async {
351 262 : let mut result = Listing::default();
352 :
353 : // Filter out directories: in S3 directories don't exist, only the keys within them do.
354 262 : let keys = self
355 262 : .list_recursive(prefix)
356 262 : .await
357 262 : .map_err(DownloadError::Other)?;
358 262 : let mut objects = Vec::with_capacity(keys.len());
359 414 : for key in keys {
360 152 : let path = key.with_base(&self.storage_root);
361 152 : let metadata = file_metadata(&path).await;
362 0 : if let Err(DownloadError::NotFound) = metadata {
363 : // Race: if the file is deleted between listing and metadata check, ignore it.
364 0 : continue;
365 152 : }
366 152 : let metadata = metadata?;
367 152 : if metadata.is_dir() {
368 71 : continue;
369 81 : }
370 81 : objects.push(ListingObject {
371 81 : key: key.clone(),
372 81 : last_modified: metadata.modified()?,
373 81 : size: metadata.len(),
374 : });
375 : }
376 262 : let objects = objects;
377 :
378 262 : if let ListingMode::NoDelimiter = mode {
379 128 : result.keys = objects;
380 128 : } else {
381 134 : let mut prefixes = HashSet::new();
382 187 : for object in objects {
383 53 : let key = object.key;
384 : // If the part after the prefix includes a "/", take only the first part and put it in `prefixes`.
385 53 : let relative_key = if let Some(prefix) = prefix {
386 44 : let mut prefix = prefix.clone();
387 : // We only strip the dirname of the prefix, so that when we strip it from the start of keys we
388 : // end up with full file/dir names.
389 44 : let prefix_full_local_path = prefix.with_base(&self.storage_root);
390 44 : let has_slash = prefix.0.to_string().ends_with('/');
391 44 : let strip_prefix = if prefix_full_local_path.is_dir() && has_slash {
392 20 : prefix
393 : } else {
394 24 : prefix.0.pop();
395 24 : prefix
396 : };
397 :
398 44 : RemotePath::new(key.strip_prefix(&strip_prefix).unwrap()).unwrap()
399 : } else {
400 9 : key
401 : };
402 :
403 53 : let relative_key = format!("{relative_key}");
404 53 : if relative_key.contains(REMOTE_STORAGE_PREFIX_SEPARATOR) {
405 50 : let first_part = relative_key
406 50 : .split(REMOTE_STORAGE_PREFIX_SEPARATOR)
407 50 : .next()
408 50 : .unwrap()
409 50 : .to_owned();
410 50 : prefixes.insert(first_part);
411 50 : } else {
412 3 : result.keys.push(ListingObject {
413 3 : key: RemotePath::from_string(&relative_key).unwrap(),
414 3 : last_modified: object.last_modified,
415 3 : size: object.size,
416 3 : });
417 3 : }
418 : }
419 134 : result.prefixes = prefixes
420 134 : .into_iter()
421 134 : .map(|s| RemotePath::from_string(&s).unwrap())
422 134 : .collect();
423 : }
424 :
425 262 : if let Some(max_keys) = max_keys {
426 0 : result.keys.truncate(max_keys.get() as usize);
427 262 : }
428 262 : Ok(result)
429 262 : };
430 :
431 262 : let timeout = async {
432 257 : tokio::time::sleep(self.timeout).await;
433 0 : Err(DownloadError::Timeout)
434 0 : };
435 :
436 262 : let cancelled = async {
437 260 : cancel.cancelled().await;
438 0 : Err(DownloadError::Cancelled)
439 0 : };
440 :
441 262 : tokio::select! {
442 262 : res = op => res,
443 262 : res = timeout => res,
444 262 : res = cancelled => res,
445 : }
446 262 : }
447 :
/// Version listing is not implemented for the local filesystem storage.
///
/// # Panics
///
/// Always panics via `unimplemented!()`.
async fn list_versions(
    &self,
    _prefix: Option<&RemotePath>,
    _mode: ListingMode,
    _max_keys: Option<NonZeroU32>,
    _cancel: &CancellationToken,
) -> Result<crate::VersionListing, DownloadError> {
    unimplemented!()
}
457 :
458 0 : async fn head_object(
459 0 : &self,
460 0 : key: &RemotePath,
461 0 : _cancel: &CancellationToken,
462 0 : ) -> Result<ListingObject, DownloadError> {
463 0 : let target_file_path = key.with_base(&self.storage_root);
464 0 : let metadata = file_metadata(&target_file_path).await?;
465 : Ok(ListingObject {
466 0 : key: key.clone(),
467 0 : last_modified: metadata.modified()?,
468 0 : size: metadata.len(),
469 : })
470 0 : }
471 :
472 1915 : async fn upload(
473 1915 : &self,
474 1915 : data: impl Stream<Item = std::io::Result<Bytes>> + Send + Sync,
475 1915 : data_size_bytes: usize,
476 1915 : to: &RemotePath,
477 1915 : metadata: Option<StorageMetadata>,
478 1915 : cancel: &CancellationToken,
479 1915 : ) -> anyhow::Result<()> {
480 1915 : let cancel = cancel.child_token();
481 :
482 1915 : let op = self.upload0(data, data_size_bytes, to, metadata, &cancel);
483 1915 : let mut op = std::pin::pin!(op);
484 :
485 : // race the upload0 to the timeout; if it goes over, do a graceful shutdown
486 1915 : let (res, timeout) = tokio::select! {
487 1915 : res = &mut op => (res, false),
488 1915 : _ = tokio::time::sleep(self.timeout) => {
489 0 : cancel.cancel();
490 0 : (op.await, true)
491 : }
492 : };
493 :
494 12 : match res {
495 12 : Err(e) if timeout && TimeoutOrCancel::caused_by_cancel(&e) => {
496 : // we caused this cancel (or they happened simultaneously) -- swap it out to
497 : // Timeout
498 0 : Err(TimeoutOrCancel::Timeout.into())
499 : }
500 1903 : res => res,
501 : }
502 60 : }
503 :
504 471 : async fn download(
505 471 : &self,
506 471 : from: &RemotePath,
507 471 : opts: &DownloadOpts,
508 471 : cancel: &CancellationToken,
509 471 : ) -> Result<Download, DownloadError> {
510 471 : let target_path = from.with_base(&self.storage_root);
511 :
512 471 : let file_metadata = file_metadata(&target_path).await?;
513 98 : let etag = mock_etag(&file_metadata);
514 :
515 98 : if opts.etag.as_ref() == Some(&etag) {
516 0 : return Err(DownloadError::Unmodified);
517 98 : }
518 :
519 98 : let mut file = fs::OpenOptions::new()
520 98 : .read(true)
521 98 : .open(&target_path)
522 98 : .await
523 98 : .with_context(|| {
524 0 : format!("Failed to open source file {target_path:?} to use in the download")
525 0 : })
526 98 : .map_err(DownloadError::Other)?;
527 :
528 98 : let mut take = file_metadata.len();
529 98 : if let Some((start, end)) = opts.byte_range() {
530 15 : if start > 0 {
531 6 : file.seek(io::SeekFrom::Start(start))
532 6 : .await
533 6 : .context("Failed to seek to the range start in a local storage file")
534 6 : .map_err(DownloadError::Other)?;
535 9 : }
536 15 : if let Some(end) = end {
537 12 : take = end - start;
538 12 : }
539 83 : }
540 :
541 98 : let source = ReaderStream::new(file.take(take));
542 :
543 98 : let metadata = self
544 98 : .read_storage_metadata(&target_path)
545 98 : .await
546 95 : .map_err(DownloadError::Other)?;
547 :
548 95 : let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
549 95 : let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
550 :
551 : Ok(Download {
552 95 : metadata,
553 95 : last_modified: file_metadata
554 95 : .modified()
555 95 : .map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
556 95 : etag,
557 95 : download_stream: Box::pin(source),
558 : })
559 468 : }
560 :
561 230 : async fn delete(&self, path: &RemotePath, _cancel: &CancellationToken) -> anyhow::Result<()> {
562 230 : let file_path = path.with_base(&self.storage_root);
563 230 : match fs::remove_file(&file_path).await {
564 226 : Ok(()) => Ok(()),
565 : // The file doesn't exist. This shouldn't yield an error to mirror S3's behaviour.
566 : // See https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObject.html
567 : // > If there isn't a null version, Amazon S3 does not remove any objects but will still respond that the command was successful.
568 3 : Err(e) if e.kind() == ErrorKind::NotFound => Ok(()),
569 0 : Err(e) => Err(anyhow::anyhow!(e)),
570 : }
571 229 : }
572 :
573 9 : async fn delete_objects(
574 9 : &self,
575 9 : paths: &[RemotePath],
576 9 : cancel: &CancellationToken,
577 9 : ) -> anyhow::Result<()> {
578 22 : for path in paths {
579 13 : self.delete(path, cancel).await?
580 : }
581 9 : Ok(())
582 9 : }
583 :
/// Reports the same per-request delete limit as the S3 backend
/// (`MAX_KEYS_PER_DELETE_S3`).
fn max_keys_per_delete(&self) -> usize {
    super::MAX_KEYS_PER_DELETE_S3
}
587 :
588 0 : async fn copy(
589 0 : &self,
590 0 : from: &RemotePath,
591 0 : to: &RemotePath,
592 0 : _cancel: &CancellationToken,
593 0 : ) -> anyhow::Result<()> {
594 0 : let from_path = from.with_base(&self.storage_root);
595 0 : let to_path = to.with_base(&self.storage_root);
596 0 : create_target_directory(&to_path).await?;
597 0 : fs::copy(&from_path, &to_path)
598 0 : .await
599 0 : .with_context(|| format!("Failed to copy file from '{from_path}' to '{to_path}'"))?;
600 0 : Ok(())
601 0 : }
602 :
/// Time travel recovery is not supported by the local filesystem storage.
///
/// Always returns [`TimeTravelError::Unimplemented`]; all arguments are ignored.
async fn time_travel_recover(
    &self,
    _prefix: Option<&RemotePath>,
    _timestamp: SystemTime,
    _done_if_after: SystemTime,
    _cancel: &CancellationToken,
    _complexity_limit: Option<NonZeroU32>,
) -> Result<(), TimeTravelError> {
    Err(TimeTravelError::Unimplemented)
}
613 : }
614 :
/// Path of the file holding the storage metadata for `original_path`: the original path
/// passed through `path_with_suffix_extension` with the `"metadata"` suffix.
fn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {
    path_with_suffix_extension(original_path, "metadata")
}
618 :
619 1915 : async fn create_target_directory(target_file_path: &Utf8Path) -> anyhow::Result<()> {
620 1915 : let target_dir = match target_file_path.parent() {
621 1915 : Some(parent_dir) => parent_dir,
622 0 : None => bail!("File path '{target_file_path}' has no parent directory"),
623 : };
624 1915 : if !target_dir.exists() {
625 360 : fs::create_dir_all(target_dir).await?;
626 1555 : }
627 1915 : Ok(())
628 1915 : }
629 :
630 623 : async fn file_metadata(file_path: &Utf8Path) -> Result<std::fs::Metadata, DownloadError> {
631 623 : tokio::fs::metadata(&file_path).await.map_err(|e| {
632 373 : if e.kind() == ErrorKind::NotFound {
633 373 : DownloadError::NotFound
634 : } else {
635 0 : DownloadError::BadInput(e.into())
636 : }
637 373 : })
638 623 : }
639 :
640 : // Use mtime as stand-in for ETag. We could calculate a meaningful one by md5'ing the contents of files we
641 : // read, but that's expensive and the local_fs test helper's whole reason for existence is to run small tests
642 : // quickly, with less overhead than using a mock S3 server.
643 98 : fn mock_etag(meta: &std::fs::Metadata) -> Etag {
644 98 : let mtime = meta.modified().expect("Filesystem mtime missing");
645 98 : format!("{}", mtime.duration_since(UNIX_EPOCH).unwrap().as_millis()).into()
646 98 : }
647 :
648 : #[cfg(test)]
649 : mod fs_tests {
650 : use std::collections::HashMap;
651 : use std::io::Write;
652 : use std::ops::Bound;
653 :
654 : use camino_tempfile::tempdir;
655 :
656 : use super::*;
657 :
658 9 : async fn read_and_check_metadata(
659 9 : storage: &LocalFs,
660 9 : remote_storage_path: &RemotePath,
661 9 : expected_metadata: Option<&StorageMetadata>,
662 9 : ) -> anyhow::Result<String> {
663 9 : let cancel = CancellationToken::new();
664 9 : let download = storage
665 9 : .download(remote_storage_path, &DownloadOpts::default(), &cancel)
666 9 : .await
667 9 : .map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
668 9 : ensure!(
669 9 : download.metadata.as_ref() == expected_metadata,
670 0 : "Unexpected metadata returned for the downloaded file"
671 : );
672 :
673 9 : let contents = aggregate(download.download_stream).await?;
674 :
675 9 : String::from_utf8(contents).map_err(anyhow::Error::new)
676 9 : }
677 :
678 : #[tokio::test]
679 3 : async fn upload_file() -> anyhow::Result<()> {
680 3 : let (storage, cancel) = create_storage()?;
681 :
682 3 : let target_path_1 = upload_dummy_file(&storage, "upload_1", None, &cancel).await?;
683 3 : assert_eq!(
684 3 : storage.list_all().await?,
685 3 : vec![target_path_1.clone()],
686 0 : "Should list a single file after first upload"
687 : );
688 :
689 3 : let target_path_2 = upload_dummy_file(&storage, "upload_2", None, &cancel).await?;
690 3 : assert_eq!(
691 3 : list_files_sorted(&storage).await?,
692 3 : vec![target_path_1.clone(), target_path_2.clone()],
693 0 : "Should list a two different files after second upload"
694 : );
695 :
696 6 : Ok(())
697 3 : }
698 :
699 : #[tokio::test]
700 3 : async fn upload_file_negatives() -> anyhow::Result<()> {
701 3 : let (storage, cancel) = create_storage()?;
702 :
703 3 : let id = RemotePath::new(Utf8Path::new("dummy"))?;
704 3 : let content = Bytes::from_static(b"12345");
705 12 : let content = move || futures::stream::once(futures::future::ready(Ok(content.clone())));
706 :
707 : // Check that you get an error if the size parameter doesn't match the actual
708 : // size of the stream.
709 3 : storage
710 3 : .upload(content(), 0, &id, None, &cancel)
711 3 : .await
712 3 : .expect_err("upload with zero size succeeded");
713 3 : storage
714 3 : .upload(content(), 4, &id, None, &cancel)
715 3 : .await
716 3 : .expect_err("upload with too short size succeeded");
717 3 : storage
718 3 : .upload(content(), 6, &id, None, &cancel)
719 3 : .await
720 3 : .expect_err("upload with too large size succeeded");
721 :
722 : // Correct size is 5, this should succeed.
723 3 : storage.upload(content(), 5, &id, None, &cancel).await?;
724 :
725 6 : Ok(())
726 3 : }
727 :
728 33 : fn create_storage() -> anyhow::Result<(LocalFs, CancellationToken)> {
729 33 : let storage_root = tempdir()?.path().to_path_buf();
730 33 : LocalFs::new(storage_root, Duration::from_secs(120)).map(|s| (s, CancellationToken::new()))
731 33 : }
732 :
733 : #[tokio::test]
734 3 : async fn download_file() -> anyhow::Result<()> {
735 3 : let (storage, cancel) = create_storage()?;
736 3 : let upload_name = "upload_1";
737 3 : let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
738 :
739 3 : let contents = read_and_check_metadata(&storage, &upload_target, None).await?;
740 3 : assert_eq!(
741 3 : dummy_contents(upload_name),
742 : contents,
743 0 : "We should upload and download the same contents"
744 : );
745 :
746 3 : let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
747 3 : match storage
748 3 : .download(&non_existing_path, &DownloadOpts::default(), &cancel)
749 3 : .await
750 3 : {
751 3 : Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
752 3 : other => panic!(
753 3 : "Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"
754 3 : ),
755 3 : }
756 3 : Ok(())
757 3 : }
758 :
759 : #[tokio::test]
760 3 : async fn download_file_range_positive() -> anyhow::Result<()> {
761 3 : let (storage, cancel) = create_storage()?;
762 3 : let upload_name = "upload_1";
763 3 : let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
764 :
765 3 : let full_range_download_contents =
766 3 : read_and_check_metadata(&storage, &upload_target, None).await?;
767 3 : assert_eq!(
768 3 : dummy_contents(upload_name),
769 : full_range_download_contents,
770 0 : "Download full range should return the whole upload"
771 : );
772 :
773 3 : let uploaded_bytes = dummy_contents(upload_name).into_bytes();
774 3 : let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
775 :
776 3 : let first_part_download = storage
777 3 : .download(
778 3 : &upload_target,
779 3 : &DownloadOpts {
780 3 : byte_end: Bound::Excluded(first_part_local.len() as u64),
781 3 : ..Default::default()
782 3 : },
783 3 : &cancel,
784 3 : )
785 3 : .await?;
786 3 : assert!(
787 3 : first_part_download.metadata.is_none(),
788 0 : "No metadata should be returned for no metadata upload"
789 : );
790 :
791 3 : let first_part_remote = aggregate(first_part_download.download_stream).await?;
792 3 : assert_eq!(
793 : first_part_local, first_part_remote,
794 0 : "First part bytes should be returned when requested"
795 : );
796 :
797 3 : let second_part_download = storage
798 3 : .download(
799 3 : &upload_target,
800 3 : &DownloadOpts {
801 3 : byte_start: Bound::Included(first_part_local.len() as u64),
802 3 : byte_end: Bound::Excluded(
803 3 : (first_part_local.len() + second_part_local.len()) as u64,
804 3 : ),
805 3 : ..Default::default()
806 3 : },
807 3 : &cancel,
808 3 : )
809 3 : .await?;
810 3 : assert!(
811 3 : second_part_download.metadata.is_none(),
812 0 : "No metadata should be returned for no metadata upload"
813 : );
814 :
815 3 : let second_part_remote = aggregate(second_part_download.download_stream).await?;
816 3 : assert_eq!(
817 : second_part_local, second_part_remote,
818 0 : "Second part bytes should be returned when requested"
819 : );
820 :
821 3 : let suffix_bytes = storage
822 3 : .download(
823 3 : &upload_target,
824 3 : &DownloadOpts {
825 3 : byte_start: Bound::Included(13),
826 3 : ..Default::default()
827 3 : },
828 3 : &cancel,
829 3 : )
830 3 : .await?
831 : .download_stream;
832 3 : let suffix_bytes = aggregate(suffix_bytes).await?;
833 3 : let suffix = std::str::from_utf8(&suffix_bytes)?;
834 3 : assert_eq!(upload_name, suffix);
835 :
836 3 : let all_bytes = storage
837 3 : .download(&upload_target, &DownloadOpts::default(), &cancel)
838 3 : .await?
839 : .download_stream;
840 3 : let all_bytes = aggregate(all_bytes).await?;
841 3 : let all_bytes = std::str::from_utf8(&all_bytes)?;
842 3 : assert_eq!(dummy_contents("upload_1"), all_bytes);
843 :
844 6 : Ok(())
845 3 : }
846 :
847 : #[tokio::test]
848 : #[should_panic(expected = "at or before start")]
849 3 : async fn download_file_range_negative() {
850 3 : let (storage, cancel) = create_storage().unwrap();
851 3 : let upload_name = "upload_1";
852 3 : let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel)
853 3 : .await
854 3 : .unwrap();
855 :
856 3 : storage
857 3 : .download(
858 3 : &upload_target,
859 3 : &DownloadOpts {
860 3 : byte_start: Bound::Included(10),
861 3 : byte_end: Bound::Excluded(10),
862 3 : ..Default::default()
863 3 : },
864 3 : &cancel,
865 3 : )
866 3 : .await
867 3 : .unwrap();
868 3 : }
869 :
870 : #[tokio::test]
871 3 : async fn delete_file() -> anyhow::Result<()> {
872 3 : let (storage, cancel) = create_storage()?;
873 3 : let upload_name = "upload_1";
874 3 : let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
875 :
876 3 : storage.delete(&upload_target, &cancel).await?;
877 3 : assert!(storage.list_all().await?.is_empty());
878 :
879 3 : storage
880 3 : .delete(&upload_target, &cancel)
881 3 : .await
882 3 : .expect("Should allow deleting non-existing storage files");
883 :
884 6 : Ok(())
885 3 : }
886 :
887 : #[tokio::test]
888 3 : async fn file_with_metadata() -> anyhow::Result<()> {
889 3 : let (storage, cancel) = create_storage()?;
890 3 : let upload_name = "upload_1";
891 3 : let metadata = StorageMetadata(HashMap::from([
892 3 : ("one".to_string(), "1".to_string()),
893 3 : ("two".to_string(), "2".to_string()),
894 3 : ]));
895 3 : let upload_target =
896 3 : upload_dummy_file(&storage, upload_name, Some(metadata.clone()), &cancel).await?;
897 :
898 3 : let full_range_download_contents =
899 3 : read_and_check_metadata(&storage, &upload_target, Some(&metadata)).await?;
900 3 : assert_eq!(
901 3 : dummy_contents(upload_name),
902 : full_range_download_contents,
903 0 : "We should upload and download the same contents"
904 : );
905 :
906 3 : let uploaded_bytes = dummy_contents(upload_name).into_bytes();
907 3 : let (first_part_local, _) = uploaded_bytes.split_at(3);
908 :
909 3 : let partial_download_with_metadata = storage
910 3 : .download(
911 3 : &upload_target,
912 3 : &DownloadOpts {
913 3 : byte_end: Bound::Excluded(first_part_local.len() as u64),
914 3 : ..Default::default()
915 3 : },
916 3 : &cancel,
917 3 : )
918 3 : .await?;
919 3 : let first_part_remote = aggregate(partial_download_with_metadata.download_stream).await?;
920 3 : assert_eq!(
921 : first_part_local,
922 3 : first_part_remote.as_slice(),
923 0 : "First part bytes should be returned when requested"
924 : );
925 :
926 3 : assert_eq!(
927 : partial_download_with_metadata.metadata,
928 3 : Some(metadata),
929 0 : "We should get the same metadata back for partial download"
930 : );
931 :
932 6 : Ok(())
933 3 : }
934 :
935 : #[tokio::test]
936 3 : async fn list() -> anyhow::Result<()> {
937 : // No delimiter: should recursively list everything
938 3 : let (storage, cancel) = create_storage()?;
939 3 : let child = upload_dummy_file(&storage, "grandparent/parent/child", None, &cancel).await?;
940 3 : let child_sibling =
941 3 : upload_dummy_file(&storage, "grandparent/parent/child_sibling", None, &cancel).await?;
942 3 : let uncle = upload_dummy_file(&storage, "grandparent/uncle", None, &cancel).await?;
943 :
944 3 : let listing = storage
945 3 : .list(None, ListingMode::NoDelimiter, None, &cancel)
946 3 : .await?;
947 3 : assert!(listing.prefixes.is_empty());
948 3 : assert_eq!(
949 3 : listing
950 3 : .keys
951 3 : .into_iter()
952 3 : .map(|o| o.key)
953 3 : .collect::<HashSet<_>>(),
954 3 : HashSet::from([uncle.clone(), child.clone(), child_sibling.clone()])
955 : );
956 :
957 : // Delimiter: should only go one deep
958 3 : let listing = storage
959 3 : .list(None, ListingMode::WithDelimiter, None, &cancel)
960 3 : .await?;
961 :
962 3 : assert_eq!(
963 : listing.prefixes,
964 3 : [RemotePath::from_string("timelines").unwrap()].to_vec()
965 : );
966 3 : assert!(listing.keys.is_empty());
967 :
968 : // Delimiter & prefix with a trailing slash
969 3 : let listing = storage
970 3 : .list(
971 3 : Some(&RemotePath::from_string("timelines/some_timeline/grandparent/").unwrap()),
972 3 : ListingMode::WithDelimiter,
973 3 : None,
974 3 : &cancel,
975 3 : )
976 3 : .await?;
977 3 : assert_eq!(
978 3 : listing.keys.into_iter().map(|o| o.key).collect::<Vec<_>>(),
979 3 : [RemotePath::from_string("uncle").unwrap()].to_vec()
980 : );
981 3 : assert_eq!(
982 : listing.prefixes,
983 3 : [RemotePath::from_string("parent").unwrap()].to_vec()
984 : );
985 :
986 : // Delimiter and prefix without a trailing slash
987 3 : let listing = storage
988 3 : .list(
989 3 : Some(&RemotePath::from_string("timelines/some_timeline/grandparent").unwrap()),
990 3 : ListingMode::WithDelimiter,
991 3 : None,
992 3 : &cancel,
993 3 : )
994 3 : .await?;
995 3 : assert_eq!(listing.keys, vec![]);
996 3 : assert_eq!(
997 : listing.prefixes,
998 3 : [RemotePath::from_string("grandparent").unwrap()].to_vec()
999 : );
1000 :
1001 : // Delimiter and prefix that's partway through a path component
1002 3 : let listing = storage
1003 3 : .list(
1004 3 : Some(&RemotePath::from_string("timelines/some_timeline/grandp").unwrap()),
1005 3 : ListingMode::WithDelimiter,
1006 3 : None,
1007 3 : &cancel,
1008 3 : )
1009 3 : .await?;
1010 3 : assert_eq!(listing.keys, vec![]);
1011 3 : assert_eq!(
1012 : listing.prefixes,
1013 3 : [RemotePath::from_string("grandparent").unwrap()].to_vec()
1014 : );
1015 :
1016 6 : Ok(())
1017 3 : }
1018 :
1019 : #[tokio::test]
1020 3 : async fn list_part_component() -> anyhow::Result<()> {
1021 : // No delimiter: should recursively list everything
1022 3 : let (storage, cancel) = create_storage()?;
1023 :
1024 : // Imitates what happens in a tenant path when we have an unsharded path and a sharded path, and do a listing
1025 : // of the unsharded path: although there is a "directory" at the unsharded path, it should be handled as
1026 : // a freeform prefix.
1027 3 : let _child_a =
1028 3 : upload_dummy_file(&storage, "grandparent/tenant-01/child", None, &cancel).await?;
1029 3 : let _child_b =
1030 3 : upload_dummy_file(&storage, "grandparent/tenant/child", None, &cancel).await?;
1031 :
1032 : // Delimiter and prefix that's partway through a path component
1033 3 : let listing = storage
1034 3 : .list(
1035 3 : Some(
1036 3 : &RemotePath::from_string("timelines/some_timeline/grandparent/tenant").unwrap(),
1037 3 : ),
1038 3 : ListingMode::WithDelimiter,
1039 3 : None,
1040 3 : &cancel,
1041 3 : )
1042 3 : .await?;
1043 3 : assert_eq!(listing.keys, vec![]);
1044 :
1045 3 : let mut found_prefixes = listing.prefixes.clone();
1046 3 : found_prefixes.sort();
1047 3 : assert_eq!(
1048 : found_prefixes,
1049 3 : [
1050 3 : RemotePath::from_string("tenant").unwrap(),
1051 3 : RemotePath::from_string("tenant-01").unwrap(),
1052 3 : ]
1053 3 : .to_vec()
1054 : );
1055 :
1056 6 : Ok(())
1057 3 : }
1058 :
1059 : #[tokio::test]
1060 3 : async fn overwrite_shorter_file() -> anyhow::Result<()> {
1061 3 : let (storage, cancel) = create_storage()?;
1062 :
1063 3 : let path = RemotePath::new("does/not/matter/file".into())?;
1064 :
1065 3 : let body = Bytes::from_static(b"long file contents is long");
1066 : {
1067 3 : let len = body.len();
1068 3 : let body =
1069 3 : futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
1070 3 : storage.upload(body, len, &path, None, &cancel).await?;
1071 : }
1072 :
1073 3 : let read = aggregate(
1074 3 : storage
1075 3 : .download(&path, &DownloadOpts::default(), &cancel)
1076 3 : .await?
1077 : .download_stream,
1078 : )
1079 3 : .await?;
1080 3 : assert_eq!(body, read);
1081 :
1082 3 : let shorter = Bytes::from_static(b"shorter body");
1083 : {
1084 3 : let len = shorter.len();
1085 3 : let body =
1086 3 : futures::stream::once(futures::future::ready(std::io::Result::Ok(shorter.clone())));
1087 3 : storage.upload(body, len, &path, None, &cancel).await?;
1088 : }
1089 :
1090 3 : let read = aggregate(
1091 3 : storage
1092 3 : .download(&path, &DownloadOpts::default(), &cancel)
1093 3 : .await?
1094 : .download_stream,
1095 : )
1096 3 : .await?;
1097 3 : assert_eq!(shorter, read);
1098 6 : Ok(())
1099 3 : }
1100 :
1101 : #[tokio::test]
1102 3 : async fn cancelled_upload_can_later_be_retried() -> anyhow::Result<()> {
1103 3 : let (storage, cancel) = create_storage()?;
1104 :
1105 3 : let path = RemotePath::new("does/not/matter/file".into())?;
1106 :
1107 3 : let body = Bytes::from_static(b"long file contents is long");
1108 : {
1109 3 : let len = body.len();
1110 3 : let body =
1111 3 : futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
1112 3 : let cancel = cancel.child_token();
1113 3 : cancel.cancel();
1114 3 : let e = storage
1115 3 : .upload(body, len, &path, None, &cancel)
1116 3 : .await
1117 3 : .unwrap_err();
1118 :
1119 3 : assert!(TimeoutOrCancel::caused_by_cancel(&e));
1120 : }
1121 :
1122 : {
1123 3 : let len = body.len();
1124 3 : let body =
1125 3 : futures::stream::once(futures::future::ready(std::io::Result::Ok(body.clone())));
1126 3 : storage.upload(body, len, &path, None, &cancel).await?;
1127 : }
1128 :
1129 3 : let read = aggregate(
1130 3 : storage
1131 3 : .download(&path, &DownloadOpts::default(), &cancel)
1132 3 : .await?
1133 : .download_stream,
1134 : )
1135 3 : .await?;
1136 3 : assert_eq!(body, read);
1137 :
1138 6 : Ok(())
1139 3 : }
1140 :
1141 36 : async fn upload_dummy_file(
1142 36 : storage: &LocalFs,
1143 36 : name: &str,
1144 36 : metadata: Option<StorageMetadata>,
1145 36 : cancel: &CancellationToken,
1146 36 : ) -> anyhow::Result<RemotePath> {
1147 36 : let from_path = storage
1148 36 : .storage_root
1149 36 : .join("timelines")
1150 36 : .join("some_timeline")
1151 36 : .join(name);
1152 36 : let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
1153 :
1154 36 : let relative_path = from_path
1155 36 : .strip_prefix(&storage.storage_root)
1156 36 : .context("Failed to strip storage root prefix")
1157 36 : .and_then(RemotePath::new)
1158 36 : .with_context(|| {
1159 0 : format!(
1160 0 : "Failed to resolve remote part of path {:?} for base {:?}",
1161 : from_path, storage.storage_root
1162 : )
1163 0 : })?;
1164 :
1165 36 : let file = tokio_util::io::ReaderStream::new(file);
1166 :
1167 36 : storage
1168 36 : .upload(file, size, &relative_path, metadata, cancel)
1169 36 : .await?;
1170 36 : Ok(relative_path)
1171 36 : }
1172 :
1173 36 : async fn create_file_for_upload(
1174 36 : path: &Utf8Path,
1175 36 : contents: &str,
1176 36 : ) -> anyhow::Result<(fs::File, usize)> {
1177 36 : std::fs::create_dir_all(path.parent().unwrap())?;
1178 36 : let mut file_for_writing = std::fs::OpenOptions::new()
1179 36 : .write(true)
1180 36 : .create_new(true)
1181 36 : .open(path)?;
1182 36 : write!(file_for_writing, "{contents}")?;
1183 36 : drop(file_for_writing);
1184 36 : let file_size = path.metadata()?.len() as usize;
1185 : Ok((
1186 36 : fs::OpenOptions::new().read(true).open(&path).await?,
1187 36 : file_size,
1188 : ))
1189 36 : }
1190 :
/// Deterministic file body derived from the upload name, so tests can predict exactly
/// what was stored for a given name.
fn dummy_contents(name: &str) -> String {
    let mut contents = String::from("contents for ");
    contents.push_str(name);
    contents
}
1194 :
1195 3 : async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {
1196 3 : let mut files = storage.list_all().await?;
1197 3 : files.sort_by(|a, b| a.0.cmp(&b.0));
1198 3 : Ok(files)
1199 3 : }
1200 :
1201 33 : async fn aggregate(
1202 33 : stream: impl Stream<Item = std::io::Result<Bytes>>,
1203 33 : ) -> anyhow::Result<Vec<u8>> {
1204 : use futures::stream::StreamExt;
1205 33 : let mut out = Vec::new();
1206 33 : let mut stream = std::pin::pin!(stream);
1207 66 : while let Some(res) = stream.next().await {
1208 33 : out.extend_from_slice(&res?[..]);
1209 : }
1210 33 : Ok(out)
1211 33 : }
1212 : }
|