Line data Source code
1 : use std::collections::HashSet;
2 : use std::ops::ControlFlow;
3 : use std::path::PathBuf;
4 : use std::sync::Arc;
5 :
6 : use anyhow::Context;
7 : use bytes::Bytes;
8 : use camino::Utf8Path;
9 : use futures::stream::Stream;
10 : use once_cell::sync::OnceCell;
11 : use remote_storage::{Download, GenericRemoteStorage, RemotePath};
12 : use tokio::task::JoinSet;
13 : use tokio_util::sync::CancellationToken;
14 : use tracing::{debug, error, info};
15 :
// Cell that `ensure_logging_ready` initializes through `get_or_init`, so the
// logging setup closure is tied to this single process-wide value.
static LOGGING_DONE: OnceCell<()> = OnceCell::new();
17 :
18 143 : pub(crate) fn upload_stream(
19 143 : content: std::borrow::Cow<'static, [u8]>,
20 143 : ) -> (
21 143 : impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
22 143 : usize,
23 143 : ) {
24 : use std::borrow::Cow;
25 :
26 143 : let content = match content {
27 17 : Cow::Borrowed(x) => Bytes::from_static(x),
28 126 : Cow::Owned(vec) => Bytes::from(vec),
29 : };
30 143 : wrap_stream(content)
31 143 : }
32 :
33 149 : pub(crate) fn wrap_stream(
34 149 : content: bytes::Bytes,
35 149 : ) -> (
36 149 : impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
37 149 : usize,
38 149 : ) {
39 149 : let len = content.len();
40 149 : let content = futures::future::ready(Ok(content));
41 149 :
42 149 : (futures::stream::once(content), len)
43 149 : }
44 :
45 25 : pub(crate) async fn download_to_vec(dl: Download) -> anyhow::Result<Vec<u8>> {
46 25 : let mut buf = Vec::new();
47 25 : tokio::io::copy_buf(
48 25 : &mut tokio_util::io::StreamReader::new(dl.download_stream),
49 25 : &mut buf,
50 25 : )
51 8 : .await?;
52 25 : Ok(buf)
53 25 : }
54 :
55 : // Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
56 3 : pub(crate) async fn upload_simple_remote_data(
57 3 : client: &Arc<GenericRemoteStorage>,
58 3 : upload_tasks_count: usize,
59 3 : ) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
60 3 : info!("Creating {upload_tasks_count} remote files");
61 3 : let mut upload_tasks = JoinSet::new();
62 3 : let cancel = CancellationToken::new();
63 :
64 63 : for i in 1..upload_tasks_count + 1 {
65 63 : let task_client = Arc::clone(client);
66 63 : let cancel = cancel.clone();
67 63 :
68 63 : upload_tasks.spawn(async move {
69 63 : let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
70 63 : let blob_path = RemotePath::new(
71 63 : Utf8Path::from_path(blob_path.as_path()).expect("must be valid blob path"),
72 63 : )
73 63 : .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
74 63 : debug!("Creating remote item {i} at path {blob_path:?}");
75 :
76 63 : let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
77 63 : task_client
78 63 : .upload(data, len, &blob_path, None, &cancel)
79 349 : .await?;
80 :
81 63 : Ok::<_, anyhow::Error>(blob_path)
82 63 : });
83 63 : }
84 :
85 3 : let mut upload_tasks_failed = false;
86 3 : let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
87 66 : while let Some(task_run_result) = upload_tasks.join_next().await {
88 63 : match task_run_result
89 63 : .context("task join failed")
90 63 : .and_then(|task_result| task_result.context("upload task failed"))
91 : {
92 63 : Ok(upload_path) => {
93 63 : uploaded_blobs.insert(upload_path);
94 63 : }
95 0 : Err(e) => {
96 0 : error!("Upload task failed: {e:?}");
97 0 : upload_tasks_failed = true;
98 : }
99 : }
100 : }
101 :
102 3 : if upload_tasks_failed {
103 0 : ControlFlow::Break(uploaded_blobs)
104 : } else {
105 3 : ControlFlow::Continue(uploaded_blobs)
106 : }
107 3 : }
108 :
109 6 : pub(crate) async fn cleanup(
110 6 : client: &Arc<GenericRemoteStorage>,
111 6 : objects_to_delete: HashSet<RemotePath>,
112 6 : ) {
113 6 : info!(
114 0 : "Removing {} objects from the remote storage during cleanup",
115 0 : objects_to_delete.len()
116 : );
117 6 : let cancel = CancellationToken::new();
118 6 : let mut delete_tasks = JoinSet::new();
119 132 : for object_to_delete in objects_to_delete {
120 126 : let task_client = Arc::clone(client);
121 126 : let cancel = cancel.clone();
122 126 : delete_tasks.spawn(async move {
123 126 : debug!("Deleting remote item at path {object_to_delete:?}");
124 126 : task_client
125 126 : .delete(&object_to_delete, &cancel)
126 466 : .await
127 126 : .with_context(|| format!("{object_to_delete:?} removal"))
128 126 : });
129 126 : }
130 :
131 132 : while let Some(task_run_result) = delete_tasks.join_next().await {
132 126 : match task_run_result {
133 126 : Ok(task_result) => match task_result {
134 126 : Ok(()) => {}
135 0 : Err(e) => error!("Delete task failed: {e:?}"),
136 : },
137 0 : Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
138 : }
139 : }
140 6 : }
141 : pub(crate) struct Uploads {
142 : pub(crate) prefixes: HashSet<RemotePath>,
143 : pub(crate) blobs: HashSet<RemotePath>,
144 : }
145 :
146 3 : pub(crate) async fn upload_remote_data(
147 3 : client: &Arc<GenericRemoteStorage>,
148 3 : base_prefix_str: &'static str,
149 3 : upload_tasks_count: usize,
150 3 : ) -> ControlFlow<Uploads, Uploads> {
151 3 : info!("Creating {upload_tasks_count} remote files");
152 3 : let mut upload_tasks = JoinSet::new();
153 3 : let cancel = CancellationToken::new();
154 :
155 63 : for i in 1..=upload_tasks_count {
156 63 : let task_client = Arc::clone(client);
157 63 : let cancel = cancel.clone();
158 63 :
159 63 : upload_tasks.spawn(async move {
160 63 : let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
161 63 : let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
162 63 : .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
163 63 : let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
164 63 : debug!("Creating remote item {i} at path {blob_path:?}");
165 :
166 63 : let (data, data_len) =
167 63 : upload_stream(format!("remote blob data {i}").into_bytes().into());
168 63 : task_client
169 63 : .upload(data, data_len, &blob_path, None, &cancel)
170 338 : .await?;
171 :
172 63 : Ok::<_, anyhow::Error>((blob_prefix, blob_path))
173 63 : });
174 63 : }
175 :
176 3 : let mut upload_tasks_failed = false;
177 3 : let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
178 3 : let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
179 66 : while let Some(task_run_result) = upload_tasks.join_next().await {
180 63 : match task_run_result
181 63 : .context("task join failed")
182 63 : .and_then(|task_result| task_result.context("upload task failed"))
183 : {
184 63 : Ok((upload_prefix, upload_path)) => {
185 63 : uploaded_prefixes.insert(upload_prefix);
186 63 : uploaded_blobs.insert(upload_path);
187 63 : }
188 0 : Err(e) => {
189 0 : error!("Upload task failed: {e:?}");
190 0 : upload_tasks_failed = true;
191 : }
192 : }
193 : }
194 :
195 3 : let uploads = Uploads {
196 3 : prefixes: uploaded_prefixes,
197 3 : blobs: uploaded_blobs,
198 3 : };
199 3 : if upload_tasks_failed {
200 0 : ControlFlow::Break(uploads)
201 : } else {
202 3 : ControlFlow::Continue(uploads)
203 : }
204 3 : }
205 :
206 45 : pub(crate) fn ensure_logging_ready() {
207 45 : LOGGING_DONE.get_or_init(|| {
208 45 : utils::logging::init(
209 45 : utils::logging::LogFormat::Test,
210 45 : utils::logging::TracingErrorLayerEnablement::Disabled,
211 45 : utils::logging::Output::Stdout,
212 45 : )
213 45 : .expect("logging init failed");
214 45 : });
215 45 : }
|