Line data Source code
1 : use std::collections::HashSet;
2 : use std::ops::ControlFlow;
3 : use std::path::PathBuf;
4 : use std::sync::Arc;
5 :
6 : use anyhow::Context;
7 : use bytes::Bytes;
8 : use camino::Utf8Path;
9 : use futures::stream::Stream;
10 : use once_cell::sync::OnceCell;
11 : use remote_storage::{Download, GenericRemoteStorage, RemotePath};
12 : use tokio::task::JoinSet;
13 : use tokio_util::sync::CancellationToken;
14 : use tracing::{debug, error, info};
15 :
/// One-time guard for test logging setup; `ensure_logging_ready` initializes
/// it through `get_or_init`, so the logging init closure runs at most once.
static LOGGING_DONE: OnceCell<()> = OnceCell::new();
17 :
18 0 : pub(crate) fn upload_stream(
19 0 : content: std::borrow::Cow<'static, [u8]>,
20 0 : ) -> (
21 0 : impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
22 0 : usize,
23 0 : ) {
24 : use std::borrow::Cow;
25 :
26 0 : let content = match content {
27 0 : Cow::Borrowed(x) => Bytes::from_static(x),
28 0 : Cow::Owned(vec) => Bytes::from(vec),
29 : };
30 0 : wrap_stream(content)
31 0 : }
32 :
33 0 : pub(crate) fn wrap_stream(
34 0 : content: bytes::Bytes,
35 0 : ) -> (
36 0 : impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
37 0 : usize,
38 0 : ) {
39 0 : let len = content.len();
40 0 : let content = futures::future::ready(Ok(content));
41 0 :
42 0 : (futures::stream::once(content), len)
43 0 : }
44 :
45 0 : pub(crate) async fn download_to_vec(dl: Download) -> anyhow::Result<Vec<u8>> {
46 0 : let mut buf = Vec::new();
47 0 : tokio::io::copy_buf(
48 0 : &mut tokio_util::io::StreamReader::new(dl.download_stream),
49 0 : &mut buf,
50 0 : )
51 0 : .await?;
52 0 : Ok(buf)
53 0 : }
54 :
55 : // Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
56 0 : pub(crate) async fn upload_simple_remote_data(
57 0 : client: &Arc<GenericRemoteStorage>,
58 0 : upload_tasks_count: usize,
59 0 : ) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
60 0 : info!("Creating {upload_tasks_count} remote files");
61 0 : let mut upload_tasks = JoinSet::new();
62 0 : let cancel = CancellationToken::new();
63 :
64 0 : for i in 1..upload_tasks_count + 1 {
65 0 : let task_client = Arc::clone(client);
66 0 : let cancel = cancel.clone();
67 0 :
68 0 : upload_tasks.spawn(async move {
69 0 : let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
70 0 : let blob_path = RemotePath::new(
71 0 : Utf8Path::from_path(blob_path.as_path()).expect("must be valid blob path"),
72 0 : )
73 0 : .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
74 0 : debug!("Creating remote item {i} at path {blob_path:?}");
75 :
76 0 : let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
77 0 : task_client
78 0 : .upload(data, len, &blob_path, None, &cancel)
79 0 : .await?;
80 :
81 0 : Ok::<_, anyhow::Error>(blob_path)
82 0 : });
83 0 : }
84 :
85 0 : let mut upload_tasks_failed = false;
86 0 : let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
87 0 : while let Some(task_run_result) = upload_tasks.join_next().await {
88 0 : match task_run_result
89 0 : .context("task join failed")
90 0 : .and_then(|task_result| task_result.context("upload task failed"))
91 : {
92 0 : Ok(upload_path) => {
93 0 : uploaded_blobs.insert(upload_path);
94 0 : }
95 0 : Err(e) => {
96 0 : error!("Upload task failed: {e:?}");
97 0 : upload_tasks_failed = true;
98 : }
99 : }
100 : }
101 :
102 0 : if upload_tasks_failed {
103 0 : ControlFlow::Break(uploaded_blobs)
104 : } else {
105 0 : ControlFlow::Continue(uploaded_blobs)
106 : }
107 0 : }
108 :
109 0 : pub(crate) async fn cleanup(
110 0 : client: &Arc<GenericRemoteStorage>,
111 0 : objects_to_delete: HashSet<RemotePath>,
112 0 : ) {
113 0 : info!(
114 0 : "Removing {} objects from the remote storage during cleanup",
115 0 : objects_to_delete.len()
116 0 : );
117 0 : let cancel = CancellationToken::new();
118 0 : let mut delete_tasks = JoinSet::new();
119 0 : for object_to_delete in objects_to_delete {
120 0 : let task_client = Arc::clone(client);
121 0 : let cancel = cancel.clone();
122 0 : delete_tasks.spawn(async move {
123 0 : debug!("Deleting remote item at path {object_to_delete:?}");
124 0 : task_client
125 0 : .delete(&object_to_delete, &cancel)
126 0 : .await
127 0 : .with_context(|| format!("{object_to_delete:?} removal"))
128 0 : });
129 0 : }
130 :
131 0 : while let Some(task_run_result) = delete_tasks.join_next().await {
132 0 : match task_run_result {
133 0 : Ok(task_result) => match task_result {
134 0 : Ok(()) => {}
135 0 : Err(e) => error!("Delete task failed: {e:?}"),
136 : },
137 0 : Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
138 : }
139 : }
140 0 : }
141 : pub(crate) struct Uploads {
142 : pub(crate) prefixes: HashSet<RemotePath>,
143 : pub(crate) blobs: HashSet<RemotePath>,
144 : }
145 :
146 0 : pub(crate) async fn upload_remote_data(
147 0 : client: &Arc<GenericRemoteStorage>,
148 0 : base_prefix_str: &'static str,
149 0 : upload_tasks_count: usize,
150 0 : ) -> ControlFlow<Uploads, Uploads> {
151 0 : info!("Creating {upload_tasks_count} remote files");
152 0 : let mut upload_tasks = JoinSet::new();
153 0 : let cancel = CancellationToken::new();
154 :
155 0 : for i in 1..upload_tasks_count + 1 {
156 0 : let task_client = Arc::clone(client);
157 0 : let cancel = cancel.clone();
158 0 :
159 0 : upload_tasks.spawn(async move {
160 0 : let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
161 0 : let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
162 0 : .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
163 0 : let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
164 0 : debug!("Creating remote item {i} at path {blob_path:?}");
165 :
166 0 : let (data, data_len) =
167 0 : upload_stream(format!("remote blob data {i}").into_bytes().into());
168 0 : task_client
169 0 : .upload(data, data_len, &blob_path, None, &cancel)
170 0 : .await?;
171 :
172 0 : Ok::<_, anyhow::Error>((blob_prefix, blob_path))
173 0 : });
174 0 : }
175 :
176 0 : let mut upload_tasks_failed = false;
177 0 : let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
178 0 : let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
179 0 : while let Some(task_run_result) = upload_tasks.join_next().await {
180 0 : match task_run_result
181 0 : .context("task join failed")
182 0 : .and_then(|task_result| task_result.context("upload task failed"))
183 : {
184 0 : Ok((upload_prefix, upload_path)) => {
185 0 : uploaded_prefixes.insert(upload_prefix);
186 0 : uploaded_blobs.insert(upload_path);
187 0 : }
188 0 : Err(e) => {
189 0 : error!("Upload task failed: {e:?}");
190 0 : upload_tasks_failed = true;
191 : }
192 : }
193 : }
194 :
195 0 : let uploads = Uploads {
196 0 : prefixes: uploaded_prefixes,
197 0 : blobs: uploaded_blobs,
198 0 : };
199 0 : if upload_tasks_failed {
200 0 : ControlFlow::Break(uploads)
201 : } else {
202 0 : ControlFlow::Continue(uploads)
203 : }
204 0 : }
205 :
206 30 : pub(crate) fn ensure_logging_ready() {
207 30 : LOGGING_DONE.get_or_init(|| {
208 30 : utils::logging::init(
209 30 : utils::logging::LogFormat::Test,
210 30 : utils::logging::TracingErrorLayerEnablement::Disabled,
211 30 : utils::logging::Output::Stdout,
212 30 : )
213 30 : .expect("logging init failed");
214 30 : });
215 30 : }
|