Line data Source code
1 : use std::sync::Arc;
2 :
3 : use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
4 : use super::Timeline;
5 : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
6 : use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};
7 :
8 0 : #[derive(thiserror::Error, Debug)]
9 : pub(crate) enum OffloadError {
10 : #[error("Cancelled")]
11 : Cancelled,
12 : #[error("Timeline is not archived")]
13 : NotArchived,
14 : #[error(transparent)]
15 : RemoteStorage(anyhow::Error),
16 : #[error("Unexpected offload error: {0}")]
17 : Other(anyhow::Error),
18 : }
19 :
20 : impl From<TenantManifestError> for OffloadError {
21 0 : fn from(e: TenantManifestError) -> Self {
22 0 : match e {
23 0 : TenantManifestError::Cancelled => Self::Cancelled,
24 0 : TenantManifestError::RemoteStorage(e) => Self::RemoteStorage(e),
25 : }
26 0 : }
27 : }
28 :
29 0 : pub(crate) async fn offload_timeline(
30 0 : tenant: &Tenant,
31 0 : timeline: &Arc<Timeline>,
32 0 : ) -> Result<(), OffloadError> {
33 0 : debug_assert_current_span_has_tenant_and_timeline_id();
34 0 : tracing::info!("offloading archived timeline");
35 :
36 0 : let allow_offloaded_children = true;
37 0 : let (timeline, guard) =
38 0 : DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)
39 0 : .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
40 :
41 0 : let TimelineOrOffloaded::Timeline(timeline) = timeline else {
42 0 : tracing::error!("timeline already offloaded, but given timeline object");
43 0 : return Ok(());
44 : };
45 :
46 0 : let is_archived = timeline.is_archived();
47 0 : match is_archived {
48 0 : Some(true) => (),
49 : Some(false) => {
50 0 : tracing::warn!("tried offloading a non-archived timeline");
51 0 : return Err(OffloadError::NotArchived);
52 : }
53 : None => {
54 : // This is legal: calls to this function can race with the timeline shutting down
55 0 : tracing::info!("tried offloading a timeline whose remote storage is not initialized");
56 0 : return Err(OffloadError::Cancelled);
57 : }
58 : }
59 :
60 : // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
61 0 : timeline.shutdown(super::ShutdownMode::Flush).await;
62 :
63 : // TODO extend guard mechanism above with method
64 : // to make deletions possible while offloading is in progress
65 :
66 0 : let conf = &tenant.conf;
67 0 : delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;
68 :
69 0 : remove_timeline_from_tenant(tenant, &timeline, &guard);
70 0 :
71 0 : {
72 0 : let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
73 0 : offloaded_timelines.insert(
74 0 : timeline.timeline_id,
75 0 : Arc::new(
76 0 : OffloadedTimeline::from_timeline(&timeline)
77 0 : .expect("we checked above that timeline was ready"),
78 0 : ),
79 0 : );
80 0 : }
81 0 :
82 0 : // Last step: mark timeline as offloaded in S3
83 0 : // TODO: maybe move this step above, right above deletion of the local timeline directory,
84 0 : // then there is no potential race condition where we partially offload a timeline, and
85 0 : // at the next restart attach it again.
86 0 : // For that to happen, we'd need to make the manifest reflect our *intended* state,
87 0 : // not our actual state of offloaded timelines.
88 0 : tenant.store_tenant_manifest().await?;
89 :
90 0 : Ok(())
91 0 : }
92 :
93 : /// It is important that this gets called when DeletionGuard is being held.
94 : /// For more context see comments in [`DeleteTimelineFlow::prepare`]
95 0 : fn remove_timeline_from_tenant(
96 0 : tenant: &Tenant,
97 0 : timeline: &Timeline,
98 0 : _: &DeletionGuard, // using it as a witness
99 0 : ) {
100 0 : // Remove the timeline from the map.
101 0 : let mut timelines = tenant.timelines.lock().unwrap();
102 0 : let children_exist = timelines
103 0 : .iter()
104 0 : .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
105 0 : // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
106 0 : // We already deleted the layer files, so it's probably best to panic.
107 0 : // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
108 0 : if children_exist {
109 0 : panic!("Timeline grew children while we removed layer files");
110 0 : }
111 0 :
112 0 : timelines
113 0 : .remove(&timeline.timeline_id)
114 0 : .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
115 0 : }
|