Line data Source code
1 : use std::sync::Arc;
2 :
3 : use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
4 : use super::Timeline;
5 : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
6 : use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};
7 :
8 0 : #[derive(thiserror::Error, Debug)]
9 : pub(crate) enum OffloadError {
10 : #[error("Cancelled")]
11 : Cancelled,
12 : #[error("Timeline is not archived")]
13 : NotArchived,
14 : #[error(transparent)]
15 : RemoteStorage(anyhow::Error),
16 : #[error("Unexpected offload error: {0}")]
17 : Other(anyhow::Error),
18 : }
19 :
20 : impl From<TenantManifestError> for OffloadError {
21 0 : fn from(e: TenantManifestError) -> Self {
22 0 : match e {
23 0 : TenantManifestError::Cancelled => Self::Cancelled,
24 0 : TenantManifestError::RemoteStorage(e) => Self::RemoteStorage(e),
25 : }
26 0 : }
27 : }
28 :
29 2 : pub(crate) async fn offload_timeline(
30 2 : tenant: &Tenant,
31 2 : timeline: &Arc<Timeline>,
32 2 : ) -> Result<(), OffloadError> {
33 2 : debug_assert_current_span_has_tenant_and_timeline_id();
34 2 : tracing::info!("offloading archived timeline");
35 :
36 2 : let allow_offloaded_children = true;
37 2 : let (timeline, guard) =
38 2 : DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)
39 2 : .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
40 :
41 2 : let TimelineOrOffloaded::Timeline(timeline) = timeline else {
42 0 : tracing::error!("timeline already offloaded, but given timeline object");
43 0 : return Ok(());
44 : };
45 :
46 2 : let is_archived = timeline.is_archived();
47 2 : match is_archived {
48 2 : Some(true) => (),
49 : Some(false) => {
50 0 : tracing::warn!("tried offloading a non-archived timeline");
51 0 : return Err(OffloadError::NotArchived);
52 : }
53 : None => {
54 : // This is legal: calls to this function can race with the timeline shutting down
55 0 : tracing::info!("tried offloading a timeline whose remote storage is not initialized");
56 0 : return Err(OffloadError::Cancelled);
57 : }
58 : }
59 :
60 : // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
61 2 : timeline.shutdown(super::ShutdownMode::Flush).await;
62 :
63 : // TODO extend guard mechanism above with method
64 : // to make deletions possible while offloading is in progress
65 :
66 2 : let conf = &tenant.conf;
67 6 : delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;
68 :
69 2 : let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard);
70 2 :
71 2 : {
72 2 : let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
73 2 : offloaded_timelines.insert(
74 2 : timeline.timeline_id,
75 2 : Arc::new(
76 2 : OffloadedTimeline::from_timeline(&timeline)
77 2 : .expect("we checked above that timeline was ready"),
78 2 : ),
79 2 : );
80 2 : }
81 2 :
82 2 : // Last step: mark timeline as offloaded in S3
83 2 : // TODO: maybe move this step above, right above deletion of the local timeline directory,
84 2 : // then there is no potential race condition where we partially offload a timeline, and
85 2 : // at the next restart attach it again.
86 2 : // For that to happen, we'd need to make the manifest reflect our *intended* state,
87 2 : // not our actual state of offloaded timelines.
88 8 : tenant.store_tenant_manifest().await?;
89 :
90 2 : tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})");
91 :
92 2 : Ok(())
93 2 : }
94 :
95 : /// It is important that this gets called when DeletionGuard is being held.
96 : /// For more context see comments in [`DeleteTimelineFlow::prepare`]
97 : ///
98 : /// Returns the strong count of the timeline `Arc`
99 2 : fn remove_timeline_from_tenant(
100 2 : tenant: &Tenant,
101 2 : timeline: &Timeline,
102 2 : _: &DeletionGuard, // using it as a witness
103 2 : ) -> usize {
104 2 : // Remove the timeline from the map.
105 2 : let mut timelines = tenant.timelines.lock().unwrap();
106 2 : let children_exist = timelines
107 2 : .iter()
108 4 : .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
109 2 : // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
110 2 : // We already deleted the layer files, so it's probably best to panic.
111 2 : // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
112 2 : if children_exist {
113 0 : panic!("Timeline grew children while we removed layer files");
114 2 : }
115 2 :
116 2 : let timeline = timelines
117 2 : .remove(&timeline.timeline_id)
118 2 : .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
119 2 :
120 2 : Arc::strong_count(&timeline)
121 2 : }
|