Line data Source code
1 : use std::sync::Arc;
2 :
3 : use pageserver_api::models::{TenantState, TimelineState};
4 :
5 : use super::Timeline;
6 : use super::delete::{DeletionGuard, delete_local_timeline_directory};
7 : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
8 : use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
9 : use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
10 : use crate::tenant::{
11 : DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,
12 : };
13 :
14 : #[derive(thiserror::Error, Debug)]
15 : pub(crate) enum OffloadError {
16 : #[error("Cancelled")]
17 : Cancelled,
18 : #[error("Timeline is not archived")]
19 : NotArchived,
20 : #[error("Offload or deletion already in progress")]
21 : AlreadyInProgress,
22 : #[error("Unexpected offload error: {0}")]
23 : Other(anyhow::Error),
24 : }
25 :
26 : impl From<TenantManifestError> for OffloadError {
27 0 : fn from(e: TenantManifestError) -> Self {
28 0 : match e {
29 0 : TenantManifestError::Cancelled => Self::Cancelled,
30 0 : TenantManifestError::RemoteStorage(e) => Self::Other(e),
31 : }
32 0 : }
33 : }
34 :
35 1 : pub(crate) async fn offload_timeline(
36 1 : tenant: &TenantShard,
37 1 : timeline: &Arc<Timeline>,
38 1 : ) -> Result<(), OffloadError> {
39 1 : debug_assert_current_span_has_tenant_and_timeline_id();
40 1 : tracing::info!("offloading archived timeline");
41 :
42 1 : let delete_guard_res = make_timeline_delete_guard(
43 1 : tenant,
44 1 : timeline.timeline_id,
45 1 : TimelineDeleteGuardKind::Offload,
46 : );
47 1 : let (timeline, guard) = match delete_guard_res {
48 1 : Ok(timeline_and_guard) => timeline_and_guard,
49 0 : Err(DeleteTimelineError::HasChildren(children)) => {
50 0 : let is_archived = timeline.is_archived();
51 0 : if is_archived == Some(true) {
52 0 : tracing::error!("timeline is archived but has non-archived children: {children:?}");
53 0 : return Err(OffloadError::NotArchived);
54 0 : }
55 0 : tracing::info!(
56 : ?is_archived,
57 0 : "timeline is not archived and has unarchived children"
58 : );
59 0 : return Err(OffloadError::NotArchived);
60 : }
61 : Err(DeleteTimelineError::AlreadyInProgress(_)) => {
62 0 : tracing::info!("timeline offload or deletion already in progress");
63 0 : return Err(OffloadError::AlreadyInProgress);
64 : }
65 0 : Err(e) => return Err(OffloadError::Other(anyhow::anyhow!(e))),
66 : };
67 :
68 1 : let TimelineOrOffloaded::Timeline(timeline) = timeline else {
69 0 : tracing::error!("timeline already offloaded, but given timeline object");
70 0 : return Ok(());
71 : };
72 :
73 1 : match timeline.remote_client.shutdown_if_archived().await {
74 1 : Ok(()) => {}
75 0 : Err(ShutdownIfArchivedError::NotInitialized(_)) => {
76 0 : // Either the timeline is being deleted, the operation is being retried, or we are shutting down.
77 0 : // Don't return cancelled here to keep it idempotent.
78 0 : }
79 0 : Err(ShutdownIfArchivedError::NotArchived) => return Err(OffloadError::NotArchived),
80 : }
81 1 : timeline.set_state(TimelineState::Stopping);
82 :
83 : // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
84 1 : timeline.shutdown(super::ShutdownMode::Reload).await;
85 :
86 : // TODO extend guard mechanism above with method
87 : // to make deletions possible while offloading is in progress
88 :
89 1 : let conf = &tenant.conf;
90 1 : delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;
91 :
92 1 : let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard);
93 :
94 : {
95 1 : let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
96 1 : if matches!(
97 1 : tenant.current_state(),
98 : TenantState::Stopping { .. } | TenantState::Broken { .. }
99 : ) {
100 : // Cancel the operation if the tenant is shutting down. Do this while the
101 : // timelines_offloaded lock is held to prevent a race with Tenant::shutdown
102 : // for defusing the lock
103 0 : return Err(OffloadError::Cancelled);
104 1 : }
105 1 : offloaded_timelines.insert(
106 1 : timeline.timeline_id,
107 1 : Arc::new(
108 1 : OffloadedTimeline::from_timeline(&timeline)
109 1 : .expect("we checked above that timeline was ready"),
110 1 : ),
111 1 : );
112 : }
113 :
114 : // Last step: mark timeline as offloaded in S3
115 : // TODO: maybe move this step above, right above deletion of the local timeline directory,
116 : // then there is no potential race condition where we partially offload a timeline, and
117 : // at the next restart attach it again.
118 : // For that to happen, we'd need to make the manifest reflect our *intended* state,
119 : // not our actual state of offloaded timelines.
120 1 : tenant.maybe_upload_tenant_manifest().await?;
121 :
122 1 : tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})");
123 :
124 1 : Ok(())
125 1 : }
126 :
127 : /// It is important that this gets called when DeletionGuard is being held.
128 : /// For more context see comments in [`make_timeline_delete_guard`]
129 : ///
130 : /// Returns the strong count of the timeline `Arc`
131 1 : fn remove_timeline_from_tenant(
132 1 : tenant: &TenantShard,
133 1 : timeline: &Timeline,
134 1 : _: &DeletionGuard, // using it as a witness
135 1 : ) -> usize {
136 : // Remove the timeline from the map.
137 1 : let mut timelines = tenant.timelines.lock().unwrap();
138 1 : let children_exist = timelines
139 1 : .iter()
140 2 : .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
141 : // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
142 : // We already deleted the layer files, so it's probably best to panic.
143 : // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
144 1 : if children_exist {
145 0 : panic!("Timeline grew children while we removed layer files");
146 1 : }
147 :
148 1 : let timeline = timelines
149 1 : .remove(&timeline.timeline_id)
150 1 : .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
151 :
152 : // Clear the compaction queue for this timeline
153 1 : tenant
154 1 : .scheduled_compaction_tasks
155 1 : .lock()
156 1 : .unwrap()
157 1 : .remove(&timeline.timeline_id);
158 :
159 1 : Arc::strong_count(&timeline)
160 1 : }
|