LCOV - code coverage report
Current view: top level - pageserver/src/tenant/timeline - offload.rs (source / functions) Coverage Total Hit
Test: ac1e0b9bf1b4ead74961174b01ba016322d3f9a6.info Lines: 68.3 % 82 56
Test Date: 2025-07-08 09:16:10 Functions: 80.0 % 5 4

            Line data    Source code
       1              : use std::sync::Arc;
       2              : 
       3              : use pageserver_api::models::{TenantState, TimelineState};
       4              : 
       5              : use super::Timeline;
       6              : use super::delete::{DeletionGuard, delete_local_timeline_directory};
       7              : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
       8              : use crate::tenant::remote_timeline_client::ShutdownIfArchivedError;
       9              : use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard};
      10              : use crate::tenant::{
      11              :     DeleteTimelineError, OffloadedTimeline, TenantManifestError, TenantShard, TimelineOrOffloaded,
      12              : };
      13              : 
      14              : #[derive(thiserror::Error, Debug)]
      15              : pub(crate) enum OffloadError { // Errors returned by `offload_timeline`.
      16              :     #[error("Cancelled")]
      17              :     Cancelled, // Tenant was `Stopping`/`Broken`, or converted from `TenantManifestError::Cancelled`.
      18              :     #[error("Timeline is not archived")]
      19              :     NotArchived, // Returned for `DeleteTimelineError::HasChildren` and `ShutdownIfArchivedError::NotArchived`.
      20              :     #[error(transparent)]
      21              :     RemoteStorage(anyhow::Error), // Converted from `TenantManifestError::RemoteStorage`.
      22              :     #[error("Offload or deletion already in progress")]
      23              :     AlreadyInProgress, // `make_timeline_delete_guard` reported `AlreadyInProgress`.
      24              :     #[error("Unexpected offload error: {0}")]
      25              :     Other(anyhow::Error), // Any other error from `make_timeline_delete_guard`, wrapped in `anyhow`.
      26              : }
      27              : 
      28              : impl From<TenantManifestError> for OffloadError { // Lets `?` turn a `TenantManifestError` into an `OffloadError`.
      29            0 :     fn from(e: TenantManifestError) -> Self {
      30            0 :         match e {
      31            0 :             TenantManifestError::Cancelled => Self::Cancelled,
      32            0 :             TenantManifestError::RemoteStorage(e) => Self::RemoteStorage(e), // inner error is passed through unchanged
      33              :         }
      34            0 :     }
      35              : }
      36              : 
      37            1 : pub(crate) async fn offload_timeline( // Shuts down an archived timeline, deletes its local directory, moves it into `timelines_offloaded`, then uploads the tenant manifest.
      38            1 :     tenant: &TenantShard,
      39            1 :     timeline: &Arc<Timeline>,
      40            1 : ) -> Result<(), OffloadError> {
      41            1 :     debug_assert_current_span_has_tenant_and_timeline_id();
      42            1 :     tracing::info!("offloading archived timeline");
      43              : 
      44            1 :     let delete_guard_res = make_timeline_delete_guard( // take the delete guard in `Offload` mode
      45            1 :         tenant,
      46            1 :         timeline.timeline_id,
      47            1 :         TimelineDeleteGuardKind::Offload,
      48              :     );
      49            1 :     let (timeline, guard) = match delete_guard_res { // shadows the `timeline` parameter with the value returned alongside the guard
      50            1 :         Ok(timeline_and_guard) => timeline_and_guard,
      51            0 :         Err(DeleteTimelineError::HasChildren(children)) => {
      52            0 :             let is_archived = timeline.is_archived(); // `timeline` is still the function parameter inside this match
      53            0 :             if is_archived == Some(true) {
      54            0 :                 tracing::error!("timeline is archived but has non-archived children: {children:?}");
      55            0 :                 return Err(OffloadError::NotArchived);
      56            0 :             }
      57            0 :             tracing::info!(
      58              :                 ?is_archived,
      59            0 :                 "timeline is not archived and has unarchived children"
      60              :             );
      61            0 :             return Err(OffloadError::NotArchived); // both paths return `NotArchived`; only the log level and message differ
      62              :         }
      63              :         Err(DeleteTimelineError::AlreadyInProgress(_)) => {
      64            0 :             tracing::info!("timeline offload or deletion already in progress");
      65            0 :             return Err(OffloadError::AlreadyInProgress);
      66              :         }
      67            0 :         Err(e) => return Err(OffloadError::Other(anyhow::anyhow!(e))), // every other guard error becomes `Other`
      68              :     };
      69              : 
      70            1 :     let TimelineOrOffloaded::Timeline(timeline) = timeline else {
      71            0 :         tracing::error!("timeline already offloaded, but given timeline object");
      72            0 :         return Ok(()); // an entry that is not a live `Timeline` is logged and treated as success
      73              :     };
      74              : 
      75            1 :     match timeline.remote_client.shutdown_if_archived().await {
      76            1 :         Ok(()) => {}
      77            0 :         Err(ShutdownIfArchivedError::NotInitialized(_)) => {
      78            0 :             // Either the timeline is being deleted, the operation is being retried, or we are shutting down.
      79            0 :             // Don't return cancelled here to keep it idempotent.
      80            0 :         }
      81            0 :         Err(ShutdownIfArchivedError::NotArchived) => return Err(OffloadError::NotArchived),
      82              :     }
      83            1 :     timeline.set_state(TimelineState::Stopping);
      84              : 
      85              :     // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
      86            1 :     timeline.shutdown(super::ShutdownMode::Reload).await;
      87              : 
      88              :     // TODO extend guard mechanism above with method
      89              :     // to make deletions possible while offloading is in progress
      90              : 
      91            1 :     let conf = &tenant.conf;
      92            1 :     delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;
      93              : 
      94            1 :     let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard); // count is only used in the final log line
      95              : 
      96              :     {
      97            1 :         let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
      98            1 :         if matches!(
      99            1 :             tenant.current_state(),
     100              :             TenantState::Stopping { .. } | TenantState::Broken { .. }
     101              :         ) {
     102              :             // Cancel the operation if the tenant is shutting down. Do this while the
     103              :             // timelines_offloaded lock is held to prevent a race with Tenant::shutdown
     104              :             // for defusing the lock
     105            0 :             return Err(OffloadError::Cancelled); // NOTE(review): the local directory and the `timelines` entry were already removed above
     106            1 :         }
     107            1 :         offloaded_timelines.insert(
     108            1 :             timeline.timeline_id,
     109            1 :             Arc::new(
     110            1 :                 OffloadedTimeline::from_timeline(&timeline)
     111            1 :                     .expect("we checked above that timeline was ready"),
     112            1 :             ),
     113            1 :         );
     114              :     }
     115              : 
     116              :     // Last step: mark timeline as offloaded in S3
     117              :     // TODO: maybe move this step above, right above deletion of the local timeline directory,
     118              :     // then there is no potential race condition where we partially offload a timeline, and
     119              :     // at the next restart attach it again.
     120              :     // For that to happen, we'd need to make the manifest reflect our *intended* state,
     121              :     // not our actual state of offloaded timelines.
     122            1 :     tenant.maybe_upload_tenant_manifest().await?; // `?` presumably goes through `From<TenantManifestError>` - confirm the callee's error type
     123              : 
     124            1 :     tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})");
     125              : 
     126            1 :     Ok(())
     127            1 : }
     128              : 
     129              : /// It is important that this gets called when DeletionGuard is being held.
     130              : /// For more context see comments in [`make_timeline_delete_guard`]
     131              : ///
     132              : /// Returns the strong count of the timeline `Arc` taken out of the map (that handle is itself counted).
     133            1 : fn remove_timeline_from_tenant(
     134            1 :     tenant: &TenantShard,
     135            1 :     timeline: &Timeline,
     136            1 :     _: &DeletionGuard, // using it as a witness
     137            1 : ) -> usize {
     138              :     // Remove the timeline from the map.
     139            1 :     let mut timelines = tenant.timelines.lock().unwrap(); // this guard is held until the function returns
     140            1 :     let children_exist = timelines
     141            1 :         .iter()
     142            2 :         .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
     143              :     // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
     144              :     // We already deleted the layer files, so it's probably best to panic.
     145              :     // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
     146            1 :     if children_exist {
     147            0 :         panic!("Timeline grew children while we removed layer files");
     148            1 :     }
     149              : 
     150            1 :     let timeline = timelines // shadows the `&Timeline` parameter with the entry removed from the map
     151            1 :         .remove(&timeline.timeline_id)
     152            1 :         .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
     153              : 
     154              :     // Clear the compaction queue for this timeline
     155            1 :     tenant
     156            1 :         .scheduled_compaction_tasks
     157            1 :         .lock()
     158            1 :         .unwrap()
     159            1 :         .remove(&timeline.timeline_id);
     160              : 
     161            1 :     Arc::strong_count(&timeline) // read while the local handle from the map is still alive
     162            1 : }
        

Generated by: LCOV version 2.1-beta