LCOV - code coverage report
Current view: top level - pageserver/src/tenant/timeline - offload.rs (source / functions) Coverage Total Hit
Test: 8b13a09a5c233d98abd4a0d3e59157e7db16d6fd.info Lines: 82.7 % 75 62
Test Date: 2024-11-21 10:53:51 Functions: 50.0 % 8 4

            Line data    Source code
       1              : use std::sync::Arc;
       2              : 
       3              : use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
       4              : use super::Timeline;
       5              : use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
       6              : use crate::tenant::{OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded};
       7              : 
       8            0 : #[derive(thiserror::Error, Debug)]
       9              : pub(crate) enum OffloadError {
      10              :     #[error("Cancelled")]
      11              :     Cancelled,
      12              :     #[error("Timeline is not archived")]
      13              :     NotArchived,
      14              :     #[error(transparent)]
      15              :     RemoteStorage(anyhow::Error),
      16              :     #[error("Unexpected offload error: {0}")]
      17              :     Other(anyhow::Error),
      18              : }
      19              : 
      20              : impl From<TenantManifestError> for OffloadError {
      21            0 :     fn from(e: TenantManifestError) -> Self {
      22            0 :         match e {
      23            0 :             TenantManifestError::Cancelled => Self::Cancelled,
      24            0 :             TenantManifestError::RemoteStorage(e) => Self::RemoteStorage(e),
      25              :         }
      26            0 :     }
      27              : }
      28              : 
      29            2 : pub(crate) async fn offload_timeline(
      30            2 :     tenant: &Tenant,
      31            2 :     timeline: &Arc<Timeline>,
      32            2 : ) -> Result<(), OffloadError> {
      33            2 :     debug_assert_current_span_has_tenant_and_timeline_id();
      34            2 :     tracing::info!("offloading archived timeline");
      35              : 
      36            2 :     let allow_offloaded_children = true;
      37            2 :     let (timeline, guard) =
      38            2 :         DeleteTimelineFlow::prepare(tenant, timeline.timeline_id, allow_offloaded_children)
      39            2 :             .map_err(|e| OffloadError::Other(anyhow::anyhow!(e)))?;
      40              : 
      41            2 :     let TimelineOrOffloaded::Timeline(timeline) = timeline else {
      42            0 :         tracing::error!("timeline already offloaded, but given timeline object");
      43            0 :         return Ok(());
      44              :     };
      45              : 
      46            2 :     let is_archived = timeline.is_archived();
      47            2 :     match is_archived {
      48            2 :         Some(true) => (),
      49              :         Some(false) => {
      50            0 :             tracing::warn!("tried offloading a non-archived timeline");
      51            0 :             return Err(OffloadError::NotArchived);
      52              :         }
      53              :         None => {
      54              :             // This is legal: calls to this function can race with the timeline shutting down
      55            0 :             tracing::info!("tried offloading a timeline whose remote storage is not initialized");
      56            0 :             return Err(OffloadError::Cancelled);
      57              :         }
      58              :     }
      59              : 
      60              :     // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
      61            2 :     timeline.shutdown(super::ShutdownMode::Flush).await;
      62              : 
      63              :     // TODO extend guard mechanism above with method
      64              :     // to make deletions possible while offloading is in progress
      65              : 
      66            2 :     let conf = &tenant.conf;
      67            6 :     delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await;
      68              : 
      69            2 :     let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard);
      70            2 : 
      71            2 :     {
      72            2 :         let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
      73            2 :         offloaded_timelines.insert(
      74            2 :             timeline.timeline_id,
      75            2 :             Arc::new(
      76            2 :                 OffloadedTimeline::from_timeline(&timeline)
      77            2 :                     .expect("we checked above that timeline was ready"),
      78            2 :             ),
      79            2 :         );
      80            2 :     }
      81            2 : 
      82            2 :     // Last step: mark timeline as offloaded in S3
      83            2 :     // TODO: maybe move this step above, right above deletion of the local timeline directory,
      84            2 :     // then there is no potential race condition where we partially offload a timeline, and
      85            2 :     // at the next restart attach it again.
      86            2 :     // For that to happen, we'd need to make the manifest reflect our *intended* state,
      87            2 :     // not our actual state of offloaded timelines.
      88            8 :     tenant.store_tenant_manifest().await?;
      89              : 
      90            2 :     tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})");
      91              : 
      92            2 :     Ok(())
      93            2 : }
      94              : 
      95              : /// It is important that this gets called when DeletionGuard is being held.
      96              : /// For more context see comments in [`DeleteTimelineFlow::prepare`]
      97              : ///
      98              : /// Returns the strong count of the timeline `Arc`
      99            2 : fn remove_timeline_from_tenant(
     100            2 :     tenant: &Tenant,
     101            2 :     timeline: &Timeline,
     102            2 :     _: &DeletionGuard, // using it as a witness
     103            2 : ) -> usize {
     104            2 :     // Remove the timeline from the map.
     105            2 :     let mut timelines = tenant.timelines.lock().unwrap();
     106            2 :     let children_exist = timelines
     107            2 :         .iter()
     108            4 :         .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
     109            2 :     // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
     110            2 :     // We already deleted the layer files, so it's probably best to panic.
     111            2 :     // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
     112            2 :     if children_exist {
     113            0 :         panic!("Timeline grew children while we removed layer files");
     114            2 :     }
     115            2 : 
     116            2 :     let timeline = timelines
     117            2 :         .remove(&timeline.timeline_id)
     118            2 :         .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
     119            2 : 
     120            2 :     Arc::strong_count(&timeline)
     121            2 : }
        

Generated by: LCOV version 2.1-beta