LCOV - code coverage report
Current view: top level - pageserver/src/tenant/timeline - delete.rs (source / functions) Coverage Total Hit
Test: 792183ae0ef4f1f8b22e9ac7e8748740ab73f873.info Lines: 0.0 % 204 0
Test Date: 2024-06-26 01:04:33 Functions: 0.0 % 32 0

            Line data    Source code
       1              : use std::{
       2              :     ops::{Deref, DerefMut},
       3              :     sync::Arc,
       4              : };
       5              : 
       6              : use anyhow::Context;
       7              : use pageserver_api::{models::TimelineState, shard::TenantShardId};
       8              : use tokio::sync::OwnedMutexGuard;
       9              : use tracing::{error, info, instrument, Instrument};
      10              : use utils::{crashsafe, fs_ext, id::TimelineId, pausable_failpoint};
      11              : 
      12              : use crate::{
      13              :     config::PageServerConf,
      14              :     task_mgr::{self, TaskKind},
      15              :     tenant::{
      16              :         metadata::TimelineMetadata,
      17              :         remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
      18              :         CreateTimelineCause, DeleteTimelineError, Tenant,
      19              :     },
      20              : };
      21              : 
      22              : use super::{Timeline, TimelineResources};
      23              : 
      24              : /// Mark the timeline as deleted in remote storage (S3) so we won't pick it up next time
      25              : /// during attach or pageserver restart.
      26              : /// See the comment in `persist_index_part_with_deleted_flag`.
      27            0 : async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
      28            0 :     match timeline
      29            0 :         .remote_client
      30            0 :         .persist_index_part_with_deleted_flag()
      31            0 :         .await
      32              :     {
      33              :         // If we (now, or already) marked it successfully as deleted, we can proceed.
      34            0 :         Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
      35              :         // Bail out otherwise, wrapping the error into `DeleteTimelineError::Other`.
      36              :         //
      37              :         // AlreadyInProgress shouldn't happen, because the `Timeline::delete_progress` lock prevents
      38              :         // two tasks from performing the deletion at the same time. The first task
      39              :         // that starts deletion should run it to completion.
      40            0 :         Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
      41            0 :         | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
      42            0 :             return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
      43              :         }
      44              :     }
      45            0 :     Ok(())
      46            0 : }
      47              : 
      48              : /// Grab the compaction and gc locks, and actually perform the deletion of the local timeline directory.
      49              : ///
      50              : /// The locks prevent GC or compaction from running at the same time. The background tasks do not
      51              : /// register themselves with the timeline they're operating on, so they might still be running even
      52              : /// though we called `shutdown_tasks`.
      53              : ///
      54              : /// Note that there are still other race conditions between
      55              : /// GC, compaction and timeline deletion. See
      56              : /// <https://github.com/neondatabase/neon/issues/2671>
      57              : ///
      58              : /// No timeout here, GC & Compaction should be responsive to the
      59              : /// `TimelineState::Stopping` change. (NOTE(review): `crate::timed` below presumably only reports a slow acquisition — confirm.)
      60              : // pub(super): only so that documentation can link to this function
      61            0 : pub(super) async fn delete_local_timeline_directory(
      62            0 :     conf: &PageServerConf,
      63            0 :     tenant_shard_id: TenantShardId,
      64            0 :     timeline: &Timeline,
      65            0 : ) -> anyhow::Result<()> {
      66            0 :     let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) };
      67            0 :     let guards = crate::timed(
      68            0 :         guards,
      69            0 :         "acquire gc and compaction locks",
      70            0 :         std::time::Duration::from_secs(5),
      71            0 :     )
      72            0 :     .await;
      73              : 
      74              :     // NB: storage_sync upload tasks that reference these layers have been cancelled
      75              :     //     by the caller.
      76              : 
      77            0 :     let local_timeline_directory = conf.timeline_path(&tenant_shard_id, &timeline.timeline_id);
      78            0 : 
      79            0 :     fail::fail_point!("timeline-delete-before-rm", |_| {
      80            0 :         Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
      81            0 :     });
      82              : 
      83              :     // NB: This need not be atomic because the deleted flag in the IndexPart
      84              :     // will be observed during tenant/timeline load. The deletion will be resumed there.
      85              :     //
      86              :     // Note that here we do not bail out on std::io::ErrorKind::NotFound.
      87              :     // This can happen if we're called a second time, e.g.,
      88              :     // because of a previous failure/cancellation at/after
      89              :     // failpoint `timeline-delete-after-rm`.
      90              :     //
      91              :     // ErrorKind::NotFound can also happen if we race with tenant detach, because
      92              :     // no locks are shared.
      93            0 :     tokio::fs::remove_dir_all(local_timeline_directory)
      94            0 :         .await
      95            0 :         .or_else(fs_ext::ignore_not_found)
      96            0 :         .context("remove local timeline directory")?;
      97              : 
      98              :     // Make sure previous deletions are ordered before mark removal.
      99              :     // Otherwise there is no guarantee that they reach the disk before mark deletion.
     100              :     // So it's possible for the mark to reach disk first and for other deletions
     101              :     // to be reordered later and thus missed if a crash occurs.
     102              :     // Note that we don't need to sync after the mark file is removed
     103              :     // because we can tolerate the case when the mark file reappears on startup.
     104            0 :     let timeline_path = conf.timelines_path(&tenant_shard_id);
     105            0 :     crashsafe::fsync_async(timeline_path)
     106            0 :         .await
     107            0 :         .context("fsync_pre_mark_remove")?;
     108              : 
     109            0 :     info!("finished deleting layer files, releasing locks");
     110            0 :     drop(guards);
     111            0 : 
     112            0 :     fail::fail_point!("timeline-delete-after-rm", |_| {
     113            0 :         Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
     114            0 :     });
     115              : 
     116            0 :     Ok(())
     117            0 : }
     118              : 
     119              : /// Removes remote layers and then the index file, by calling `delete_all` on the timeline's remote client.
     120            0 : async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
     121            0 :     timeline
     122            0 :         .remote_client
     123            0 :         .delete_all()
     124            0 :         .await
     125            0 :         .context("delete_all")
     126            0 : }
     127              : 
     128              : // This function removes the remaining trace of a timeline on disk.
     129              : // Currently that is only the legacy delete mark file (see the TODO in the body).
     130              : // Note: io::ErrorKind::NotFound is ignored when removing the delete mark,
     131              : // so it is fine for the mark to be absent already.
     132              : // Any other I/O error is returned with the "remove delete mark" context.
     133            0 : async fn cleanup_remaining_timeline_fs_traces(
     134            0 :     conf: &PageServerConf,
     135            0 :     tenant_shard_id: TenantShardId,
     136            0 :     timeline_id: TimelineId,
     137            0 : ) -> anyhow::Result<()> {
     138            0 :     // Remove delete mark
     139            0 :     // TODO: once we are confident that no more exist in the field, remove this
     140            0 :     // line.  It cleans up a legacy marker file that might in rare cases be present.
     141            0 :     tokio::fs::remove_file(conf.timeline_delete_mark_file_path(tenant_shard_id, timeline_id))
     142            0 :         .await
     143            0 :         .or_else(fs_ext::ignore_not_found)
     144            0 :         .context("remove delete mark")
     145            0 : }
     146              : 
     147              : /// Removes the timeline from `tenant.timelines`; it is important that this gets called while the `DeletionGuard` is held.
     148              : /// Panics if the timeline has children or is already gone from the map. For more context see comments in [`DeleteTimelineFlow::prepare`]
     149            0 : async fn remove_timeline_from_tenant(
     150            0 :     tenant: &Tenant,
     151            0 :     timeline_id: TimelineId,
     152            0 :     _: &DeletionGuard, // using it as a witness
     153            0 : ) -> anyhow::Result<()> {
     154            0 :     // Remove the timeline from the map.
     155            0 :     let mut timelines = tenant.timelines.lock().unwrap();
     156            0 :     let children_exist = timelines
     157            0 :         .iter()
     158            0 :         .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id));
     159            0 :     // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
     160            0 :     // We already deleted the layer files, so it's probably best to panic.
     161            0 :     // (Ideally, the earlier `remove_dir_all` would be atomic so we don't see this timeline after a restart)
     162            0 :     if children_exist {
     163            0 :         panic!("Timeline grew children while we removed layer files");
     164            0 :     }
     165            0 : 
     166            0 :     timelines
     167            0 :         .remove(&timeline_id)
     168            0 :         .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
     169            0 : 
     170            0 :     drop(timelines);
     171            0 : 
     172            0 :     Ok(())
     173            0 : }
     174              : 
     175              : /// Orchestrates the shutdown of all timeline tasks, removes its in-memory structures,
     176              : /// and deletes its data from both disk and s3.
     177              : /// The sequence of steps:
     178              : /// 1. Set deleted_at in remote index part.
     179              : /// 2. Create local mark file. (NOTE(review): no mark file creation is visible in this file, only cleanup of a legacy mark — confirm.)
     180              : /// 3. Delete local files except metadata (it is simpler this way, to be able to reuse timeline initialization code that expects metadata)
     181              : /// 4. Delete remote layers
     182              : /// 5. Delete index part
     183              : /// 6. Delete meta, timeline directory
     184              : /// 7. Delete mark file
     185              : /// It is resumable from any step in case a crash/restart occurs.
     186              : /// There are three entrypoints to the process:
     187              : /// 1. [`DeleteTimelineFlow::run`] this is the main one called by a management api handler.
     188              : /// 2. [`DeleteTimelineFlow::resume_deletion`] is called during restarts when local metadata is still present
     189              : /// and we possibly need to continue deletion of remote files.
     190              : /// 3. [`DeleteTimelineFlow::cleanup_remaining_timeline_fs_traces`] is used when we deleted remote
     191              : /// index but still have local metadata, timeline directory and delete mark.
     192              : /// Note the only other place that touches the timeline delete mark is the logic that scans the directory with timelines during tenant load.
     193              : #[derive(Default)]
     194              : pub enum DeleteTimelineFlow {
     195              :     #[default]
     196              :     NotStarted,
     197              :     InProgress,
     198              :     Finished,
     199              : }
     200              : 
     201              : impl DeleteTimelineFlow {
     202              :     // These steps are run in the context of the management API request handler.
     203              :     // Long-running steps continue in the background, unless `inplace` is set, in which case they are awaited here.
     204              :     // NB: If this fails half-way through, and is retried, the retry will go through
     205              :     // all the same steps again. Make sure the code here is idempotent, and don't
     206              :     // error out if some of the shutdown tasks have already been completed!
     207            0 :     #[instrument(skip_all, fields(%inplace))]
     208              :     pub async fn run(
     209              :         tenant: &Arc<Tenant>,
     210              :         timeline_id: TimelineId,
     211              :         inplace: bool,
     212              :     ) -> Result<(), DeleteTimelineError> {
     213              :         super::debug_assert_current_span_has_tenant_and_timeline_id();
     214              : 
     215              :         let (timeline, mut guard) = Self::prepare(tenant, timeline_id)?;
     216              : 
     217              :         guard.mark_in_progress()?;
     218              : 
     219              :         // Now that the Timeline is in Stopping state (set by `prepare`), request all the related tasks to shut down.
     220              :         timeline.shutdown(super::ShutdownMode::Hard).await;
     221              : 
     222            0 :         fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
     223            0 :             Err(anyhow::anyhow!(
     224            0 :                 "failpoint: timeline-delete-before-index-deleted-at"
     225            0 :             ))?
     226            0 :         });
     227              : 
     228              :         set_deleted_in_remote_index(&timeline).await?;
     229              : 
     230            0 :         fail::fail_point!("timeline-delete-before-schedule", |_| {
     231            0 :             Err(anyhow::anyhow!(
     232            0 :                 "failpoint: timeline-delete-before-schedule"
     233            0 :             ))?
     234            0 :         });
     235              : 
     236              :         if inplace {
     237              :             Self::background(guard, tenant.conf, tenant, &timeline).await?
     238              :         } else {
     239              :             Self::schedule_background(guard, tenant.conf, Arc::clone(tenant), timeline);
     240              :         }
     241              : 
     242              :         Ok(())
     243              :     }
     244              : 
     245            0 :     fn mark_in_progress(&mut self) -> anyhow::Result<()> {
     246            0 :         match self {
     247            0 :             Self::Finished => anyhow::bail!("Bug. Is in finished state"),
     248            0 :             Self::InProgress { .. } => { /* We're in a retry: an earlier attempt already set InProgress */ }
     249            0 :             Self::NotStarted => { /* Fresh start */ }
     250              :         }
     251              : 
     252            0 :         *self = Self::InProgress;
     253            0 : 
     254            0 :         Ok(())
     255            0 :     }
     256              : 
     257              :     /// Shortcut to create the `Timeline` struct (with `CreateTimelineCause::Delete`), register it in the tenant's map and spawn the deletion task.
     258            0 :     #[instrument(skip_all, fields(%timeline_id))]
     259              :     pub async fn resume_deletion(
     260              :         tenant: Arc<Tenant>,
     261              :         timeline_id: TimelineId,
     262              :         local_metadata: &TimelineMetadata,
     263              :         remote_client: RemoteTimelineClient,
     264              :     ) -> anyhow::Result<()> {
     265              :         // Note: here we even skip populating the layer map. The Timeline is essentially uninitialized.
     266              :         // RemoteTimelineClient is the only functioning part.
     267              :         let timeline = tenant
     268              :             .create_timeline_struct(
     269              :                 timeline_id,
     270              :                 local_metadata,
     271              :                 None, // Ancestor is not needed for deletion.
     272              :                 TimelineResources {
     273              :                     remote_client,
     274              :                     timeline_get_throttle: tenant.timeline_get_throttle.clone(),
     275              :                 },
     276              :                 // Important. We don't pass the ancestor above because it can be missing.
     277              :                 // Thus we need to skip the validation here.
     278              :                 CreateTimelineCause::Delete,
     279              :                 // Aux file policy is not needed for deletion, assuming deletion does not read the aux keyspace
     280              :                 None,
     281              :             )
     282              :             .context("create_timeline_struct")?;
     283              : 
     284              :         let mut guard = DeletionGuard(
     285              :             Arc::clone(&timeline.delete_progress)
     286              :                 .try_lock_owned()
     287              :                 .expect("cannot happen because we're the only owner"),
     288              :         );
     289              : 
     290              :         // We need to do this because when the console retries the delete request we shouldn't answer with 404,
     291              :         // because 404 means successful deletion.
     292              :         {
     293              :             let mut locked = tenant.timelines.lock().unwrap();
     294              :             locked.insert(timeline_id, Arc::clone(&timeline));
     295              :         }
     296              : 
     297              :         guard.mark_in_progress()?;
     298              : 
     299              :         Self::schedule_background(guard, tenant.conf, tenant, timeline);
     300              : 
     301              :         Ok(())
     302              :     }
     303              : 
     304            0 :     #[instrument(skip_all, fields(%timeline_id))]
     305              :     pub async fn cleanup_remaining_timeline_fs_traces(
     306              :         tenant: &Tenant,
     307              :         timeline_id: TimelineId,
     308              :     ) -> anyhow::Result<()> {
     309              :         let r =
     310              :             cleanup_remaining_timeline_fs_traces(tenant.conf, tenant.tenant_shard_id, timeline_id)
     311              :                 .await;
     312              :         info!("Done");
     313              :         r
     314              :     }
     315              : 
     316            0 :     fn prepare(
     317            0 :         tenant: &Tenant,
     318            0 :         timeline_id: TimelineId,
     319            0 :     ) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
     320            0 :         // Note the interaction between this guard and the deletion guard.
     321            0 :         // Here we attempt to lock the deletion guard while we're holding a lock on timelines.
     322            0 :         // This is important because when you take into account `remove_timeline_from_tenant`
     323            0 :         // we remove the timeline from memory while we still hold the deletion guard.
     324            0 :         // So here when timeline deletion is finished the timeline won't be present in the timelines map at all,
     325            0 :         // which makes the following sequence impossible:
     326            0 :         // T1: get preempted right before the try_lock on `Timeline::delete_progress`
     327            0 :         // T2: do a full deletion, acquire and drop `Timeline::delete_progress`
     328            0 :         // T1: acquire deletion lock, do another `DeleteTimelineFlow::run`
     329            0 :         // For more context see this discussion: `https://github.com/neondatabase/neon/pull/4552#discussion_r1253437346`
     330            0 :         let timelines = tenant.timelines.lock().unwrap();
     331              : 
     332            0 :         let timeline = match timelines.get(&timeline_id) {
     333            0 :             Some(t) => t,
     334            0 :             None => return Err(DeleteTimelineError::NotFound),
     335              :         };
     336              : 
     337              :         // Ensure that there are no child timelines **attached to that pageserver**,
     338              :         // because detach removes files, which will break child branches
     339            0 :         let children: Vec<TimelineId> = timelines
     340            0 :             .iter()
     341            0 :             .filter_map(|(id, entry)| {
     342            0 :                 if entry.get_ancestor_timeline_id() == Some(timeline_id) {
     343            0 :                     Some(*id)
     344              :                 } else {
     345            0 :                     None
     346              :                 }
     347            0 :             })
     348            0 :             .collect();
     349            0 : 
     350            0 :         if !children.is_empty() {
     351            0 :             return Err(DeleteTimelineError::HasChildren(children));
     352            0 :         }
     353            0 : 
     354            0 :         // Note that using try_lock here is important to avoid a deadlock.
     355            0 :         // Here we take the lock on timelines and then the deletion guard.
     356            0 :         // At the end of the operation we're holding the guard and need to lock the timelines map
     357            0 :         // to remove the timeline from it.
     358            0 :         // Whenever two locks are taken in different orders, a deadlock can result.
     359            0 : 
     360            0 :         let delete_progress = Arc::clone(&timeline.delete_progress);
     361            0 :         let delete_lock_guard = match delete_progress.try_lock_owned() {
     362            0 :             Ok(guard) => DeletionGuard(guard),
     363              :             Err(_) => {
     364              :                 // Unfortunately, if `try_lock_owned` fails the Arc is consumed, so clone it again for the error.
     365            0 :                 return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
     366            0 :                     &timeline.delete_progress,
     367            0 :                 )));
     368              :             }
     369              :         };
     370              : 
     371            0 :         timeline.set_state(TimelineState::Stopping);
     372            0 : 
     373            0 :         Ok((Arc::clone(timeline), delete_lock_guard))
     374            0 :     }
     375              : 
     376            0 :     fn schedule_background(
     377            0 :         guard: DeletionGuard,
     378            0 :         conf: &'static PageServerConf,
     379            0 :         tenant: Arc<Tenant>,
     380            0 :         timeline: Arc<Timeline>,
     381            0 :     ) {
     382            0 :         let tenant_shard_id = timeline.tenant_shard_id;
     383            0 :         let timeline_id = timeline.timeline_id;
     384            0 : 
     385            0 :         task_mgr::spawn(
     386            0 :             task_mgr::BACKGROUND_RUNTIME.handle(),
     387            0 :             TaskKind::TimelineDeletionWorker,
     388            0 :             Some(tenant_shard_id),
     389            0 :             Some(timeline_id),
     390            0 :             "timeline_delete",
     391              :             false,
     392            0 :             async move {
     393            0 :                 if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
     394            0 :                     error!("Error: {err:#}");
     395            0 :                     timeline.set_broken(format!("{err:#}"))
     396            0 :                 };
     397            0 :                 Ok(())
     398            0 :             }
     399            0 :             .instrument(tracing::info_span!(parent: None, "delete_timeline", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),timeline_id=%timeline_id)),
     400              :         );
     401            0 :     }
     402              : 
     403            0 :     async fn background(
     404            0 :         mut guard: DeletionGuard,
     405            0 :         conf: &PageServerConf,
     406            0 :         tenant: &Tenant,
     407            0 :         timeline: &Timeline,
     408            0 :     ) -> Result<(), DeleteTimelineError> {
     409            0 :         delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
     410              : 
     411            0 :         delete_remote_layers_and_index(timeline).await?;
     412              : 
     413              :         pausable_failpoint!("in_progress_delete");
     414              : 
     415            0 :         remove_timeline_from_tenant(tenant, timeline.timeline_id, &guard).await?;
     416              : 
     417            0 :         *guard = Self::Finished;
     418            0 : 
     419            0 :         Ok(())
     420            0 :     }
     421              : 
     422            0 :     pub(crate) fn is_not_started(&self) -> bool {
     423            0 :         matches!(self, Self::NotStarted)
     424            0 :     }
     425              : }
     426              : 
     427              : struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>); // newtype around the owned guard of `Timeline::delete_progress`
     428              : 
     429              : impl Deref for DeletionGuard {
     430              :     type Target = DeleteTimelineFlow;
     431              : 
     432            0 :     fn deref(&self) -> &Self::Target {
     433            0 :         &self.0
     434            0 :     }
     435              : }
     436              : 
     437              : impl DerefMut for DeletionGuard {
     438            0 :     fn deref_mut(&mut self) -> &mut Self::Target {
     439            0 :         &mut self.0
     440            0 :     }
     441              : }
        

Generated by: LCOV version 2.1-beta