// pageserver/src/tenant/delete.rs
// (Source listing taken from an LCOV coverage report dated 2024-06-24: 0 of 279 lines and 0 of 38 functions covered.)

use std::sync::Arc;

use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use pageserver_api::{models::TenantState, shard::TenantShardId};
use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
use tokio::sync::OwnedMutexGuard;
use tokio_util::sync::CancellationToken;
use tracing::{error, Instrument};

use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId, pausable_failpoint};

use crate::{
    config::PageServerConf,
    context::RequestContext,
    task_mgr::{self},
    tenant::{
        mgr::{TenantSlot, TenantsMapRemoveResult},
        remote_timeline_client::remote_heatmap_path,
    },
};

use super::{
    mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
    remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
    timeline::delete::DeleteTimelineFlow,
    tree_sort_timelines, DeleteTimelineError, Tenant, TenantPreload,
};
#[derive(Debug, thiserror::Error)]
pub(crate) enum DeleteTenantError {
    #[error("GetTenant {0}")]
    Get(#[from] GetTenantError),

    #[error("Tenant map slot error {0}")]
    SlotError(#[from] TenantSlotError),

    #[error("Tenant map slot upsert error {0}")]
    SlotUpsertError(#[from] TenantSlotUpsertError),

    #[error("Timeline {0}")]
    Timeline(#[from] DeleteTimelineError),

    #[error("Cancelled")]
    Cancelled,

    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
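
// Illustrative note (not part of the original file): each `#[from]` attribute above generates a
// `From` impl, so callers can bubble the wrapped error types up with `?`. A minimal sketch,
// assuming only the items declared in this file; the function name `delete_mark_example` is
// hypothetical.
#[allow(dead_code)]
fn delete_mark_example(
    conf: &PageServerConf,
    tenant_shard_id: &TenantShardId,
) -> Result<RemotePath, DeleteTenantError> {
    // `remote_tenant_delete_mark_path` returns `anyhow::Result`; the `?` converts the
    // `anyhow::Error` into `DeleteTenantError::Other` via the generated `From` impl.
    let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
    Ok(path)
}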

type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;

fn remote_tenant_delete_mark_path(
    conf: &PageServerConf,
    tenant_shard_id: &TenantShardId,
) -> anyhow::Result<RemotePath> {
    let tenant_remote_path = conf
        .tenant_path(tenant_shard_id)
        .strip_prefix(&conf.workdir)
        .context("Failed to strip workdir prefix")
        .and_then(RemotePath::new)
        .context("tenant path")?;
    Ok(tenant_remote_path.join(Utf8Path::new("timelines/deleted")))
}
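
// Illustrative note (not part of the original file): a minimal, camino-only sketch of the
// prefix-stripping and joining performed above. The concrete paths used here are hypothetical;
// the real layout comes from `PageServerConf::tenant_path`.
#[allow(dead_code)]
fn delete_mark_path_shape_example() {
    let workdir = Utf8Path::new("/data/pageserver");
    let tenant_path = Utf8Path::new("/data/pageserver/tenants/some-tenant-shard");
    // Strip the local workdir prefix to obtain a storage-relative path...
    let relative = tenant_path
        .strip_prefix(workdir)
        .expect("tenant path lives under the workdir");
    // ...then append the same suffix the function above uses for the delete mark.
    let mark = relative.join("timelines/deleted");
    assert_eq!(
        mark,
        Utf8PathBuf::from("tenants/some-tenant-shard/timelines/deleted")
    );
}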

async fn schedule_ordered_timeline_deletions(
    tenant: &Arc<Tenant>,
) -> Result<Vec<(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>, TimelineId)>, DeleteTenantError> {
    // The tenant is stopping at this point: we know it will be deleted, and no new timelines
    // should be created.
    // Tree-sort the timelines so that they are deleted from the leaves towards the root.
    // NOTE: cloning releases the mutex, which opens a window for a race: a pending deletion
    // can complete and remove a timeline from the map between our call to clone
    // and `DeleteTimelineFlow::run`, so `run` won't find the timeline in the `timelines` map.
    // `timelines.lock` is currently synchronous, so we can't hold it across an await point.
    // Instead, simply ignore a NotFound error returned by `run`.
    // Beware: if the lock ever becomes async and we try to hold it here, `run` also locks it, which would deadlock.
    // (A toy sketch of the leaf-to-root ordering follows this function.)
    let timelines = tenant.timelines.lock().unwrap().clone();
    let sorted =
        tree_sort_timelines(timelines, |t| t.get_ancestor_timeline_id()).context("tree sort")?;

    let mut already_running_deletions = vec![];

    for (timeline_id, _) in sorted.into_iter().rev() {
        let span = tracing::info_span!("timeline_delete", %timeline_id);
        let res = DeleteTimelineFlow::run(tenant, timeline_id, true)
            .instrument(span)
            .await;
        if let Err(e) = res {
            match e {
                DeleteTimelineError::NotFound => {
                    // The timeline deletion finished after the clone above but before the call
                    // to `DeleteTimelineFlow::run`, and removed the timeline from the map.
                    continue;
                }
                DeleteTimelineError::AlreadyInProgress(guard) => {
                    already_running_deletions.push((guard, timeline_id));
                    continue;
                }
                e => return Err(DeleteTenantError::Timeline(e)),
            }
        }
    }

    Ok(already_running_deletions)
}
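
// Illustrative note (not part of the original file): a toy, self-contained sketch of the
// "delete descendants before their ancestors" ordering above. `tree_sort_timelines` has a
// different signature; here a plain child-to-parent map of u32 ids (assumed acyclic) stands in
// for the timeline tree, and the function name is hypothetical.
#[allow(dead_code)]
fn leaf_first_order_example(parents: &std::collections::HashMap<u32, Option<u32>>) -> Vec<u32> {
    // Order by depth, deepest first (via `Reverse`): every timeline then precedes its ancestors,
    // which is the same property `sorted.into_iter().rev()` provides above.
    let depth = |mut id: u32| {
        let mut d = 0usize;
        while let Some(Some(parent)) = parents.get(&id) {
            d += 1;
            id = *parent;
        }
        d
    };
    let mut ids: Vec<u32> = parents.keys().copied().collect();
    ids.sort_by_key(|id| std::cmp::Reverse(depth(*id)));
    ids
}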

async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), DeleteTenantError> {
    // Check that the timelines dir is empty.
    if !fs_ext::is_directory_empty(timelines_path).await? {
        // Report the first 10 entries in the directory.
        let list = fs_ext::list_dir(timelines_path).await.context("list_dir")?;
        let list = &list.into_iter().take(10).collect::<Vec<_>>();
        return Err(DeleteTenantError::Other(anyhow::anyhow!(
            "Timelines directory is not empty after deleting all timelines: {list:?}"
        )));
    }

    Ok(())
}

async fn remove_tenant_remote_delete_mark(
    conf: &PageServerConf,
    remote_storage: &GenericRemoteStorage,
    tenant_shard_id: &TenantShardId,
    cancel: &CancellationToken,
) -> Result<(), DeleteTenantError> {
    let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
    backoff::retry(
        || async { remote_storage.delete(&path, cancel).await },
        TimeoutOrCancel::caused_by_cancel,
        FAILED_UPLOAD_WARN_THRESHOLD,
        FAILED_REMOTE_OP_RETRIES,
        "remove_tenant_remote_delete_mark",
        cancel,
    )
    .await
    .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
    .and_then(|x| x)
    .context("remove_tenant_remote_delete_mark")?;
    Ok(())
}
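
// Illustrative note (not part of the original file): judging from the call above, `backoff::retry`
// yields `None` when it was cancelled before producing a result, and `Some(Ok(..))` /
// `Some(Err(..))` otherwise. A minimal sketch of the `.ok_or_else(..).and_then(|x| x)` flattening,
// using only std and anyhow; the function name is hypothetical.
#[allow(dead_code)]
fn flatten_retry_outcome(outcome: Option<Result<(), anyhow::Error>>) -> Result<(), anyhow::Error> {
    outcome
        // Treat "no result at all" (cancellation) as an error...
        .ok_or_else(|| anyhow::anyhow!("cancelled"))
        // ...then collapse the nested Result<Result<_, _>, _> into a single Result.
        .and_then(|x| x)
}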

// Clean up filesystem traces: tenant config, timelines dir, local delete mark, tenant dir.
async fn cleanup_remaining_fs_traces(
    conf: &PageServerConf,
    tenant_shard_id: &TenantShardId,
) -> Result<(), DeleteTenantError> {
    let rm = |p: Utf8PathBuf, is_dir: bool| async move {
        if is_dir {
            tokio::fs::remove_dir(&p).await
        } else {
            tokio::fs::remove_file(&p).await
        }
        .or_else(fs_ext::ignore_not_found)
        .with_context(|| format!("failed to delete {p}"))
    };

    rm(conf.tenant_config_path(tenant_shard_id), false).await?;
    rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;

    fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
        Err(anyhow::anyhow!(
            "failpoint: tenant-delete-before-remove-timelines-dir"
        ))?
    });

    rm(conf.timelines_path(tenant_shard_id), true).await?;

    fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
        Err(anyhow::anyhow!(
            "failpoint: tenant-delete-before-remove-deleted-mark"
        ))?
    });

    // Make sure the preceding deletions are ordered before the mark removal.
    // Otherwise there is no guarantee that they reach the disk before the mark is deleted,
    // so the mark could reach the disk first while the other deletions are reordered later
    // and thus missed if a crash occurs.
    // Note that we don't need to sync after the mark file is removed,
    // because we can tolerate the mark file reappearing on startup.
    // (A std-only sketch of the idempotent-removal pattern follows this function.)
    let tenant_path = &conf.tenant_path(tenant_shard_id);
    if tenant_path.exists() {
        crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
            .await
            .context("fsync_pre_mark_remove")?;
    }

    rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;

    rm(conf.tenant_heatmap_path(tenant_shard_id), false).await?;

    fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
        Err(anyhow::anyhow!(
            "failpoint: tenant-delete-before-remove-tenant-dir"
        ))?
    });

    rm(conf.tenant_path(tenant_shard_id), true).await?;

    Ok(())
}
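
// Illustrative note (not part of the original file): a minimal, std-only sketch of the
// "ignore NotFound so cleanup stays idempotent across restarts" behaviour that the `rm` closure
// above gets from `fs_ext::ignore_not_found`. The function name is hypothetical.
#[allow(dead_code)]
fn remove_file_idempotent(path: &std::path::Path) -> std::io::Result<()> {
    match std::fs::remove_file(path) {
        // A missing file means a previous (possibly crashed) run already removed it.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        other => other,
    }
}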

#[derive(Default)]
pub enum DeleteTenantFlow {
    #[default]
    NotStarted,
    InProgress,
    Finished,
}

impl DeleteTenantFlow {
    pub(crate) async fn should_resume_deletion(
        conf: &'static PageServerConf,
        remote_mark_exists: bool,
        tenant: &Tenant,
    ) -> Result<Option<DeletionGuard>, DeleteTenantError> {
        // Grab the owned deletion guard; during init we are the only possible owner.
        // (A sketch of this owned-guard pattern follows this function.)
        let acquire = |t: &Tenant| {
            Some(
                Arc::clone(&t.delete_progress)
                    .try_lock_owned()
                    .expect("we're the only owner during init"),
            )
        };

        if remote_mark_exists {
            return Ok(acquire(tenant));
        }

        // Check the local mark first: if it's there, there is no need to go to S3 to check
        // whether the remote one exists.
        if conf
            .tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
            .exists()
        {
            Ok(acquire(tenant))
        } else {
            Ok(None)
        }
    }
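
    // Illustrative note (not part of the original file): a minimal sketch of the owned-guard
    // acquisition used in `should_resume_deletion`, relying only on tokio's
    // `Arc<Mutex<T>>::try_lock_owned`. The function name is hypothetical.
    #[allow(dead_code)]
    fn acquire_guard_example(
        progress: &Arc<tokio::sync::Mutex<DeleteTenantFlow>>,
    ) -> Option<DeletionGuard> {
        // Clone the Arc so the guard owns a handle to the mutex (an "owned" guard), then take
        // the lock without awaiting; during init we expect to be the only possible owner.
        Arc::clone(progress).try_lock_owned().ok()
    }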

    pub(crate) async fn resume_from_attach(
        guard: DeletionGuard,
        tenant: &Arc<Tenant>,
        preload: Option<TenantPreload>,
        tenants: &'static std::sync::RwLock<TenantsMap>,
        ctx: &RequestContext,
    ) -> Result<(), DeleteTenantError> {
        let (_, progress) = completion::channel();

        tenant
            .set_stopping(progress, false, true)
            .await
            .expect("can't be stopping or broken");

        tenant
            .attach(preload, super::SpawnMode::Eager, ctx)
            .await
            .context("attach")?;

        Self::background(
            guard,
            tenant.conf,
            tenant.remote_storage.clone(),
            tenants,
            tenant,
        )
        .await
    }

    async fn background(
        mut guard: OwnedMutexGuard<Self>,
        conf: &PageServerConf,
        remote_storage: GenericRemoteStorage,
        tenants: &'static std::sync::RwLock<TenantsMap>,
        tenant: &Arc<Tenant>,
    ) -> Result<(), DeleteTenantError> {
        // Tree-sort the timelines and schedule deletion for each of them; retries are expected
        // to be driven from the console side.
        // Note that if a deletion fails we don't mark the timelines as broken: the whole tenant
        // becomes broken, per the `Self::schedule_background` logic.
        let already_running_timeline_deletions = schedule_ordered_timeline_deletions(tenant)
            .await
            .context("schedule_ordered_timeline_deletions")?;

        fail::fail_point!("tenant-delete-before-polling-ongoing-deletions", |_| {
            Err(anyhow::anyhow!(
                "failpoint: tenant-delete-before-polling-ongoing-deletions"
            ))?
        });

        // Wait for deletions that were already running at the moment tenant deletion was
        // requested. Once we can lock a deletion guard, the corresponding timeline deletion
        // has finished.
        for (guard, timeline_id) in already_running_timeline_deletions {
            let flow = guard.lock().await;
            if !flow.is_finished() {
                return Err(DeleteTenantError::Other(anyhow::anyhow!(
                    "already running timeline deletion failed: {timeline_id}"
                )));
            }
        }

        // Remove top-level tenant objects that don't belong to any timeline, such as the heatmap.
        let heatmap_path = remote_heatmap_path(&tenant.tenant_shard_id());
        if let Some(Err(e)) = backoff::retry(
            || async {
                remote_storage
                    .delete(&heatmap_path, &task_mgr::shutdown_token())
                    .await
            },
            TimeoutOrCancel::caused_by_cancel,
            FAILED_UPLOAD_WARN_THRESHOLD,
            FAILED_REMOTE_OP_RETRIES,
            "remove_remote_tenant_heatmap",
            &task_mgr::shutdown_token(),
        )
        .await
        {
            tracing::warn!("Failed to delete heatmap at {heatmap_path}: {e}");
        }

        let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
        // The directory may not exist if a previous attempt failed in
        // cleanup_remaining_fs_traces after removing it.
        if timelines_path.exists() {
            // Sanity check to guard against layout changes.
            ensure_timelines_dir_empty(&timelines_path)
                .await
                .context("timelines dir not empty")?;
        }

        remove_tenant_remote_delete_mark(
            conf,
            &remote_storage,
            &tenant.tenant_shard_id,
            &task_mgr::shutdown_token(),
        )
        .await?;

        pausable_failpoint!("tenant-delete-before-cleanup-remaining-fs-traces-pausable");
        fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
            Err(anyhow::anyhow!(
                "failpoint: tenant-delete-before-cleanup-remaining-fs-traces"
            ))?
        });

        cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
            .await
            .context("cleanup_remaining_fs_traces")?;

        {
            // This block simply removes the TenantSlot for this tenant. It requires a loop because
            // we might conflict with a TenantSlot::InProgress marker and need to wait for it.
            //
            // This complexity will go away when we simplify how deletion works:
            // https://github.com/neondatabase/neon/issues/5080
            // (A toy sketch of the wait-and-retry loop follows this impl block.)
            loop {
                // Under the TenantsMap lock, try to remove the tenant. We usually succeed, but if
                // we encounter an InProgress marker, yield the barrier it contains and wait on it.
                let barrier = {
                    let mut locked = tenants.write().unwrap();
                    let removed = locked.remove(tenant.tenant_shard_id);

                    // FIXME: we should not be modifying this from outside of mgr.rs.
                    // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080).

                    // Update stats.
                    match &removed {
                        TenantsMapRemoveResult::Occupied(slot) => {
                            crate::metrics::TENANT_MANAGER.slot_removed(slot);
                        }
                        TenantsMapRemoveResult::InProgress(barrier) => {
                            crate::metrics::TENANT_MANAGER
                                .slot_removed(&TenantSlot::InProgress(barrier.clone()));
                        }
                        TenantsMapRemoveResult::Vacant => {
                            // Nothing changed in the map, so there is no metric update.
                        }
                    }

                    match removed {
                        TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
                            match tenant.current_state() {
                                TenantState::Stopping { .. } | TenantState::Broken { .. } => {
                                    // Expected: we put the tenant into Stopping state before we start deleting it.
                                }
                                state => {
                                    // Unexpected state
                                    tracing::warn!(
                                        "Tenant in unexpected state {state} after deletion"
                                    );
                                }
                            }
                            break;
                        }
                        TenantsMapRemoveResult::Occupied(TenantSlot::Secondary(_)) => {
                            // This is unexpected: a secondary tenant should not have been created, and we
                            // are not in a position to shut it down from here.
                            tracing::warn!("Tenant transitioned to secondary mode while deleting!");
                            break;
                        }
                        TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => {
                            unreachable!("TenantsMap::remove handles InProgress separately, should never return it here");
                        }
                        TenantsMapRemoveResult::Vacant => {
                            tracing::warn!(
                                "Tenant removed from TenantsMap before deletion completed"
                            );
                            break;
                        }
                        TenantsMapRemoveResult::InProgress(barrier) => {
                            // An InProgress entry was found; we must wait on its barrier.
                            barrier
                        }
                    }
                };

                tracing::info!(
                    "Waiting for competing operation to complete before deleting state for tenant"
                );
                barrier.wait().await;
            }
        }

        *guard = Self::Finished;

        Ok(())
    }
}
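
// Illustrative note (not part of the original file): a toy, self-contained sketch of the
// "retry until no InProgress marker remains" loop at the end of `background`. The names
// `ToyRemoveResult` and `toy_remove_with_wait`, and the tokio watch channel, are hypothetical
// stand-ins for `TenantsMapRemoveResult` and the real barrier type.
#[allow(dead_code)]
enum ToyRemoveResult {
    Removed,
    InProgress(tokio::sync::watch::Receiver<bool>),
}

#[allow(dead_code)]
async fn toy_remove_with_wait(mut try_remove: impl FnMut() -> ToyRemoveResult) {
    loop {
        match try_remove() {
            // Success (or nothing left to do): stop looping.
            ToyRemoveResult::Removed => break,
            // A competing operation holds the slot: wait for it to signal completion, then retry.
            ToyRemoveResult::InProgress(mut barrier) => {
                let _ = barrier.changed().await;
            }
        }
    }
}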
        
