LCOV - code coverage report
Current view: top level - pageserver/src/tenant - delete.rs (source / functions)
Test:      83fdc07b20afb411eb00587331b88259a1ff6742.info
Test Date: 2024-06-20 09:39:16
                 Coverage   Total   Hit
Lines:              0.0 %     421     0
Functions:          0.0 %      62     0

            Line data    Source code
       1              : use std::sync::Arc;
       2              : 
       3              : use anyhow::Context;
       4              : use camino::{Utf8Path, Utf8PathBuf};
       5              : use pageserver_api::{models::TenantState, shard::TenantShardId};
       6              : use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel};
       7              : use tokio::sync::OwnedMutexGuard;
       8              : use tokio_util::sync::CancellationToken;
       9              : use tracing::{error, instrument, Instrument};
      10              : 
      11              : use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId, pausable_failpoint};
      12              : 
      13              : use crate::{
      14              :     config::PageServerConf,
      15              :     context::RequestContext,
      16              :     task_mgr::{self, TaskKind},
      17              :     tenant::{
      18              :         mgr::{TenantSlot, TenantsMapRemoveResult},
      19              :         remote_timeline_client::remote_heatmap_path,
      20              :         timeline::ShutdownMode,
      21              :     },
      22              : };
      23              : 
      24              : use super::{
      25              :     mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
      26              :     remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
      27              :     span,
      28              :     timeline::delete::DeleteTimelineFlow,
      29              :     tree_sort_timelines, DeleteTimelineError, Tenant, TenantPreload,
      30              : };
      31              : 
      32            0 : #[derive(Debug, thiserror::Error)]
      33              : pub(crate) enum DeleteTenantError {
      34              :     #[error("GetTenant {0}")]
      35              :     Get(#[from] GetTenantError),
      36              : 
      37              :     #[error("Tenant not attached")]
      38              :     NotAttached,
      39              : 
      40              :     #[error("Invalid state {0}. Expected Active or Broken")]
      41              :     InvalidState(TenantState),
      42              : 
      43              :     #[error("Tenant deletion is already in progress")]
      44              :     AlreadyInProgress,
      45              : 
      46              :     #[error("Tenant map slot error {0}")]
      47              :     SlotError(#[from] TenantSlotError),
      48              : 
      49              :     #[error("Tenant map slot upsert error {0}")]
      50              :     SlotUpsertError(#[from] TenantSlotUpsertError),
      51              : 
      52              :     #[error("Timeline {0}")]
      53              :     Timeline(#[from] DeleteTimelineError),
      54              : 
      55              :     #[error("Cancelled")]
      56              :     Cancelled,
      57              : 
      58              :     #[error(transparent)]
      59              :     Other(#[from] anyhow::Error),
      60              : }
      61              : 
      62              : type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;
      63              : 
      64            0 : fn remote_tenant_delete_mark_path(
      65            0 :     conf: &PageServerConf,
      66            0 :     tenant_shard_id: &TenantShardId,
      67            0 : ) -> anyhow::Result<RemotePath> {
      68            0 :     let tenant_remote_path = conf
      69            0 :         .tenant_path(tenant_shard_id)
      70            0 :         .strip_prefix(&conf.workdir)
      71            0 :         .context("Failed to strip workdir prefix")
      72            0 :         .and_then(RemotePath::new)
      73            0 :         .context("tenant path")?;
      74            0 :     Ok(tenant_remote_path.join(Utf8Path::new("timelines/deleted")))
      75            0 : }
      76              : 
      77            0 : async fn create_remote_delete_mark(
      78            0 :     conf: &PageServerConf,
      79            0 :     remote_storage: &GenericRemoteStorage,
      80            0 :     tenant_shard_id: &TenantShardId,
      81            0 :     cancel: &CancellationToken,
      82            0 : ) -> Result<(), DeleteTenantError> {
      83            0 :     let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
      84              : 
      85            0 :     let data: &[u8] = &[];
      86            0 :     backoff::retry(
      87            0 :         || async {
      88            0 :             let data = bytes::Bytes::from_static(data);
      89            0 :             let stream = futures::stream::once(futures::future::ready(Ok(data)));
      90            0 :             remote_storage
      91            0 :                 .upload(stream, 0, &remote_mark_path, None, cancel)
      92            0 :                 .await
      93            0 :         },
      94            0 :         TimeoutOrCancel::caused_by_cancel,
      95            0 :         FAILED_UPLOAD_WARN_THRESHOLD,
      96            0 :         FAILED_REMOTE_OP_RETRIES,
      97            0 :         "mark_upload",
      98            0 :         cancel,
      99            0 :     )
     100            0 :     .await
     101            0 :     .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
     102            0 :     .and_then(|x| x)
     103            0 :     .context("mark_upload")?;
     104              : 
     105            0 :     Ok(())
     106            0 : }
     107              : 
     108            0 : async fn create_local_delete_mark(
     109            0 :     conf: &PageServerConf,
     110            0 :     tenant_shard_id: &TenantShardId,
     111            0 : ) -> Result<(), DeleteTenantError> {
     112            0 :     let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
     113            0 : 
      114            0 :     // Note: it is ok to replace an existing file.
     115            0 :     let _ = std::fs::OpenOptions::new()
     116            0 :         .write(true)
     117            0 :         .create(true)
     118            0 :         .truncate(true)
     119            0 :         .open(&marker_path)
     120            0 :         .with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
     121              : 
     122            0 :     crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
     123              : 
     124            0 :     Ok(())
     125            0 : }
     126              : 
     127            0 : async fn schedule_ordered_timeline_deletions(
     128            0 :     tenant: &Arc<Tenant>,
     129            0 : ) -> Result<Vec<(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>, TimelineId)>, DeleteTenantError> {
     130            0 :     // Tenant is stopping at this point. We know it will be deleted.
     131            0 :     // No new timelines should be created.
      132            0 :     // Tree sort timelines to delete from leaves to the root.
     133            0 :     // NOTE: by calling clone we release the mutex which creates a possibility for a race: pending deletion
      134            0 :     // can complete and remove the timeline from the map in between our call to clone
      135            0 :     // and `DeleteTimelineFlow::run`, so `run` won't find the timeline in the `timelines` map.
      136            0 :     // timelines.lock is currently synchronous, so we can't hold it across an await point.
     137            0 :     // So just ignore NotFound error if we get it from `run`.
     138            0 :     // Beware: in case it becomes async and we try to hold it here, `run` also locks it, which can create a deadlock.
     139            0 :     let timelines = tenant.timelines.lock().unwrap().clone();
     140            0 :     let sorted =
     141            0 :         tree_sort_timelines(timelines, |t| t.get_ancestor_timeline_id()).context("tree sort")?;
     142              : 
     143            0 :     let mut already_running_deletions = vec![];
     144              : 
     145            0 :     for (timeline_id, _) in sorted.into_iter().rev() {
     146            0 :         let span = tracing::info_span!("timeline_delete", %timeline_id);
     147            0 :         let res = DeleteTimelineFlow::run(tenant, timeline_id, true)
     148            0 :             .instrument(span)
     149            0 :             .await;
     150            0 :         if let Err(e) = res {
     151            0 :             match e {
     152              :                 DeleteTimelineError::NotFound => {
      153              :                     // Timeline deletion finished after the call to clone above but before the call
      154              :                     // to `DeleteTimelineFlow::run`, and removed the timeline from the map.
     155            0 :                     continue;
     156              :                 }
     157            0 :                 DeleteTimelineError::AlreadyInProgress(guard) => {
     158            0 :                     already_running_deletions.push((guard, timeline_id));
     159            0 :                     continue;
     160              :                 }
     161            0 :                 e => return Err(DeleteTenantError::Timeline(e)),
     162              :             }
     163            0 :         }
     164              :     }
     165              : 
     166            0 :     Ok(already_running_deletions)
     167            0 : }
     168              : 
     169            0 : async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), DeleteTenantError> {
     170            0 :     // Assert timelines dir is empty.
     171            0 :     if !fs_ext::is_directory_empty(timelines_path).await? {
     172              :         // Display first 10 items in directory
     173            0 :         let list = fs_ext::list_dir(timelines_path).await.context("list_dir")?;
     174            0 :         let list = &list.into_iter().take(10).collect::<Vec<_>>();
     175            0 :         return Err(DeleteTenantError::Other(anyhow::anyhow!(
     176            0 :             "Timelines directory is not empty after all timelines deletion: {list:?}"
     177            0 :         )));
     178            0 :     }
     179            0 : 
     180            0 :     Ok(())
     181            0 : }
     182              : 
     183            0 : async fn remove_tenant_remote_delete_mark(
     184            0 :     conf: &PageServerConf,
     185            0 :     remote_storage: &GenericRemoteStorage,
     186            0 :     tenant_shard_id: &TenantShardId,
     187            0 :     cancel: &CancellationToken,
     188            0 : ) -> Result<(), DeleteTenantError> {
     189            0 :     let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
     190            0 :     backoff::retry(
     191            0 :         || async { remote_storage.delete(&path, cancel).await },
     192            0 :         TimeoutOrCancel::caused_by_cancel,
     193            0 :         FAILED_UPLOAD_WARN_THRESHOLD,
     194            0 :         FAILED_REMOTE_OP_RETRIES,
     195            0 :         "remove_tenant_remote_delete_mark",
     196            0 :         cancel,
     197            0 :     )
     198            0 :     .await
     199            0 :     .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
     200            0 :     .and_then(|x| x)
     201            0 :     .context("remove_tenant_remote_delete_mark")?;
     202            0 :     Ok(())
     203            0 : }
     204              : 
      205              : // Clean up fs traces: tenant config, timelines dir, local delete mark, tenant dir
     206            0 : async fn cleanup_remaining_fs_traces(
     207            0 :     conf: &PageServerConf,
     208            0 :     tenant_shard_id: &TenantShardId,
     209            0 : ) -> Result<(), DeleteTenantError> {
     210            0 :     let rm = |p: Utf8PathBuf, is_dir: bool| async move {
     211            0 :         if is_dir {
     212            0 :             tokio::fs::remove_dir(&p).await
     213            0 :         } else {
     214            0 :             tokio::fs::remove_file(&p).await
     215            0 :         }
     216            0 :         .or_else(fs_ext::ignore_not_found)
     217            0 :         .with_context(|| format!("failed to delete {p}"))
     218            0 :     };
     219              : 
     220            0 :     rm(conf.tenant_config_path(tenant_shard_id), false).await?;
     221            0 :     rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;
     222              : 
     223            0 :     fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
     224            0 :         Err(anyhow::anyhow!(
     225            0 :             "failpoint: tenant-delete-before-remove-timelines-dir"
     226            0 :         ))?
     227            0 :     });
     228              : 
     229            0 :     rm(conf.timelines_path(tenant_shard_id), true).await?;
     230              : 
     231            0 :     fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
     232            0 :         Err(anyhow::anyhow!(
     233            0 :             "failpoint: tenant-delete-before-remove-deleted-mark"
     234            0 :         ))?
     235            0 :     });
     236              : 
      237              :     // Make sure previous deletions are ordered before mark removal.
      238              :     // Otherwise there is no guarantee that they reach the disk before mark deletion,
      239              :     // so it's possible for the mark to reach disk first and for the other deletions
      240              :     // to be reordered later and thus missed if a crash occurs.
      241              :     // Note that we don't need to sync after the mark file is removed
      242              :     // because we can tolerate the case where the mark file reappears on startup.
     243            0 :     let tenant_path = &conf.tenant_path(tenant_shard_id);
     244            0 :     if tenant_path.exists() {
     245            0 :         crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
     246            0 :             .await
     247            0 :             .context("fsync_pre_mark_remove")?;
     248            0 :     }
     249              : 
     250            0 :     rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;
     251              : 
     252            0 :     rm(conf.tenant_heatmap_path(tenant_shard_id), false).await?;
     253              : 
     254            0 :     fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
     255            0 :         Err(anyhow::anyhow!(
     256            0 :             "failpoint: tenant-delete-before-remove-tenant-dir"
     257            0 :         ))?
     258            0 :     });
     259              : 
     260            0 :     rm(conf.tenant_path(tenant_shard_id), true).await?;
     261              : 
     262            0 :     Ok(())
     263            0 : }
     264              : 
      265              : /// Orchestrates the shutdown of all the tenant's tasks, removes its in-memory structures,
      266              : /// and deletes its data from both disk and S3.
     267              : /// The sequence of steps:
     268              : /// 1. Upload remote deletion mark.
     269              : /// 2. Create local mark file.
     270              : /// 3. Shutdown tasks
     271              : /// 4. Run ordered timeline deletions
     272              : /// 5. Wait for timeline deletion operations that were scheduled before tenant deletion was requested
     273              : /// 6. Remove remote mark
     274              : /// 7. Cleanup remaining fs traces, tenant dir, config, timelines dir, local delete mark
     275              : /// It is resumable from any step in case a crash/restart occurs.
     276              : /// There are two entrypoints to the process:
      277              : /// 1. [`DeleteTenantFlow::run`]: the main one, called by a management API handler.
      278              : /// 2. [`DeleteTenantFlow::resume_from_attach`]: called when deletion is resumed because the tenant is found to be deleted during the attach process.
      279              : ///  Note that the only other place that messes with the timeline delete mark is the `Tenant::spawn_load` function.
     280              : #[derive(Default)]
     281              : pub enum DeleteTenantFlow {
     282              :     #[default]
     283              :     NotStarted,
     284              :     InProgress,
     285              :     Finished,
     286              : }
     287              : 
     288              : impl DeleteTenantFlow {
      289              :     // These steps are run in the context of the management API request handler.
      290              :     // Long-running steps continue to run in the background.
     291              :     // NB: If this fails half-way through, and is retried, the retry will go through
     292              :     // all the same steps again. Make sure the code here is idempotent, and don't
     293              :     // error out if some of the shutdown tasks have already been completed!
      294              :     // NOTE: 'static lifetimes are needed for the background part.
     295              :     // We assume that calling code sets up the span with tenant_id.
     296            0 :     #[instrument(skip_all)]
     297              :     pub(crate) async fn run(
     298              :         conf: &'static PageServerConf,
     299              :         remote_storage: GenericRemoteStorage,
     300              :         tenants: &'static std::sync::RwLock<TenantsMap>,
     301              :         tenant: Arc<Tenant>,
     302              :         cancel: &CancellationToken,
     303              :     ) -> Result<(), DeleteTenantError> {
     304              :         span::debug_assert_current_span_has_tenant_id();
     305              : 
     306              :         pausable_failpoint!("tenant-delete-before-run");
     307              : 
     308              :         let mut guard = Self::prepare(&tenant).await?;
     309              : 
     310              :         if let Err(e) = Self::run_inner(&mut guard, conf, &remote_storage, &tenant, cancel).await {
     311              :             tenant.set_broken(format!("{e:#}")).await;
     312              :             return Err(e);
     313              :         }
     314              : 
     315              :         Self::schedule_background(guard, conf, remote_storage, tenants, tenant);
     316              : 
     317              :         Ok(())
     318              :     }
     319              : 
      320              :     // Helper function needed so the caller can match once on the returned error and transition the tenant into broken state.
      321              :     // This is needed because tenant.shutdown is not idempotent. If the tenant state is set to stopping, another call to tenant.shutdown
      322              :     // will result in an error, but here we need to be able to retry shutdown when tenant deletion is retried.
      323              :     // So the solution is to set the tenant state to broken.
     324            0 :     async fn run_inner(
     325            0 :         guard: &mut OwnedMutexGuard<Self>,
     326            0 :         conf: &'static PageServerConf,
     327            0 :         remote_storage: &GenericRemoteStorage,
     328            0 :         tenant: &Tenant,
     329            0 :         cancel: &CancellationToken,
     330            0 :     ) -> Result<(), DeleteTenantError> {
     331            0 :         guard.mark_in_progress()?;
     332              : 
     333            0 :         fail::fail_point!("tenant-delete-before-create-remote-mark", |_| {
     334            0 :             Err(anyhow::anyhow!(
     335            0 :                 "failpoint: tenant-delete-before-create-remote-mark"
     336            0 :             ))?
     337            0 :         });
     338              : 
     339            0 :         create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id, cancel)
     340            0 :             .await
     341            0 :             .context("remote_mark")?;
     342              : 
     343            0 :         fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
     344            0 :             Err(anyhow::anyhow!(
     345            0 :                 "failpoint: tenant-delete-before-create-local-mark"
     346            0 :             ))?
     347            0 :         });
     348              : 
     349            0 :         create_local_delete_mark(conf, &tenant.tenant_shard_id)
     350            0 :             .await
     351            0 :             .context("local delete mark")?;
     352              : 
     353            0 :         fail::fail_point!("tenant-delete-before-background", |_| {
     354            0 :             Err(anyhow::anyhow!(
     355            0 :                 "failpoint: tenant-delete-before-background"
     356            0 :             ))?
     357            0 :         });
     358              : 
     359            0 :         Ok(())
     360            0 :     }
     361              : 
     362            0 :     fn mark_in_progress(&mut self) -> anyhow::Result<()> {
     363            0 :         match self {
     364            0 :             Self::Finished => anyhow::bail!("Bug. Is in finished state"),
     365            0 :             Self::InProgress { .. } => { /* We're in a retry */ }
     366            0 :             Self::NotStarted => { /* Fresh start */ }
     367              :         }
     368              : 
     369            0 :         *self = Self::InProgress;
     370            0 : 
     371            0 :         Ok(())
     372            0 :     }
     373              : 
     374            0 :     pub(crate) async fn should_resume_deletion(
     375            0 :         conf: &'static PageServerConf,
     376            0 :         remote_mark_exists: bool,
     377            0 :         tenant: &Tenant,
     378            0 :     ) -> Result<Option<DeletionGuard>, DeleteTenantError> {
     379            0 :         let acquire = |t: &Tenant| {
     380            0 :             Some(
     381            0 :                 Arc::clone(&t.delete_progress)
     382            0 :                     .try_lock_owned()
     383            0 :                     .expect("we're the only owner during init"),
     384            0 :             )
     385            0 :         };
     386              : 
     387            0 :         if remote_mark_exists {
     388            0 :             return Ok(acquire(tenant));
     389            0 :         }
     390            0 : 
      391            0 :         // Check the local mark first; if it's there, there is no need to go to S3 to check whether the remote one exists.
     392            0 :         if conf
     393            0 :             .tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
     394            0 :             .exists()
     395              :         {
     396            0 :             Ok(acquire(tenant))
     397              :         } else {
     398            0 :             Ok(None)
     399              :         }
     400            0 :     }
     401              : 
     402            0 :     pub(crate) async fn resume_from_attach(
     403            0 :         guard: DeletionGuard,
     404            0 :         tenant: &Arc<Tenant>,
     405            0 :         preload: Option<TenantPreload>,
     406            0 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     407            0 :         ctx: &RequestContext,
     408            0 :     ) -> Result<(), DeleteTenantError> {
     409            0 :         let (_, progress) = completion::channel();
     410            0 : 
     411            0 :         tenant
     412            0 :             .set_stopping(progress, false, true)
     413            0 :             .await
     414            0 :             .expect("cant be stopping or broken");
     415            0 : 
     416            0 :         tenant
     417            0 :             .attach(preload, super::SpawnMode::Eager, ctx)
     418            0 :             .await
     419            0 :             .context("attach")?;
     420              : 
     421            0 :         Self::background(
     422            0 :             guard,
     423            0 :             tenant.conf,
     424            0 :             tenant.remote_storage.clone(),
     425            0 :             tenants,
     426            0 :             tenant,
     427            0 :         )
     428            0 :         .await
     429            0 :     }
     430              : 
     431              :     /// Check whether background deletion of this tenant is currently in progress
     432            0 :     pub(crate) fn is_in_progress(tenant: &Tenant) -> bool {
     433            0 :         tenant.delete_progress.try_lock().is_err()
     434            0 :     }
     435              : 
     436            0 :     async fn prepare(
     437            0 :         tenant: &Arc<Tenant>,
     438            0 :     ) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
     439              :         // FIXME: unsure about active only. Our init jobs may not be cancellable properly,
     440              :         // so at least for now allow deletions only for active tenants. TODO recheck
      441              :         // Broken and Stopping are needed for retries.
     442            0 :         if !matches!(
     443            0 :             tenant.current_state(),
     444              :             TenantState::Active | TenantState::Broken { .. }
     445              :         ) {
     446            0 :             return Err(DeleteTenantError::InvalidState(tenant.current_state()));
     447            0 :         }
     448              : 
     449            0 :         let guard = Arc::clone(&tenant.delete_progress)
     450            0 :             .try_lock_owned()
     451            0 :             .map_err(|_| DeleteTenantError::AlreadyInProgress)?;
     452              : 
     453            0 :         fail::fail_point!("tenant-delete-before-shutdown", |_| {
     454            0 :             Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))?
     455            0 :         });
     456              : 
      457              :         // Make pageserver shutdown not wait for our completion.
     458            0 :         let (_, progress) = completion::channel();
     459            0 : 
     460            0 :         // It would be good to only set stopping here and continue shutdown in the background, but shutdown is not idempotent.
      461            0 :         // i.e. it is an error to do:
     462            0 :         // tenant.set_stopping
     463            0 :         // tenant.shutdown
      464            0 :         // It's also bad that we're holding tenants.read here.
     465            0 :         // TODO relax set_stopping to be idempotent?
     466            0 :         if tenant.shutdown(progress, ShutdownMode::Hard).await.is_err() {
     467            0 :             return Err(DeleteTenantError::Other(anyhow::anyhow!(
     468            0 :                 "tenant shutdown is already in progress"
     469            0 :             )));
     470            0 :         }
     471            0 : 
     472            0 :         Ok(guard)
     473            0 :     }
     474              : 
     475            0 :     fn schedule_background(
     476            0 :         guard: OwnedMutexGuard<Self>,
     477            0 :         conf: &'static PageServerConf,
     478            0 :         remote_storage: GenericRemoteStorage,
     479            0 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     480            0 :         tenant: Arc<Tenant>,
     481            0 :     ) {
     482            0 :         let tenant_shard_id = tenant.tenant_shard_id;
     483            0 : 
     484            0 :         task_mgr::spawn(
     485            0 :             task_mgr::BACKGROUND_RUNTIME.handle(),
     486            0 :             TaskKind::TimelineDeletionWorker,
     487            0 :             Some(tenant_shard_id),
     488            0 :             None,
     489            0 :             "tenant_delete",
     490              :             false,
     491            0 :             async move {
     492            0 :                 if let Err(err) =
     493            0 :                     Self::background(guard, conf, remote_storage, tenants, &tenant).await
     494              :                 {
     495            0 :                     error!("Error: {err:#}");
     496            0 :                     tenant.set_broken(format!("{err:#}")).await;
     497            0 :                 };
     498            0 :                 Ok(())
     499            0 :             }
     500            0 :             .instrument(tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug())),
     501              :         );
     502            0 :     }
     503              : 
     504            0 :     async fn background(
     505            0 :         mut guard: OwnedMutexGuard<Self>,
     506            0 :         conf: &PageServerConf,
     507            0 :         remote_storage: GenericRemoteStorage,
     508            0 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     509            0 :         tenant: &Arc<Tenant>,
     510            0 :     ) -> Result<(), DeleteTenantError> {
     511              :         // Tree sort timelines, schedule delete for them. Mention retries from the console side.
      512              :         // Note that if deletion fails we don't mark timelines as broken;
      513              :         // the whole tenant will become broken, per the `Self::schedule_background` logic.
     514            0 :         let already_running_timeline_deletions = schedule_ordered_timeline_deletions(tenant)
     515            0 :             .await
     516            0 :             .context("schedule_ordered_timeline_deletions")?;
     517              : 
     518            0 :         fail::fail_point!("tenant-delete-before-polling-ongoing-deletions", |_| {
     519            0 :             Err(anyhow::anyhow!(
     520            0 :                 "failpoint: tenant-delete-before-polling-ongoing-deletions"
     521            0 :             ))?
     522            0 :         });
     523              : 
     524              :         // Wait for deletions that were already running at the moment when tenant deletion was requested.
      525              :         // When we can lock the deletion guard it means that the corresponding timeline deletion has finished.
     526            0 :         for (guard, timeline_id) in already_running_timeline_deletions {
     527            0 :             let flow = guard.lock().await;
     528            0 :             if !flow.is_finished() {
     529            0 :                 return Err(DeleteTenantError::Other(anyhow::anyhow!(
     530            0 :                     "already running timeline deletion failed: {timeline_id}"
     531            0 :                 )));
     532            0 :             }
     533              :         }
     534              : 
     535              :         // Remove top-level tenant objects that don't belong to a timeline, such as heatmap
     536            0 :         let heatmap_path = remote_heatmap_path(&tenant.tenant_shard_id());
     537            0 :         if let Some(Err(e)) = backoff::retry(
     538            0 :             || async {
     539            0 :                 remote_storage
     540            0 :                     .delete(&heatmap_path, &task_mgr::shutdown_token())
     541            0 :                     .await
     542            0 :             },
     543            0 :             TimeoutOrCancel::caused_by_cancel,
     544            0 :             FAILED_UPLOAD_WARN_THRESHOLD,
     545            0 :             FAILED_REMOTE_OP_RETRIES,
     546            0 :             "remove_remote_tenant_heatmap",
     547            0 :             &task_mgr::shutdown_token(),
     548            0 :         )
     549            0 :         .await
     550              :         {
     551            0 :             tracing::warn!("Failed to delete heatmap at {heatmap_path}: {e}");
     552            0 :         }
     553              : 
     554            0 :         let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
     555            0 :         // May not exist if we fail in cleanup_remaining_fs_traces after removing it
     556            0 :         if timelines_path.exists() {
     557              :             // sanity check to guard against layout changes
     558            0 :             ensure_timelines_dir_empty(&timelines_path)
     559            0 :                 .await
     560            0 :                 .context("timelines dir not empty")?;
     561            0 :         }
     562              : 
     563            0 :         remove_tenant_remote_delete_mark(
     564            0 :             conf,
     565            0 :             &remote_storage,
     566            0 :             &tenant.tenant_shard_id,
     567            0 :             &task_mgr::shutdown_token(),
     568            0 :         )
     569            0 :         .await?;
     570              : 
     571              :         pausable_failpoint!("tenant-delete-before-cleanup-remaining-fs-traces-pausable");
     572            0 :         fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
     573            0 :             Err(anyhow::anyhow!(
     574            0 :                 "failpoint: tenant-delete-before-cleanup-remaining-fs-traces"
     575            0 :             ))?
     576            0 :         });
     577              : 
     578            0 :         cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
     579            0 :             .await
     580            0 :             .context("cleanup_remaining_fs_traces")?;
     581              : 
     582              :         {
     583              :             pausable_failpoint!("tenant-delete-before-map-remove");
     584              : 
     585              :             // This block is simply removing the TenantSlot for this tenant.  It requires a loop because
     586              :             // we might conflict with a TenantSlot::InProgress marker and need to wait for it.
     587              :             //
     588              :             // This complexity will go away when we simplify how deletion works:
     589              :             // https://github.com/neondatabase/neon/issues/5080
     590              :             loop {
     591              :                 // Under the TenantMap lock, try to remove the tenant.  We usually succeed, but if
     592              :                 // we encounter an InProgress marker, yield the barrier it contains and wait on it.
     593            0 :                 let barrier = {
     594            0 :                     let mut locked = tenants.write().unwrap();
     595            0 :                     let removed = locked.remove(tenant.tenant_shard_id);
     596            0 : 
     597            0 :                     // FIXME: we should not be modifying this from outside of mgr.rs.
     598            0 :                     // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
     599            0 : 
     600            0 :                     // Update stats
     601            0 :                     match &removed {
     602            0 :                         TenantsMapRemoveResult::Occupied(slot) => {
     603            0 :                             crate::metrics::TENANT_MANAGER.slot_removed(slot);
     604            0 :                         }
     605            0 :                         TenantsMapRemoveResult::InProgress(barrier) => {
     606            0 :                             crate::metrics::TENANT_MANAGER
     607            0 :                                 .slot_removed(&TenantSlot::InProgress(barrier.clone()));
     608            0 :                         }
     609            0 :                         TenantsMapRemoveResult::Vacant => {
     610            0 :                             // Nothing changed in map, no metric update
     611            0 :                         }
     612              :                     }
     613              : 
     614            0 :                     match removed {
     615            0 :                         TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
     616            0 :                             match tenant.current_state() {
     617            0 :                                 TenantState::Stopping { .. } | TenantState::Broken { .. } => {
     618            0 :                                     // Expected: we put the tenant into stopping state before we start deleting it
     619            0 :                                 }
     620            0 :                                 state => {
     621            0 :                                     // Unexpected state
     622            0 :                                     tracing::warn!(
     623            0 :                                         "Tenant in unexpected state {state} after deletion"
     624              :                                     );
     625              :                                 }
     626              :                             }
     627            0 :                             break;
     628              :                         }
     629              :                         TenantsMapRemoveResult::Occupied(TenantSlot::Secondary(_)) => {
      630              :                             // This is unexpected: this secondary tenant should not have been created, and we
     631              :                             // are not in a position to shut it down from here.
     632            0 :                             tracing::warn!("Tenant transitioned to secondary mode while deleting!");
     633            0 :                             break;
     634              :                         }
     635              :                         TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => {
     636            0 :                             unreachable!("TenantsMap::remove handles InProgress separately, should never return it here");
     637              :                         }
     638              :                         TenantsMapRemoveResult::Vacant => {
     639            0 :                             tracing::warn!(
     640            0 :                                 "Tenant removed from TenantsMap before deletion completed"
     641              :                             );
     642            0 :                             break;
     643              :                         }
     644            0 :                         TenantsMapRemoveResult::InProgress(barrier) => {
     645            0 :                             // An InProgress entry was found, we must wait on its barrier
     646            0 :                             barrier
     647            0 :                         }
     648            0 :                     }
     649            0 :                 };
     650            0 : 
     651            0 :                 tracing::info!(
     652            0 :                     "Waiting for competing operation to complete before deleting state for tenant"
     653              :                 );
     654            0 :                 barrier.wait().await;
     655              :             }
     656              :         }
     657              : 
     658            0 :         *guard = Self::Finished;
     659            0 : 
     660            0 :         Ok(())
     661            0 :     }
     662              : }
        

Generated by: LCOV version 2.1-beta