LCOV - code coverage report
Current view: top level - pageserver/src/tenant - delete.rs (source / functions)
Test: 32f4a56327bc9da697706839ed4836b2a00a408f.info
Test Date: 2024-02-07 07:37:29
Coverage: Lines: 93.2 % (384 of 412 hit)    Functions: 77.0 % (57 of 74 hit)

            Line data    Source code
       1              : use std::sync::Arc;
       2              : 
       3              : use anyhow::Context;
       4              : use camino::{Utf8Path, Utf8PathBuf};
       5              : use pageserver_api::{models::TenantState, shard::TenantShardId};
       6              : use remote_storage::{GenericRemoteStorage, RemotePath};
       7              : use tokio::sync::OwnedMutexGuard;
       8              : use tokio_util::sync::CancellationToken;
       9              : use tracing::{error, instrument, Instrument, Span};
      10              : 
      11              : use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId};
      12              : 
      13              : use crate::{
      14              :     config::PageServerConf,
      15              :     context::RequestContext,
      16              :     task_mgr::{self, TaskKind},
      17              :     tenant::mgr::{TenantSlot, TenantsMapRemoveResult},
      18              : };
      19              : 
      20              : use super::{
      21              :     mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
      22              :     remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
      23              :     span,
      24              :     timeline::delete::DeleteTimelineFlow,
      25              :     tree_sort_timelines, DeleteTimelineError, Tenant, TenantPreload,
      26              : };
      27              : 
      28          204 : #[derive(Debug, thiserror::Error)]
      29              : pub(crate) enum DeleteTenantError {
      30              :     #[error("GetTenant {0}")]
      31              :     Get(#[from] GetTenantError),
      32              : 
      33              :     #[error("Tenant not attached")]
      34              :     NotAttached,
      35              : 
      36              :     #[error("Invalid state {0}. Expected Active or Broken")]
      37              :     InvalidState(TenantState),
      38              : 
      39              :     #[error("Tenant deletion is already in progress")]
      40              :     AlreadyInProgress,
      41              : 
      42              :     #[error("Tenant map slot error {0}")]
      43              :     SlotError(#[from] TenantSlotError),
      44              : 
      45              :     #[error("Tenant map slot upsert error {0}")]
      46              :     SlotUpsertError(#[from] TenantSlotUpsertError),
      47              : 
      48              :     #[error("Timeline {0}")]
      49              :     Timeline(#[from] DeleteTimelineError),
      50              : 
      51              :     #[error("Cancelled")]
      52              :     Cancelled,
      53              : 
      54              :     #[error(transparent)]
      55              :     Other(#[from] anyhow::Error),
      56              : }
      57              : 
      58              : type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;
      59              : 
      60          180 : fn remote_tenant_delete_mark_path(
      61          180 :     conf: &PageServerConf,
      62          180 :     tenant_shard_id: &TenantShardId,
      63          180 : ) -> anyhow::Result<RemotePath> {
      64          180 :     let tenant_remote_path = conf
      65          180 :         .tenant_path(tenant_shard_id)
      66          180 :         .strip_prefix(&conf.workdir)
      67          180 :         .context("Failed to strip workdir prefix")
      68          180 :         .and_then(RemotePath::new)
      69          180 :         .context("tenant path")?;
      70          180 :     Ok(tenant_remote_path.join(Utf8Path::new("timelines/deleted")))
      71          180 : }
      72              : 
      73           94 : async fn create_remote_delete_mark(
      74           94 :     conf: &PageServerConf,
      75           94 :     remote_storage: &GenericRemoteStorage,
      76           94 :     tenant_shard_id: &TenantShardId,
      77           94 :     cancel: &CancellationToken,
      78           94 : ) -> Result<(), DeleteTenantError> {
      79           94 :     let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
      80              : 
      81           94 :     let data: &[u8] = &[];
      82           94 :     backoff::retry(
      83          132 :         || async {
      84          132 :             let data = bytes::Bytes::from_static(data);
      85          132 :             let stream = futures::stream::once(futures::future::ready(Ok(data)));
      86          132 :             remote_storage
      87          132 :                 .upload(stream, 0, &remote_mark_path, None)
      88          195 :                 .await
      89          132 :         },
      90           94 :         |_e| false,
      91           94 :         FAILED_UPLOAD_WARN_THRESHOLD,
      92           94 :         FAILED_REMOTE_OP_RETRIES,
      93           94 :         "mark_upload",
      94           94 :         cancel,
      95           94 :     )
      96          195 :     .await
      97           94 :     .ok_or_else(|| anyhow::anyhow!("Cancelled"))
      98           94 :     .and_then(|x| x)
      99           94 :     .context("mark_upload")?;
     100              : 
     101           94 :     Ok(())
     102           94 : }
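
As used above, the retry helper resolves to an Option<Result<..>> where None means the operation was cancelled, and the `.ok_or_else(..).and_then(|x| x)` chain flattens that into a single Result. Below is a minimal, self-contained sketch of the same flattening pattern; `fake_retry` is a hypothetical stand-in for illustration only, not `utils::backoff::retry`.

use anyhow::Context;

// Stand-in for a cancellable retry: None = cancelled, Some(Err) = last attempt failed.
fn fake_retry() -> Option<Result<(), std::io::Error>> {
    Some(Ok(()))
}

fn flatten_retry_result() -> anyhow::Result<()> {
    fake_retry()
        .ok_or_else(|| anyhow::anyhow!("Cancelled"))
        .and_then(|res| res.map_err(anyhow::Error::from))
        .context("mark_upload")?;
    Ok(())
}
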
     103              : 
     104           90 : async fn create_local_delete_mark(
     105           90 :     conf: &PageServerConf,
     106           90 :     tenant_shard_id: &TenantShardId,
     107           90 : ) -> Result<(), DeleteTenantError> {
     108           90 :     let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
     109           90 : 
     110           90 :     // Note: we're ok to replace an existing file.
     111           90 :     let _ = std::fs::OpenOptions::new()
     112           90 :         .write(true)
     113           90 :         .create(true)
     114           90 :         .open(&marker_path)
     115           90 :         .with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
     116              : 
     117           90 :     crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
     118              : 
     119           90 :     Ok(())
     120           90 : }
     121              : 
     122          107 : async fn schedule_ordered_timeline_deletions(
     123          107 :     tenant: &Arc<Tenant>,
     124          107 : ) -> Result<Vec<(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>, TimelineId)>, DeleteTenantError> {
     125          107 :     // Tenant is stopping at this point. We know it will be deleted.
     126          107 :     // No new timelines should be created.
     127          107 :     // Tree sort timelines to delete from leaves to the root.
     128          107 :     // NOTE: by calling clone we release the mutex which creates a possibility for a race: pending deletion
     129          107 :     // can complete and remove timeline from the map in between our call to clone
     130          107 :     // and `DeleteTimelineFlow::run`, so `run` won't find the timeline in the `timelines` map.
     131          107 :     // timelines.lock is currently synchronous so we can't hold it across an await point.
     132          107 :     // So just ignore NotFound error if we get it from `run`.
     133          107 :     // Beware: in case it becomes async and we try to hold it here, `run` also locks it, which can create a deadlock.
     134          107 :     let timelines = tenant.timelines.lock().unwrap().clone();
     135          107 :     let sorted =
     136          163 :         tree_sort_timelines(timelines, |t| t.get_ancestor_timeline_id()).context("tree sort")?;
     137              : 
     138          107 :     let mut already_running_deletions = vec![];
     139              : 
     140          145 :     for (timeline_id, _) in sorted.into_iter().rev() {
     141          145 :         let span = tracing::info_span!("timeline_delete", %timeline_id);
     142          145 :         let res = DeleteTimelineFlow::run(tenant, timeline_id, true)
     143          145 :             .instrument(span)
     144         4908 :             .await;
     145          145 :         if let Err(e) = res {
     146           22 :             match e {
     147              :                 DeleteTimelineError::NotFound => {
     148              :                     // Timeline deletion finished after call to clone above but before call
     149              :                     // to `DeleteTimelineFlow::run` and removed timeline from the map.
     150            1 :                     continue;
     151              :                 }
     152            4 :                 DeleteTimelineError::AlreadyInProgress(guard) => {
     153            4 :                     already_running_deletions.push((guard, timeline_id));
     154            4 :                     continue;
     155              :                 }
     156           17 :                 e => return Err(DeleteTenantError::Timeline(e)),
     157              :             }
     158          123 :         }
     159              :     }
     160              : 
     161           90 :     Ok(already_running_deletions)
     162          107 : }
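
The leaf-to-root deletion order described in the comments above can be illustrated with a small self-contained sketch. `toy_tree_sort` and the timeline names are hypothetical and are not the crate's `tree_sort_timelines`.

use std::collections::HashMap;

// Emit timelines root-first: a timeline appears only after its ancestor.
// Assumes a proper tree (no cycles), so each pass adds at least one node.
fn toy_tree_sort(parents: &HashMap<&'static str, Option<&'static str>>) -> Vec<&'static str> {
    let mut out: Vec<&'static str> = Vec::new();
    while out.len() < parents.len() {
        for (id, parent) in parents {
            let ready = match parent {
                None => true,
                Some(p) => out.contains(p),
            };
            if ready && !out.contains(id) {
                out.push(*id);
            }
        }
    }
    out
}

fn main() {
    let mut parents = HashMap::new();
    parents.insert("leaf", Some("branch"));
    parents.insert("branch", Some("root"));
    parents.insert("root", None);
    // Deleting in reverse (leaves first) never removes an ancestor that a
    // surviving child still references.
    let delete_order: Vec<_> = toy_tree_sort(&parents).into_iter().rev().collect();
    assert_eq!(delete_order, ["leaf", "branch", "root"]);
}
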
     163              : 
     164           80 : async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), DeleteTenantError> {
     165           80 :     // Assert timelines dir is empty.
     166           80 :     if !fs_ext::is_directory_empty(timelines_path).await? {
     167              :         // Display first 10 items in directory
     168            0 :         let list = fs_ext::list_dir(timelines_path).await.context("list_dir")?;
     169            0 :         let list = &list.into_iter().take(10).collect::<Vec<_>>();
     170            0 :         return Err(DeleteTenantError::Other(anyhow::anyhow!(
     171            0 :             "Timelines directory is not empty after all timelines deletion: {list:?}"
     172            0 :         )));
     173           80 :     }
     174           80 : 
     175           80 :     Ok(())
     176           80 : }
     177              : 
     178           86 : async fn remove_tenant_remote_delete_mark(
     179           86 :     conf: &PageServerConf,
     180           86 :     remote_storage: Option<&GenericRemoteStorage>,
     181           86 :     tenant_shard_id: &TenantShardId,
     182           86 :     cancel: &CancellationToken,
     183           86 : ) -> Result<(), DeleteTenantError> {
     184           86 :     if let Some(remote_storage) = remote_storage {
     185           86 :         let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
     186           86 :         backoff::retry(
     187          308 :             || async { remote_storage.delete(&path).await },
     188           86 :             |_e| false,
     189           86 :             FAILED_UPLOAD_WARN_THRESHOLD,
     190           86 :             FAILED_REMOTE_OP_RETRIES,
     191           86 :             "remove_tenant_remote_delete_mark",
     192           86 :             cancel,
     193           86 :         )
     194          308 :         .await
     195           86 :         .ok_or_else(|| anyhow::anyhow!("Cancelled"))
     196           86 :         .and_then(|x| x)
     197           86 :         .context("remove_tenant_remote_delete_mark")?;
     198            0 :     }
     199           86 :     Ok(())
     200           86 : }
     201              : 
     202              : // Cleanup fs traces: tenant config, timelines dir, local delete mark, tenant dir
     203           82 : async fn cleanup_remaining_fs_traces(
     204           82 :     conf: &PageServerConf,
     205           82 :     tenant_shard_id: &TenantShardId,
     206           82 : ) -> Result<(), DeleteTenantError> {
     207          386 :     let rm = |p: Utf8PathBuf, is_dir: bool| async move {
     208          386 :         if is_dir {
     209          148 :             tokio::fs::remove_dir(&p).await
     210           82 :         } else {
     211          238 :             tokio::fs::remove_file(&p).await
     212           82 :         }
     213          386 :         .or_else(fs_ext::ignore_not_found)
     214          386 :         .with_context(|| format!("failed to delete {p}"))
     215          386 :     };
     216           82 : 
     217           82 :     rm(conf.tenant_config_path(tenant_shard_id), false).await?;
     218           82 :     rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;
     219              : 
     220           82 :     fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
     221            4 :         Err(anyhow::anyhow!(
     222            4 :             "failpoint: tenant-delete-before-remove-timelines-dir"
     223            4 :         ))?
     224           82 :     });
     225              : 
     226           78 :     rm(conf.timelines_path(tenant_shard_id), true).await?;
     227              : 
     228           78 :     fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
     229            4 :         Err(anyhow::anyhow!(
     230            4 :             "failpoint: tenant-delete-before-remove-deleted-mark"
     231            4 :         ))?
     232           78 :     });
     233              : 
     234              :     // Make sure previous deletions are ordered before mark removal.
     235              :     // Otherwise there is no guarantee that they reach the disk before mark deletion.
     236              :     // So it's possible for the mark to reach disk first and for other deletions
     237              :     // to be reordered later and thus missed if a crash occurs.
     238              :     // Note that we don't need to sync after the mark file is removed
     239              :     // because we can tolerate the case when mark file reappears on startup.
     240           74 :     let tenant_path = &conf.tenant_path(tenant_shard_id);
     241           74 :     if tenant_path.exists() {
     242           74 :         crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
     243          147 :             .await
     244           74 :             .context("fsync_pre_mark_remove")?;
     245            0 :     }
     246              : 
     247           74 :     rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;
     248              : 
     249           74 :     fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
     250            4 :         Err(anyhow::anyhow!(
     251            4 :             "failpoint: tenant-delete-before-remove-tenant-dir"
     252            4 :         ))?
     253           74 :     });
     254              : 
     255           70 :     rm(conf.tenant_path(tenant_shard_id), true).await?;
     256              : 
     257           70 :     Ok(())
     258           82 : }
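
A minimal sketch (assuming Unix and hypothetical paths) of the "fsync the parent before removing the mark" ordering that the comments above rely on; the real code uses `crashsafe::fsync_async` rather than this helper.

use std::fs::File;
use std::path::Path;

// Make earlier deletions inside `tenant_dir` durable, then drop the marker.
// If we crash before the final remove, the marker survives and deletion is
// resumed on the next startup.
fn remove_marker_crash_safely(tenant_dir: &Path, marker: &Path) -> std::io::Result<()> {
    File::open(tenant_dir)?.sync_all()?; // fsync the directory itself
    std::fs::remove_file(marker)
}
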
     259              : 
     260              : /// Orchestrates the shutdown of all tenant tasks, removes the tenant's in-memory structures,
     261              : /// and deletes its data from both disk and s3.
     262              : /// The sequence of steps:
     263              : /// 1. Upload remote deletion mark.
     264              : /// 2. Create local mark file.
     265              : /// 3. Shutdown tasks
     266              : /// 4. Run ordered timeline deletions
     267              : /// 5. Wait for timeline deletion operations that were scheduled before tenant deletion was requested
     268              : /// 6. Remove remote mark
     269              : /// 7. Cleanup remaining fs traces, tenant dir, config, timelines dir, local delete mark
     270              : /// It is resumable from any step in case a crash/restart occurs.
     271              : /// There are two entrypoints to the process:
     272              : /// 1. [`DeleteTenantFlow::run`] is the main entrypoint, called by a management api handler (an illustrative invocation sketch follows this enum).
     273              : /// 2. [`DeleteTenantFlow::resume_from_attach`] is called when deletion is resumed because the tenant is found to be marked deleted during the attach process.
     274              : ///  Note that the only other place that touches the timeline delete mark is the `Tenant::spawn_load` function.
     275          942 : #[derive(Default)]
     276              : pub enum DeleteTenantFlow {
     277              :     #[default]
     278              :     NotStarted,
     279              :     InProgress,
     280              :     Finished,
     281              : }
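
An illustrative sketch of entrypoint 1 referenced in the doc comment above: how a management api handler might drive the flow. The handler name and argument wiring are assumptions; only the `DeleteTenantFlow::run` signature is taken from this file.

// Hypothetical handler-side wiring; the early steps run inline and the
// long-running steps continue in a background task spawned by `run`.
async fn handle_tenant_delete(
    conf: &'static PageServerConf,
    remote_storage: Option<GenericRemoteStorage>,
    tenants: &'static std::sync::RwLock<TenantsMap>,
    tenant: Arc<Tenant>,
) -> Result<(), DeleteTenantError> {
    DeleteTenantFlow::run(conf, remote_storage, tenants, tenant).await
}
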
     282              : 
     283              : impl DeleteTenantFlow {
     284              :     // These steps are run in the context of management api request handler.
     285              :     // Long running steps are continued to run in the background.
     286              :     // NB: If this fails half-way through, and is retried, the retry will go through
     287              :     // all the same steps again. Make sure the code here is idempotent, and don't
     288              :     // error out if some of the shutdown tasks have already been completed!
     289              :     // NOTE: static needed for background part.
     290              :     // We assume that calling code sets up the span with tenant_id.
     291          104 :     #[instrument(skip_all)]
     292              :     pub(crate) async fn run(
     293              :         conf: &'static PageServerConf,
     294              :         remote_storage: Option<GenericRemoteStorage>,
     295              :         tenants: &'static std::sync::RwLock<TenantsMap>,
     296              :         tenant: Arc<Tenant>,
     297              :     ) -> Result<(), DeleteTenantError> {
     298              :         span::debug_assert_current_span_has_tenant_id();
     299              : 
     300          104 :         pausable_failpoint!("tenant-delete-before-run");
     301              : 
     302              :         let mut guard = Self::prepare(&tenant).await?;
     303              : 
     304              :         if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await {
     305              :             tenant.set_broken(format!("{e:#}")).await;
     306              :             return Err(e);
     307              :         }
     308              : 
     309              :         Self::schedule_background(guard, conf, remote_storage, tenants, tenant);
     310              : 
     311              :         Ok(())
     312              :     }
     313              : 
     314              :     // Helper function needed so we can match once on the returned error and transition the tenant into a broken state.
     315              :     // This is needed because tenant.shutdown is not idempotent. If the tenant state is set to stopping, another call to tenant.shutdown
     316              :     // will result in an error, but here we need to be able to retry shutdown when tenant deletion is retried.
     317              :     // So the solution is to set tenant state to broken.
     318           98 :     async fn run_inner(
     319           98 :         guard: &mut OwnedMutexGuard<Self>,
     320           98 :         conf: &'static PageServerConf,
     321           98 :         remote_storage: Option<&GenericRemoteStorage>,
     322           98 :         tenant: &Tenant,
     323           98 :     ) -> Result<(), DeleteTenantError> {
     324           98 :         guard.mark_in_progress()?;
     325              : 
     326           98 :         fail::fail_point!("tenant-delete-before-create-remote-mark", |_| {
     327            4 :             Err(anyhow::anyhow!(
     328            4 :                 "failpoint: tenant-delete-before-create-remote-mark"
     329            4 :             ))?
     330           98 :         });
     331              : 
     332              :         // IDEA: implement detach as delete without remote storage. Then they would use the same lock (deletion_progress) so they won't contend.
     333              :         // Though that sounds scary; use a different mark name?
     334              :         // Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
     335           94 :         if let Some(remote_storage) = &remote_storage {
     336           94 :             create_remote_delete_mark(
     337           94 :                 conf,
     338           94 :                 remote_storage,
     339           94 :                 &tenant.tenant_shard_id,
     340           94 :                 // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
     341           94 :                 &CancellationToken::new(),
     342           94 :             )
     343          195 :             .await
     344           94 :             .context("remote_mark")?
     345            0 :         }
     346              : 
     347           94 :         fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
     348            4 :             Err(anyhow::anyhow!(
     349            4 :                 "failpoint: tenant-delete-before-create-local-mark"
     350            4 :             ))?
     351           94 :         });
     352              : 
     353           90 :         create_local_delete_mark(conf, &tenant.tenant_shard_id)
     354            0 :             .await
     355           90 :             .context("local delete mark")?;
     356              : 
     357           90 :         fail::fail_point!("tenant-delete-before-background", |_| {
     358            4 :             Err(anyhow::anyhow!(
     359            4 :                 "failpoint: tenant-delete-before-background"
     360            4 :             ))?
     361           90 :         });
     362              : 
     363           86 :         Ok(())
     364           98 :     }
     365              : 
     366           98 :     fn mark_in_progress(&mut self) -> anyhow::Result<()> {
     367           98 :         match self {
     368            0 :             Self::Finished => anyhow::bail!("Bug. Is in finished state"),
     369           24 :             Self::InProgress { .. } => { /* We're in a retry */ }
     370           74 :             Self::NotStarted => { /* Fresh start */ }
     371              :         }
     372              : 
     373           98 :         *self = Self::InProgress;
     374           98 : 
     375           98 :         Ok(())
     376           98 :     }
     377              : 
     378          851 :     pub(crate) async fn should_resume_deletion(
     379          851 :         conf: &'static PageServerConf,
     380          851 :         remote_mark_exists: bool,
     381          851 :         tenant: &Tenant,
     382          851 :     ) -> Result<Option<DeletionGuard>, DeleteTenantError> {
     383          851 :         let acquire = |t: &Tenant| {
     384           21 :             Some(
     385           21 :                 Arc::clone(&t.delete_progress)
     386           21 :                     .try_lock_owned()
     387           21 :                     .expect("we're the only owner during init"),
     388           21 :             )
     389          851 :         };
     390          851 : 
     391          851 :         if remote_mark_exists {
     392           15 :             return Ok(acquire(tenant));
     393          836 :         }
     394          836 : 
     395          836 :         // Check the local mark first; if it's there, there is no need to go to s3 to check whether the remote one exists.
     396          836 :         if conf
     397          836 :             .tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
     398          836 :             .exists()
     399              :         {
     400            6 :             Ok(acquire(tenant))
     401              :         } else {
     402          830 :             Ok(None)
     403              :         }
     404          851 :     }
     405              : 
     406           21 :     pub(crate) async fn resume_from_attach(
     407           21 :         guard: DeletionGuard,
     408           21 :         tenant: &Arc<Tenant>,
     409           21 :         preload: Option<TenantPreload>,
     410           21 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     411           21 :         ctx: &RequestContext,
     412           21 :     ) -> Result<(), DeleteTenantError> {
     413           21 :         let (_, progress) = completion::channel();
     414           21 : 
     415           21 :         tenant
     416           21 :             .set_stopping(progress, false, true)
     417            0 :             .await
     418           21 :             .expect("cant be stopping or broken");
     419           21 : 
     420           21 :         tenant
     421           21 :             .attach(preload, super::SpawnMode::Normal, ctx)
     422           40 :             .await
     423           21 :             .context("attach")?;
     424              : 
     425           21 :         Self::background(
     426           21 :             guard,
     427           21 :             tenant.conf,
     428           21 :             tenant.remote_storage.clone(),
     429           21 :             tenants,
     430           21 :             tenant,
     431           21 :         )
     432          749 :         .await
     433           21 :     }
     434              : 
     435          104 :     async fn prepare(
     436          104 :         tenant: &Arc<Tenant>,
     437          104 :     ) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
     438          104 :         // FIXME: unsure about active only. Our init jobs may not be cancellable properly,
     439          104 :         // so at least for now allow deletions only for active tenants. TODO recheck
     440          104 :         // Broken and Stopping are needed for retries.
     441          104 :         if !matches!(
     442          104 :             tenant.current_state(),
     443              :             TenantState::Active | TenantState::Broken { .. }
     444              :         ) {
     445            2 :             return Err(DeleteTenantError::InvalidState(tenant.current_state()));
     446          102 :         }
     447              : 
     448          102 :         let guard = Arc::clone(&tenant.delete_progress)
     449          102 :             .try_lock_owned()
     450          102 :             .map_err(|_| DeleteTenantError::AlreadyInProgress)?;
     451              : 
     452          102 :         fail::fail_point!("tenant-delete-before-shutdown", |_| {
     453            4 :             Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))?
     454          102 :         });
     455              : 
     456              :         // make pageserver shutdown not wait for our completion
     457           98 :         let (_, progress) = completion::channel();
     458           98 : 
     459           98 :         // It would be good to only set stopping here and continue shutdown in the background, but shutdown is not idempotent.
     460           98 :         // i.e. it is an error to do:
     461           98 :         // tenant.set_stopping
     462           98 :         // tenant.shutdown
     463           98 :         // It's also bad that we're holding tenants.read here.
     464           98 :         // TODO relax set_stopping to be idempotent?
     465          229 :         if tenant.shutdown(progress, false).await.is_err() {
     466            0 :             return Err(DeleteTenantError::Other(anyhow::anyhow!(
     467            0 :                 "tenant shutdown is already in progress"
     468            0 :             )));
     469           98 :         }
     470           98 : 
     471           98 :         Ok(guard)
     472          104 :     }
     473              : 
     474           86 :     fn schedule_background(
     475           86 :         guard: OwnedMutexGuard<Self>,
     476           86 :         conf: &'static PageServerConf,
     477           86 :         remote_storage: Option<GenericRemoteStorage>,
     478           86 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     479           86 :         tenant: Arc<Tenant>,
     480           86 :     ) {
     481           86 :         let tenant_shard_id = tenant.tenant_shard_id;
     482           86 : 
     483           86 :         task_mgr::spawn(
     484           86 :             task_mgr::BACKGROUND_RUNTIME.handle(),
     485           86 :             TaskKind::TimelineDeletionWorker,
     486           86 :             Some(tenant_shard_id),
     487           86 :             None,
     488           86 :             "tenant_delete",
     489              :             false,
     490           86 :             async move {
     491           37 :                 if let Err(err) =
     492         5232 :                     Self::background(guard, conf, remote_storage, tenants, &tenant).await
     493              :                 {
     494           37 :                     error!("Error: {err:#}");
     495           37 :                     tenant.set_broken(format!("{err:#}")).await;
     496           49 :                 };
     497           86 :                 Ok(())
     498           86 :             }
     499           86 :             .instrument({
     500           86 :                 let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug());
     501           86 :                 span.follows_from(Span::current());
     502           86 :                 span
     503           86 :             }),
     504           86 :         );
     505           86 :     }
     506              : 
     507          107 :     async fn background(
     508          107 :         mut guard: OwnedMutexGuard<Self>,
     509          107 :         conf: &PageServerConf,
     510          107 :         remote_storage: Option<GenericRemoteStorage>,
     511          107 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     512          107 :         tenant: &Arc<Tenant>,
     513          107 :     ) -> Result<(), DeleteTenantError> {
     514              :         // Tree sort timelines, schedule delete for them. Mention retries from the console side.
     515              :         // Note that if deletion fails we don't mark timelines as broken;
     516              :         // the whole tenant will become broken per the `Self::schedule_background` logic.
     517          107 :         let already_running_timeline_deletions = schedule_ordered_timeline_deletions(tenant)
     518         4908 :             .await
     519          107 :             .context("schedule_ordered_timeline_deletions")?;
     520              : 
     521           90 :         fail::fail_point!("tenant-delete-before-polling-ongoing-deletions", |_| {
     522            4 :             Err(anyhow::anyhow!(
     523            4 :                 "failpoint: tenant-delete-before-polling-ongoing-deletions"
     524            4 :             ))?
     525           90 :         });
     526              : 
     527              :         // Wait for deletions that were already running at the moment when tenant deletion was requested.
     528              :         // When we can lock the deletion guard, it means that the corresponding timeline deletion has finished.
     529           90 :         for (guard, timeline_id) in already_running_timeline_deletions {
     530            4 :             let flow = guard.lock().await;
     531            4 :             if !flow.is_finished() {
     532            0 :                 return Err(DeleteTenantError::Other(anyhow::anyhow!(
     533            0 :                     "already running timeline deletion failed: {timeline_id}"
     534            0 :                 )));
     535            4 :             }
     536              :         }
     537              : 
     538           86 :         let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
     539           86 :         // May not exist if we fail in cleanup_remaining_fs_traces after removing it
     540           86 :         if timelines_path.exists() {
     541              :             // sanity check to guard against layout changes
     542           80 :             ensure_timelines_dir_empty(&timelines_path)
     543           79 :                 .await
     544           80 :                 .context("timelines dir not empty")?;
     545            6 :         }
     546              : 
     547           86 :         remove_tenant_remote_delete_mark(
     548           86 :             conf,
     549           86 :             remote_storage.as_ref(),
     550           86 :             &tenant.tenant_shard_id,
     551           86 :             // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
     552           86 :             &CancellationToken::new(),
     553           86 :         )
     554          308 :         .await?;
     555              : 
     556           86 :         pausable_failpoint!("tenant-delete-before-cleanup-remaining-fs-traces-pausable");
     557           86 :         fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
     558            4 :             Err(anyhow::anyhow!(
     559            4 :                 "failpoint: tenant-delete-before-cleanup-remaining-fs-traces"
     560            4 :             ))?
     561           86 :         });
     562              : 
     563           82 :         cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
     564          530 :             .await
     565           82 :             .context("cleanup_remaining_fs_traces")?;
     566              : 
     567              :         {
     568           70 :             pausable_failpoint!("tenant-delete-before-map-remove");
     569              : 
     570              :             // This block is simply removing the TenantSlot for this tenant.  It requires a loop because
     571              :             // we might conflict with a TenantSlot::InProgress marker and need to wait for it.
     572              :             //
     573              :             // This complexity will go away when we simplify how deletion works:
     574              :             // https://github.com/neondatabase/neon/issues/5080
     575              :             loop {
     576              :                 // Under the TenantMap lock, try to remove the tenant.  We usually succeed, but if
     577              :                 // we encounter an InProgress marker, yield the barrier it contains and wait on it.
     578            1 :                 let barrier = {
     579           71 :                     let mut locked = tenants.write().unwrap();
     580           71 :                     let removed = locked.remove(tenant.tenant_shard_id);
     581           71 : 
     582           71 :                     // FIXME: we should not be modifying this from outside of mgr.rs.
     583           71 :                     // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
     584           71 :                     crate::metrics::TENANT_MANAGER
     585           71 :                         .tenant_slots
     586           71 :                         .set(locked.len() as u64);
     587              : 
     588           70 :                     match removed {
     589           70 :                         TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
     590           70 :                             match tenant.current_state() {
     591           70 :                                 TenantState::Stopping { .. } | TenantState::Broken { .. } => {
     592           70 :                                     // Expected: we put the tenant into stopping state before we start deleting it
     593           70 :                                 }
     594            0 :                                 state => {
     595              :                                     // Unexpected state
     596            0 :                                     tracing::warn!(
     597            0 :                                         "Tenant in unexpected state {state} after deletion"
     598            0 :                                     );
     599              :                                 }
     600              :                             }
     601           70 :                             break;
     602              :                         }
     603              :                         TenantsMapRemoveResult::Occupied(TenantSlot::Secondary(_)) => {
     604              :                             // This is unexpected: this secondary tenant should not have been created, and we
     605              :                             // are not in a position to shut it down from here.
     606            0 :                             tracing::warn!("Tenant transitioned to secondary mode while deleting!");
     607            0 :                             break;
     608              :                         }
     609              :                         TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => {
     610            0 :                             unreachable!("TenantsMap::remove handles InProgress separately, should never return it here");
     611              :                         }
     612              :                         TenantsMapRemoveResult::Vacant => {
     613            0 :                             tracing::warn!(
     614            0 :                                 "Tenant removed from TenantsMap before deletion completed"
     615            0 :                             );
     616            0 :                             break;
     617              :                         }
     618            1 :                         TenantsMapRemoveResult::InProgress(barrier) => {
     619            1 :                             // An InProgress entry was found, we must wait on its barrier
     620            1 :                             barrier
     621              :                         }
     622              :                     }
     623              :                 };
     624              : 
     625            1 :                 tracing::info!(
     626            1 :                     "Waiting for competing operation to complete before deleting state for tenant"
     627            1 :                 );
     628            1 :                 barrier.wait().await;
     629              :             }
     630              :         }
     631              : 
     632           70 :         *guard = Self::Finished;
     633           70 : 
     634           70 :         Ok(())
     635          107 :     }
     636              : }
        

Generated by: LCOV version 2.1-beta