LCOV - differential code coverage report
Current view:   top level - pageserver/src/tenant - delete.rs (source / functions)
Current:        cd44433dd675caa99df17a61b18949c8387e2242.info
Current Date:   2024-01-09 02:06:09
Baseline:       66c52a629a0f4a503e193045e0df4c77139e344b.info
Baseline Date:  2024-01-08 15:34:46

                Coverage    Total    Hit    UBC    CBC
Lines:            93.0 %      400    372     28    372
Functions:        76.8 %       69     53     16     53

           TLA  Line data    Source code
       1                 : use std::sync::Arc;
       2                 : 
       3                 : use anyhow::Context;
       4                 : use camino::{Utf8Path, Utf8PathBuf};
       5                 : use pageserver_api::{models::TenantState, shard::TenantShardId};
       6                 : use remote_storage::{GenericRemoteStorage, RemotePath};
       7                 : use tokio::sync::OwnedMutexGuard;
       8                 : use tokio_util::sync::CancellationToken;
       9                 : use tracing::{error, instrument, Instrument, Span};
      10                 : 
      11                 : use utils::{backoff, completion, crashsafe, fs_ext, id::TimelineId};
      12                 : 
      13                 : use crate::{
      14                 :     config::PageServerConf,
      15                 :     context::RequestContext,
      16                 :     task_mgr::{self, TaskKind},
      17                 :     tenant::mgr::{TenantSlot, TenantsMapRemoveResult},
      18                 : };
      19                 : 
      20                 : use super::{
      21                 :     mgr::{GetTenantError, TenantSlotError, TenantSlotUpsertError, TenantsMap},
      22                 :     remote_timeline_client::{FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD},
      23                 :     span,
      24                 :     timeline::delete::DeleteTimelineFlow,
      25                 :     tree_sort_timelines, DeleteTimelineError, Tenant, TenantPreload,
      26                 : };
      27                 : 
      28 CBC         204 : #[derive(Debug, thiserror::Error)]
      29                 : pub(crate) enum DeleteTenantError {
      30                 :     #[error("GetTenant {0}")]
      31                 :     Get(#[from] GetTenantError),
      32                 : 
      33                 :     #[error("Tenant not attached")]
      34                 :     NotAttached,
      35                 : 
      36                 :     #[error("Invalid state {0}. Expected Active or Broken")]
      37                 :     InvalidState(TenantState),
      38                 : 
      39                 :     #[error("Tenant deletion is already in progress")]
      40                 :     AlreadyInProgress,
      41                 : 
      42                 :     #[error("Tenant map slot error {0}")]
      43                 :     SlotError(#[from] TenantSlotError),
      44                 : 
      45                 :     #[error("Tenant map slot upsert error {0}")]
      46                 :     SlotUpsertError(#[from] TenantSlotUpsertError),
      47                 : 
      48                 :     #[error("Timeline {0}")]
      49                 :     Timeline(#[from] DeleteTimelineError),
      50                 : 
      51                 :     #[error("Cancelled")]
      52                 :     Cancelled,
      53                 : 
      54                 :     #[error(transparent)]
      55                 :     Other(#[from] anyhow::Error),
      56                 : }
      57                 : 
      58                 : type DeletionGuard = tokio::sync::OwnedMutexGuard<DeleteTenantFlow>;
      59                 : 
      60             152 : fn remote_tenant_delete_mark_path(
      61             152 :     conf: &PageServerConf,
      62             152 :     tenant_shard_id: &TenantShardId,
      63             152 : ) -> anyhow::Result<RemotePath> {
      64             152 :     let tenant_remote_path = conf
      65             152 :         .tenant_path(tenant_shard_id)
      66             152 :         .strip_prefix(&conf.workdir)
      67             152 :         .context("Failed to strip workdir prefix")
      68             152 :         .and_then(RemotePath::new)
      69             152 :         .context("tenant path")?;
      70             152 :     Ok(tenant_remote_path.join(Utf8Path::new("timelines/deleted")))
      71             152 : }
      72                 : 
      73              80 : async fn create_remote_delete_mark(
      74              80 :     conf: &PageServerConf,
      75              80 :     remote_storage: &GenericRemoteStorage,
      76              80 :     tenant_shard_id: &TenantShardId,
      77              80 :     cancel: &CancellationToken,
      78              80 : ) -> Result<(), DeleteTenantError> {
      79              80 :     let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
      80                 : 
      81              80 :     let data: &[u8] = &[];
      82              80 :     backoff::retry(
      83             118 :         || async {
      84             118 :             let data = bytes::Bytes::from_static(data);
      85             118 :             let stream = futures::stream::once(futures::future::ready(Ok(data)));
      86             118 :             remote_storage
      87             118 :                 .upload(stream, 0, &remote_mark_path, None)
      88             160 :                 .await
      89             118 :         },
      90              80 :         |_e| false,
      91              80 :         FAILED_UPLOAD_WARN_THRESHOLD,
      92              80 :         FAILED_REMOTE_OP_RETRIES,
      93              80 :         "mark_upload",
      94              80 :         backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
      95              80 :     )
      96             160 :     .await
      97              80 :     .context("mark_upload")?;
      98                 : 
      99              80 :     Ok(())
     100              80 : }
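// --- Illustrative sketch (not part of the measured file) --------------------
// A simplified stand-in for the `backoff::retry` + `backoff::Cancel` pattern
// used in `create_remote_delete_mark` above: retry a fallible async operation
// a bounded number of times while staying responsive to a cancellation token.
// The helper name, error type, and fixed 500 ms delay are assumptions for the
// example; the real retry/backoff policy lives in `utils::backoff`.
#[allow(dead_code)]
enum SketchRetryError<E> {
    Cancelled,
    Failed(E),
}

#[allow(dead_code)]
async fn sketch_retry_with_cancel<T, E, F, Fut>(
    mut op: F,
    max_attempts: u32,
    cancel: &tokio_util::sync::CancellationToken,
) -> Result<T, SketchRetryError<E>>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt = 0;
    loop {
        // Run one attempt; if cancellation is requested, drop the in-flight
        // attempt and return immediately.
        let result = tokio::select! {
            _ = cancel.cancelled() => return Err(SketchRetryError::Cancelled),
            r = op() => r,
        };
        match result {
            Ok(v) => return Ok(v),
            Err(e) if attempt + 1 >= max_attempts => return Err(SketchRetryError::Failed(e)),
            Err(_) => {
                attempt += 1;
                // Fixed delay for the sketch only; the real helper's backoff
                // and warning thresholds are configured by its callers.
                tokio::time::sleep(std::time::Duration::from_millis(500)).await;
            }
        }
    }
}
// -----------------------------------------------------------------------------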
     101                 : 
     102              76 : async fn create_local_delete_mark(
     103              76 :     conf: &PageServerConf,
     104              76 :     tenant_shard_id: &TenantShardId,
     105              76 : ) -> Result<(), DeleteTenantError> {
     106              76 :     let marker_path = conf.tenant_deleted_mark_file_path(tenant_shard_id);
     107              76 : 
     108              76 :     // Note: we're ok to replace an existing file.
     109              76 :     let _ = std::fs::OpenOptions::new()
     110              76 :         .write(true)
     111              76 :         .create(true)
     112              76 :         .open(&marker_path)
     113              76 :         .with_context(|| format!("could not create delete marker file {marker_path:?}"))?;
     114                 : 
     115              76 :     crashsafe::fsync_file_and_parent(&marker_path).context("sync_mark")?;
     116                 : 
     117              76 :     Ok(())
     118              76 : }
     119                 : 
     120              93 : async fn schedule_ordered_timeline_deletions(
     121              93 :     tenant: &Arc<Tenant>,
     122              93 : ) -> Result<Vec<(Arc<tokio::sync::Mutex<DeleteTimelineFlow>>, TimelineId)>, DeleteTenantError> {
     123              93 :     // Tenant is stopping at this point. We know it will be deleted.
     124              93 :     // No new timelines should be created.
     125              93 :     // Tree sort timelines to delete from leaves to the root.
     126              93 :     // NOTE: by calling clone we release the mutex which creates a possibility for a race: pending deletion
     127              93 :     // can complete and remove timeline from the map in between our call to clone
     128              93 :     // and `DeleteTimelineFlow::run`, so `run` won't find the timeline in the `timelines` map.
     129              93 :     // timelines.lock is currently synchronous, so we can't hold it across an await point.
     130              93 :     // So just ignore the NotFound error if we get it from `run`.
     131              93 :     // Beware: if it becomes async and we try to hold it here, `run` also locks it, which can create a deadlock.
     132              93 :     let timelines = tenant.timelines.lock().unwrap().clone();
     133              93 :     let sorted =
     134             147 :         tree_sort_timelines(timelines, |t| t.get_ancestor_timeline_id()).context("tree sort")?;
     135                 : 
     136              93 :     let mut already_running_deletions = vec![];
     137                 : 
     138             129 :     for (timeline_id, _) in sorted.into_iter().rev() {
     139            4617 :         if let Err(e) = DeleteTimelineFlow::run(tenant, timeline_id, true).await {
     140              22 :             match e {
     141                 :                 DeleteTimelineError::NotFound => {
     142                 :                     // Timeline deletion finished after call to clone above but before call
     143                 :                     // to `DeleteTimelineFlow::run` and removed timeline from the map.
     144               1 :                     continue;
     145                 :                 }
     146               4 :                 DeleteTimelineError::AlreadyInProgress(guard) => {
     147               4 :                     already_running_deletions.push((guard, timeline_id));
     148               4 :                     continue;
     149                 :                 }
     150              17 :                 e => return Err(DeleteTenantError::Timeline(e)),
     151                 :             }
     152             107 :         }
     153                 :     }
     154                 : 
     155              76 :     Ok(already_running_deletions)
     156              93 : }
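// --- Illustrative sketch (not part of the measured file) --------------------
// `schedule_ordered_timeline_deletions` above sorts timelines so that ancestors
// come before descendants and then iterates the result in reverse, so every
// child timeline is deleted before its ancestor. A minimal, self-contained
// illustration of that ordering over a plain ancestor map; the ids, the map
// shape, and the function name are assumptions for the example, not the real
// `tree_sort_timelines` helper.
#[allow(dead_code)]
fn sketch_parents_before_children(
    ancestor_of: &std::collections::HashMap<u64, Option<u64>>,
) -> Vec<u64> {
    let mut ordered: Vec<u64> = Vec::new();
    let mut remaining: Vec<u64> = ancestor_of.keys().copied().collect();
    // Repeatedly emit nodes whose ancestor (if any) has already been emitted.
    // Assumes the input forms a forest (no cycles), as timelines do.
    while !remaining.is_empty() {
        let before = remaining.len();
        remaining.retain(|id| {
            let ready = match ancestor_of[id] {
                None => true,
                Some(parent) => ordered.contains(&parent),
            };
            if ready {
                ordered.push(*id);
            }
            !ready
        });
        assert!(remaining.len() < before, "cycle or dangling ancestor");
    }
    // Deleting in `ordered.iter().rev()` order visits leaves first, matching
    // the `sorted.into_iter().rev()` loop above.
    ordered
}
// -----------------------------------------------------------------------------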
     157                 : 
     158              66 : async fn ensure_timelines_dir_empty(timelines_path: &Utf8Path) -> Result<(), DeleteTenantError> {
     159              66 :     // Assert timelines dir is empty.
     160              66 :     if !fs_ext::is_directory_empty(timelines_path).await? {
     161                 :         // Display first 10 items in directory
     162 UBC           0 :         let list = fs_ext::list_dir(timelines_path).await.context("list_dir")?;
     163               0 :         let list = &list.into_iter().take(10).collect::<Vec<_>>();
     164               0 :         return Err(DeleteTenantError::Other(anyhow::anyhow!(
     165               0 :             "Timelines directory is not empty after all timelines deletion: {list:?}"
     166               0 :         )));
     167 CBC          66 :     }
     168              66 : 
     169              66 :     Ok(())
     170              66 : }
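// --- Illustrative sketch (not part of the measured file) --------------------
// A minimal version of the emptiness check that `fs_ext::is_directory_empty`
// is assumed to perform (the real helper may differ): a directory is empty if
// reading it yields no entries.
#[allow(dead_code)]
async fn sketch_is_directory_empty(path: &Utf8Path) -> std::io::Result<bool> {
    let mut entries = tokio::fs::read_dir(path).await?;
    // `next_entry` returns Ok(None) once the directory stream is exhausted.
    Ok(entries.next_entry().await?.is_none())
}
// -----------------------------------------------------------------------------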
     171                 : 
     172              72 : async fn remove_tenant_remote_delete_mark(
     173              72 :     conf: &PageServerConf,
     174              72 :     remote_storage: Option<&GenericRemoteStorage>,
     175              72 :     tenant_shard_id: &TenantShardId,
     176              72 :     cancel: &CancellationToken,
     177              72 : ) -> Result<(), DeleteTenantError> {
     178              72 :     if let Some(remote_storage) = remote_storage {
     179              72 :         let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
     180              72 :         backoff::retry(
     181             280 :             || async { remote_storage.delete(&path).await },
     182              72 :             |_e| false,
     183              72 :             FAILED_UPLOAD_WARN_THRESHOLD,
     184              72 :             FAILED_REMOTE_OP_RETRIES,
     185              72 :             "remove_tenant_remote_delete_mark",
     186              72 :             backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
     187              72 :         )
     188             280 :         .await
     189              72 :         .context("remove_tenant_remote_delete_mark")?;
     190 UBC           0 :     }
     191 CBC          72 :     Ok(())
     192              72 : }
     193                 : 
     194                 : // Clean up fs traces: tenant config, timelines dir, local delete mark, tenant dir
     195              68 : async fn cleanup_remaining_fs_traces(
     196              68 :     conf: &PageServerConf,
     197              68 :     tenant_shard_id: &TenantShardId,
     198              68 : ) -> Result<(), DeleteTenantError> {
     199             316 :     let rm = |p: Utf8PathBuf, is_dir: bool| async move {
     200             316 :         if is_dir {
     201             120 :             tokio::fs::remove_dir(&p).await
     202              68 :         } else {
     203             196 :             tokio::fs::remove_file(&p).await
     204              68 :         }
     205             316 :         .or_else(fs_ext::ignore_not_found)
     206             316 :         .with_context(|| format!("failed to delete {p}"))
     207             316 :     };
     208              68 : 
     209              68 :     rm(conf.tenant_config_path(tenant_shard_id), false).await?;
     210              68 :     rm(conf.tenant_location_config_path(tenant_shard_id), false).await?;
     211                 : 
     212              68 :     fail::fail_point!("tenant-delete-before-remove-timelines-dir", |_| {
     213               4 :         Err(anyhow::anyhow!(
     214               4 :             "failpoint: tenant-delete-before-remove-timelines-dir"
     215               4 :         ))?
     216              68 :     });
     217                 : 
     218              64 :     rm(conf.timelines_path(tenant_shard_id), true).await?;
     219                 : 
     220              64 :     fail::fail_point!("tenant-delete-before-remove-deleted-mark", |_| {
     221               4 :         Err(anyhow::anyhow!(
     222               4 :             "failpoint: tenant-delete-before-remove-deleted-mark"
     223               4 :         ))?
     224              64 :     });
     225                 : 
     226                 :     // Make sure previous deletions are ordered before mark removal.
     227                 :     // Otherwise there is no guarantee that they reach the disk before mark deletion.
     228                 :     // So it's possible for the mark to reach disk first and for other deletions
     229                 :     // to be reordered later and thus missed if a crash occurs.
     230                 :     // Note that we don't need to sync after the mark file is removed
     231                 :     // because we can tolerate the case where the mark file reappears on startup.
     232              60 :     let tenant_path = &conf.tenant_path(tenant_shard_id);
     233              60 :     if tenant_path.exists() {
     234              60 :         crashsafe::fsync_async(&conf.tenant_path(tenant_shard_id))
     235             120 :             .await
     236              60 :             .context("fsync_pre_mark_remove")?;
     237 UBC           0 :     }
     238                 : 
     239 CBC          60 :     rm(conf.tenant_deleted_mark_file_path(tenant_shard_id), false).await?;
     240                 : 
     241              60 :     fail::fail_point!("tenant-delete-before-remove-tenant-dir", |_| {
     242               4 :         Err(anyhow::anyhow!(
     243               4 :             "failpoint: tenant-delete-before-remove-tenant-dir"
     244               4 :         ))?
     245              60 :     });
     246                 : 
     247              56 :     rm(conf.tenant_path(tenant_shard_id), true).await?;
     248                 : 
     249              56 :     Ok(())
     250              68 : }
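// --- Illustrative sketch (not part of the measured file) --------------------
// The ordering comment inside `cleanup_remaining_fs_traces` (sync the tenant
// directory before removing the deleted-mark file) boils down to this Unix
// crash-safety pattern, shown with blocking std APIs for brevity. The function
// and parameter names are assumptions for the example; the real code goes
// through `crashsafe::fsync_async` and the async `rm` closure above.
#[allow(dead_code)]
fn sketch_remove_marker_durably(
    tenant_dir: &std::path::Path,
    marker: &std::path::Path,
) -> std::io::Result<()> {
    // 1. fsync the directory so earlier unlinks under it are durable. Opening
    //    a directory read-only and calling sync_all() issues fsync(2) on it.
    std::fs::File::open(tenant_dir)?.sync_all()?;

    // 2. Only now unlink the marker. If we crash before this point the marker
    //    is still present and deletion is resumed on the next startup.
    match std::fs::remove_file(marker) {
        Ok(()) => Ok(()),
        // Tolerate a retry that already removed it (idempotency).
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(e),
    }
}
// -----------------------------------------------------------------------------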
     251                 : 
     252                 : /// Orchestrates the shutdown of all tenant tasks, removes the tenant's in-memory structures,
     253                 : /// and deletes its data from both disk and s3.
     254                 : /// The sequence of steps:
     255                 : /// 1. Upload remote deletion mark.
     256                 : /// 2. Create local mark file.
     257                 : /// 3. Shut down tasks
     258                 : /// 4. Run ordered timeline deletions
     259                 : /// 5. Wait for timeline deletion operations that were scheduled before tenant deletion was requested
     260                 : /// 6. Remove remote mark
     261                 : /// 7. Clean up remaining fs traces: tenant dir, config, timelines dir, local delete mark
     262                 : /// It is resumable from any step in case a crash/restart occurs.
     263                 : /// There are two entrypoints to the process:
     264                 : /// 1. [`DeleteTenantFlow::run`]: this is the main one, called by a management API handler.
     265                 : /// 2. [`DeleteTenantFlow::resume_from_attach`]: called when deletion is resumed because the tenant is found to be deleted during the attach process.
     266                 : ///  Note that the only other place that messes around with the timeline delete mark is the `Tenant::spawn_load` function.
     267             778 : #[derive(Default)]
     268                 : pub enum DeleteTenantFlow {
     269                 :     #[default]
     270                 :     NotStarted,
     271                 :     InProgress,
     272                 :     Finished,
     273                 : }
     274                 : 
     275                 : impl DeleteTenantFlow {
     276                 :     // These steps are run in the context of the management API request handler.
     277                 :     // Long-running steps continue to run in the background.
     278                 :     // NB: If this fails half-way through, and is retried, the retry will go through
     279                 :     // all the same steps again. Make sure the code here is idempotent, and don't
     280                 :     // error out if some of the shutdown tasks have already been completed!
     281                 :     // NOTE: static needed for background part.
     282                 :     // We assume that calling code sets up the span with tenant_id.
     283              90 :     #[instrument(skip_all)]
     284                 :     pub(crate) async fn run(
     285                 :         conf: &'static PageServerConf,
     286                 :         remote_storage: Option<GenericRemoteStorage>,
     287                 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     288                 :         tenant: Arc<Tenant>,
     289                 :     ) -> Result<(), DeleteTenantError> {
     290                 :         span::debug_assert_current_span_has_tenant_id();
     291                 : 
     292              90 :         pausable_failpoint!("tenant-delete-before-run");
     293                 : 
     294                 :         let mut guard = Self::prepare(&tenant).await?;
     295                 : 
     296                 :         if let Err(e) = Self::run_inner(&mut guard, conf, remote_storage.as_ref(), &tenant).await {
     297                 :             tenant.set_broken(format!("{e:#}")).await;
     298                 :             return Err(e);
     299                 :         }
     300                 : 
     301                 :         Self::schedule_background(guard, conf, remote_storage, tenants, tenant);
     302                 : 
     303                 :         Ok(())
     304                 :     }
     305                 : 
     306                 :     // Helper function needed so we can match once on the returned error and transition the tenant into the broken state.
     307                 :     // This is needed because tenant.shutdown is not idempotent. If the tenant state is set to stopping, another call to tenant.shutdown
     308                 :     // will result in an error, but here we need to be able to retry shutdown when tenant deletion is retried.
     309                 :     // So the solution is to set tenant state to broken.
     310              84 :     async fn run_inner(
     311              84 :         guard: &mut OwnedMutexGuard<Self>,
     312              84 :         conf: &'static PageServerConf,
     313              84 :         remote_storage: Option<&GenericRemoteStorage>,
     314              84 :         tenant: &Tenant,
     315              84 :     ) -> Result<(), DeleteTenantError> {
     316              84 :         guard.mark_in_progress()?;
     317                 : 
     318              84 :         fail::fail_point!("tenant-delete-before-create-remote-mark", |_| {
     319               4 :             Err(anyhow::anyhow!(
     320               4 :                 "failpoint: tenant-delete-before-create-remote-mark"
     321               4 :             ))?
     322              84 :         });
     323                 : 
     324                 :         // IDEA: implement detach as delete without remote storage. Then they would use the same lock (deletion_progress) so they won't contend.
     325                 :         // Though that sounds scary; maybe use a different mark name?
     326                 :         // Detach currently uses remove_dir_all, so in case of a crash we can end up in a weird state.
     327              80 :         if let Some(remote_storage) = &remote_storage {
     328              80 :             create_remote_delete_mark(
     329              80 :                 conf,
     330              80 :                 remote_storage,
     331              80 :                 &tenant.tenant_shard_id,
     332              80 :                 // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
     333              80 :                 &CancellationToken::new(),
     334              80 :             )
     335             160 :             .await
     336              80 :             .context("remote_mark")?
     337 UBC           0 :         }
     338                 : 
     339 CBC          80 :         fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
     340               4 :             Err(anyhow::anyhow!(
     341               4 :                 "failpoint: tenant-delete-before-create-local-mark"
     342               4 :             ))?
     343              80 :         });
     344                 : 
     345              76 :         create_local_delete_mark(conf, &tenant.tenant_shard_id)
     346 UBC           0 :             .await
     347 CBC          76 :             .context("local delete mark")?;
     348                 : 
     349              76 :         fail::fail_point!("tenant-delete-before-background", |_| {
     350               4 :             Err(anyhow::anyhow!(
     351               4 :                 "failpoint: tenant-delete-before-background"
     352               4 :             ))?
     353              76 :         });
     354                 : 
     355              72 :         Ok(())
     356              84 :     }
     357                 : 
     358              84 :     fn mark_in_progress(&mut self) -> anyhow::Result<()> {
     359              84 :         match self {
     360 UBC           0 :             Self::Finished => anyhow::bail!("Bug. Is in finished state"),
     361 CBC          24 :             Self::InProgress { .. } => { /* We're in a retry */ }
     362              60 :             Self::NotStarted => { /* Fresh start */ }
     363                 :         }
     364                 : 
     365              84 :         *self = Self::InProgress;
     366              84 : 
     367              84 :         Ok(())
     368              84 :     }
     369                 : 
     370             730 :     pub(crate) async fn should_resume_deletion(
     371             730 :         conf: &'static PageServerConf,
     372             730 :         remote_mark_exists: bool,
     373             730 :         tenant: &Tenant,
     374             730 :     ) -> Result<Option<DeletionGuard>, DeleteTenantError> {
     375             730 :         let acquire = |t: &Tenant| {
     376              21 :             Some(
     377              21 :                 Arc::clone(&t.delete_progress)
     378              21 :                     .try_lock_owned()
     379              21 :                     .expect("we're the only owner during init"),
     380              21 :             )
     381             730 :         };
     382             730 : 
     383             730 :         if remote_mark_exists {
     384              15 :             return Ok(acquire(tenant));
     385             715 :         }
     386             715 : 
     387             715 :         // Check the local mark first; if it's there, there is no need to go to S3 to check whether the remote one exists.
     388             715 :         if conf
     389             715 :             .tenant_deleted_mark_file_path(&tenant.tenant_shard_id)
     390             715 :             .exists()
     391                 :         {
     392               6 :             Ok(acquire(tenant))
     393                 :         } else {
     394             709 :             Ok(None)
     395                 :         }
     396             730 :     }
     397                 : 
     398              21 :     pub(crate) async fn resume_from_attach(
     399              21 :         guard: DeletionGuard,
     400              21 :         tenant: &Arc<Tenant>,
     401              21 :         preload: Option<TenantPreload>,
     402              21 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     403              21 :         ctx: &RequestContext,
     404              21 :     ) -> Result<(), DeleteTenantError> {
     405              21 :         let (_, progress) = completion::channel();
     406              21 : 
     407              21 :         tenant
     408              21 :             .set_stopping(progress, false, true)
     409 UBC           0 :             .await
     410 CBC          21 :             .expect("cant be stopping or broken");
     411              21 : 
     412              40 :         tenant.attach(preload, ctx).await.context("attach")?;
     413                 : 
     414              21 :         Self::background(
     415              21 :             guard,
     416              21 :             tenant.conf,
     417              21 :             tenant.remote_storage.clone(),
     418              21 :             tenants,
     419              21 :             tenant,
     420              21 :         )
     421             723 :         .await
     422              21 :     }
     423                 : 
     424              90 :     async fn prepare(
     425              90 :         tenant: &Arc<Tenant>,
     426              90 :     ) -> Result<tokio::sync::OwnedMutexGuard<Self>, DeleteTenantError> {
     427              90 :         // FIXME: unsure about active only. Our init jobs may not be cancellable properly,
     428              90 :         // so at least for now allow deletions only for active tenants. TODO recheck
     429              90 :         // Broken and Stopping are needed for retries.
     430              90 :         if !matches!(
     431              90 :             tenant.current_state(),
     432                 :             TenantState::Active | TenantState::Broken { .. }
     433                 :         ) {
     434               2 :             return Err(DeleteTenantError::InvalidState(tenant.current_state()));
     435              88 :         }
     436                 : 
     437              88 :         let guard = Arc::clone(&tenant.delete_progress)
     438              88 :             .try_lock_owned()
     439              88 :             .map_err(|_| DeleteTenantError::AlreadyInProgress)?;
     440                 : 
     441              88 :         fail::fail_point!("tenant-delete-before-shutdown", |_| {
     442               4 :             Err(anyhow::anyhow!("failpoint: tenant-delete-before-shutdown"))?
     443              88 :         });
     444                 : 
     445                 :         // make pageserver shutdown not wait for our completion
     446              84 :         let (_, progress) = completion::channel();
     447              84 : 
     448              84 :         // It would be good to only set stopping here and continue shutdown in the background, but shutdown is not idempotent.
     449              84 :         // i.e. it is an error to do:
     450              84 :         // tenant.set_stopping
     451              84 :         // tenant.shutdown
     452              84 :         // It's also bad that we're holding tenants.read here.
     453              84 :         // TODO relax set_stopping to be idempotent?
     454             196 :         if tenant.shutdown(progress, false).await.is_err() {
     455 UBC           0 :             return Err(DeleteTenantError::Other(anyhow::anyhow!(
     456               0 :                 "tenant shutdown is already in progress"
     457               0 :             )));
     458 CBC          84 :         }
     459              84 : 
     460              84 :         Ok(guard)
     461              90 :     }
     462                 : 
     463              72 :     fn schedule_background(
     464              72 :         guard: OwnedMutexGuard<Self>,
     465              72 :         conf: &'static PageServerConf,
     466              72 :         remote_storage: Option<GenericRemoteStorage>,
     467              72 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     468              72 :         tenant: Arc<Tenant>,
     469              72 :     ) {
     470              72 :         let tenant_shard_id = tenant.tenant_shard_id;
     471              72 : 
     472              72 :         task_mgr::spawn(
     473              72 :             task_mgr::BACKGROUND_RUNTIME.handle(),
     474              72 :             TaskKind::TimelineDeletionWorker,
     475              72 :             Some(tenant_shard_id),
     476              72 :             None,
     477              72 :             "tenant_delete",
     478                 :             false,
     479              72 :             async move {
     480              37 :                 if let Err(err) =
     481            4733 :                     Self::background(guard, conf, remote_storage, tenants, &tenant).await
     482                 :                 {
     483              37 :                     error!("Error: {err:#}");
     484              37 :                     tenant.set_broken(format!("{err:#}")).await;
     485              35 :                 };
     486              72 :                 Ok(())
     487              72 :             }
     488              72 :             .instrument({
     489              72 :                 let span = tracing::info_span!(parent: None, "delete_tenant", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug());
     490              72 :                 span.follows_from(Span::current());
     491              72 :                 span
     492              72 :             }),
     493              72 :         );
     494              72 :     }
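// --- Illustrative sketch (not part of the measured file) --------------------
// The span wiring used in `schedule_background`, in isolation: the background
// task gets a fresh root span that `follows_from` the request span rather than
// being its child, so the long-running deletion is linked to, but not parented
// under, the short-lived request span. Plain `tokio::spawn` stands in for
// `task_mgr::spawn` here; the function and span names are assumptions for the
// example. Must be called from within a Tokio runtime.
#[allow(dead_code)]
fn sketch_spawn_detached_worker() {
    let span = tracing::info_span!(parent: None, "sketch_delete_tenant_worker");
    span.follows_from(Span::current());
    tokio::spawn(
        async move {
            // ... long-running deletion steps would run here ...
        }
        .instrument(span),
    );
}
// -----------------------------------------------------------------------------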
     495                 : 
     496              93 :     async fn background(
     497              93 :         mut guard: OwnedMutexGuard<Self>,
     498              93 :         conf: &PageServerConf,
     499              93 :         remote_storage: Option<GenericRemoteStorage>,
     500              93 :         tenants: &'static std::sync::RwLock<TenantsMap>,
     501              93 :         tenant: &Arc<Tenant>,
     502              93 :     ) -> Result<(), DeleteTenantError> {
     503                 :         // Tree sort timelines, schedule delete for them. Mention retries from the console side.
     504                 :         // Note that if deletion fails we don't mark timelines as broken;
     505                 :         // the whole tenant will become broken per the `Self::schedule_background` logic.
     506              93 :         let already_running_timeline_deletions = schedule_ordered_timeline_deletions(tenant)
     507            4617 :             .await
     508              93 :             .context("schedule_ordered_timeline_deletions")?;
     509                 : 
     510              76 :         fail::fail_point!("tenant-delete-before-polling-ongoing-deletions", |_| {
     511               4 :             Err(anyhow::anyhow!(
     512               4 :                 "failpoint: tenant-delete-before-polling-ongoing-deletions"
     513               4 :             ))?
     514              76 :         });
     515                 : 
     516                 :         // Wait for deletions that were already running at the moment when tenant deletion was requested.
     517                 :         // When we can lock the deletion guard it means that the corresponding timeline deletion has finished.
     518              76 :         for (guard, timeline_id) in already_running_timeline_deletions {
     519               4 :             let flow = guard.lock().await;
     520               4 :             if !flow.is_finished() {
     521 UBC           0 :                 return Err(DeleteTenantError::Other(anyhow::anyhow!(
     522               0 :                     "already running timeline deletion failed: {timeline_id}"
     523               0 :                 )));
     524 CBC           4 :             }
     525                 :         }
     526                 : 
     527              72 :         let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
     528              72 :         // May not exist if we fail in cleanup_remaining_fs_traces after removing it
     529              72 :         if timelines_path.exists() {
     530                 :             // sanity check to guard against layout changes
     531              66 :             ensure_timelines_dir_empty(&timelines_path)
     532              66 :                 .await
     533              66 :                 .context("timelines dir not empty")?;
     534               6 :         }
     535                 : 
     536              72 :         remove_tenant_remote_delete_mark(
     537              72 :             conf,
     538              72 :             remote_storage.as_ref(),
     539              72 :             &tenant.tenant_shard_id,
     540              72 :             // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
     541              72 :             &CancellationToken::new(),
     542              72 :         )
     543             280 :         .await?;
     544                 : 
     545              72 :         fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
     546               4 :             Err(anyhow::anyhow!(
     547               4 :                 "failpoint: tenant-delete-before-cleanup-remaining-fs-traces"
     548               4 :             ))?
     549              72 :         });
     550                 : 
     551              68 :         cleanup_remaining_fs_traces(conf, &tenant.tenant_shard_id)
     552             436 :             .await
     553              68 :             .context("cleanup_remaining_fs_traces")?;
     554                 : 
     555                 :         {
     556              56 :             pausable_failpoint!("tenant-delete-before-map-remove");
     557                 : 
     558                 :             // This block is simply removing the TenantSlot for this tenant.  It requires a loop because
     559                 :             // we might conflict with a TenantSlot::InProgress marker and need to wait for it.
     560                 :             //
     561                 :             // This complexity will go away when we simplify how deletion works:
     562                 :             // https://github.com/neondatabase/neon/issues/5080
     563                 :             loop {
     564                 :                 // Under the TenantMap lock, try to remove the tenant.  We usually succeed, but if
     565                 :                 // we encounter an InProgress marker, yield the barrier it contains and wait on it.
     566               1 :                 let barrier = {
     567              57 :                     let mut locked = tenants.write().unwrap();
     568              57 :                     let removed = locked.remove(tenant.tenant_shard_id);
     569              57 : 
     570              57 :                     // FIXME: we should not be modifying this from outside of mgr.rs.
     571              57 :                     // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
     572              57 :                     crate::metrics::TENANT_MANAGER
     573              57 :                         .tenant_slots
     574              57 :                         .set(locked.len() as u64);
     575                 : 
     576              56 :                     match removed {
     577              56 :                         TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
     578              56 :                             match tenant.current_state() {
     579              56 :                                 TenantState::Stopping { .. } | TenantState::Broken { .. } => {
     580              56 :                                     // Expected: we put the tenant into stopping state before we start deleting it
     581              56 :                                 }
     582 UBC           0 :                                 state => {
     583                 :                                     // Unexpected state
     584               0 :                                     tracing::warn!(
     585               0 :                                         "Tenant in unexpected state {state} after deletion"
     586               0 :                                     );
     587                 :                                 }
     588                 :                             }
     589 CBC          56 :                             break;
     590                 :                         }
     591                 :                         TenantsMapRemoveResult::Occupied(TenantSlot::Secondary(_)) => {
     592                 :                             // This is unexpected: this secondary tenant should not have been created, and we
     593                 :                             // are not in a position to shut it down from here.
     594 UBC           0 :                             tracing::warn!("Tenant transitioned to secondary mode while deleting!");
     595               0 :                             break;
     596                 :                         }
     597                 :                         TenantsMapRemoveResult::Occupied(TenantSlot::InProgress(_)) => {
     598               0 :                             unreachable!("TenantsMap::remove handles InProgress separately, should never return it here");
     599                 :                         }
     600                 :                         TenantsMapRemoveResult::Vacant => {
     601               0 :                             tracing::warn!(
     602               0 :                                 "Tenant removed from TenantsMap before deletion completed"
     603               0 :                             );
     604               0 :                             break;
     605                 :                         }
     606 CBC           1 :                         TenantsMapRemoveResult::InProgress(barrier) => {
     607               1 :                             // An InProgress entry was found, we must wait on its barrier
     608               1 :                             barrier
     609                 :                         }
     610                 :                     }
     611                 :                 };
     612                 : 
     613               1 :                 tracing::info!(
     614               1 :                     "Waiting for competing operation to complete before deleting state for tenant"
     615               1 :                 );
     616               1 :                 barrier.wait().await;
     617                 :             }
     618                 :         }
     619                 : 
     620              56 :         *guard = Self::Finished;
     621              56 : 
     622              56 :         Ok(())
     623              93 :     }
     624                 : }
        

Generated by: LCOV version 2.1-beta