LCOV - code coverage report
Current view: top level - pageserver/src/tenant/timeline - uninit.rs (source / functions) Coverage Total Hit
Test: 050dd70dd490b28fffe527eae9fb8a1222b5c59c.info Lines: 70.2 % 131 92
Test Date: 2024-06-25 21:28:46 Functions: 50.0 % 18 9

            Line data    Source code
       1              : use std::{collections::hash_map::Entry, fs, sync::Arc};
       2              : 
       3              : use anyhow::Context;
       4              : use camino::Utf8PathBuf;
       5              : use tracing::{error, info, info_span};
       6              : use utils::{fs_ext, id::TimelineId, lsn::Lsn};
       7              : 
       8              : use crate::{context::RequestContext, import_datadir, tenant::Tenant};
       9              : 
      10              : use super::Timeline;
      11              : 
      12              : /// A timeline with some of its files on disk, being initialized.
      13              : /// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
      14              : /// its local files are removed.  If we crash while this class exists, then the timeline's local
      15              : /// state is cleaned up during [`Tenant::clean_up_timelines`], because the timeline's content isn't in remote storage.
      16              : ///
      17              : /// The caller is responsible for proper timeline data filling before the final init.
      18              : #[must_use]
      19              : pub struct UninitializedTimeline<'t> {
      20              :     pub(crate) owning_tenant: &'t Tenant,
      21              :     timeline_id: TimelineId,
      22              :     raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
      23              : }
      24              : 
      25              : impl<'t> UninitializedTimeline<'t> {
      26          375 :     pub(crate) fn new(
      27          375 :         owning_tenant: &'t Tenant,
      28          375 :         timeline_id: TimelineId,
      29          375 :         raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
      30          375 :     ) -> Self {
      31          375 :         Self {
      32          375 :             owning_tenant,
      33          375 :             timeline_id,
      34          375 :             raw_timeline,
      35          375 :         }
      36          375 :     }
      37              : 
      38              :     /// Finish timeline creation: insert it into the Tenant's timelines map
      39              :     ///
      40              :     /// This function launches the flush loop if not already done.
      41              :     ///
      42              :     /// The caller is responsible for activating the timeline (function `.activate()`).
      43          366 :     pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
      44          366 :         let timeline_id = self.timeline_id;
      45          366 :         let tenant_shard_id = self.owning_tenant.tenant_shard_id;
      46          366 : 
      47          366 :         if self.raw_timeline.is_none() {
      48            0 :             return Err(anyhow::anyhow!(
      49            0 :                 "No timeline for initialization found for {tenant_shard_id}/{timeline_id}"
      50            0 :             ));
      51          366 :         }
      52          366 : 
      53          366 :         // Check that the caller initialized disk_consistent_lsn
      54          366 :         let new_disk_consistent_lsn = self
      55          366 :             .raw_timeline
      56          366 :             .as_ref()
      57          366 :             .expect("checked above")
      58          366 :             .0
      59          366 :             .get_disk_consistent_lsn();
      60          366 : 
      61          366 :         anyhow::ensure!(
      62          366 :             new_disk_consistent_lsn.is_valid(),
      63            0 :             "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
      64              :         );
      65              : 
      66          366 :         let mut timelines = self.owning_tenant.timelines.lock().unwrap();
      67          366 :         match timelines.entry(timeline_id) {
      68            0 :             Entry::Occupied(_) => anyhow::bail!(
      69            0 :                 "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map"
      70            0 :             ),
      71          366 :             Entry::Vacant(v) => {
      72          366 :                 // after taking here should be no fallible operations, because the drop guard will not
      73          366 :                 // cleanup after and would block for example the tenant deletion
      74          366 :                 let (new_timeline, _create_guard) =
      75          366 :                     self.raw_timeline.take().expect("already checked");
      76          366 : 
      77          366 :                 v.insert(Arc::clone(&new_timeline));
      78          366 : 
      79          366 :                 new_timeline.maybe_spawn_flush_loop();
      80          366 : 
      81          366 :                 Ok(new_timeline)
      82              :             }
      83              :         }
      84          366 :     }
      85              : 
      86              :     /// Prepares timeline data by loading it from the basebackup archive.
      87            0 :     pub(crate) async fn import_basebackup_from_tar(
      88            0 :         self,
      89            0 :         tenant: Arc<Tenant>,
      90            0 :         copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
      91            0 :         base_lsn: Lsn,
      92            0 :         broker_client: storage_broker::BrokerClientChannel,
      93            0 :         ctx: &RequestContext,
      94            0 :     ) -> anyhow::Result<Arc<Timeline>> {
      95            0 :         let raw_timeline = self.raw_timeline()?;
      96              : 
      97            0 :         import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx)
      98            0 :             .await
      99            0 :             .context("Failed to import basebackup")?;
     100              : 
     101              :         // Flush the new layer files to disk, before we make the timeline as available to
     102              :         // the outside world.
     103              :         //
     104              :         // Flush loop needs to be spawned in order to be able to flush.
     105            0 :         raw_timeline.maybe_spawn_flush_loop();
     106            0 : 
     107            0 :         fail::fail_point!("before-checkpoint-new-timeline", |_| {
     108            0 :             anyhow::bail!("failpoint before-checkpoint-new-timeline");
     109            0 :         });
     110              : 
     111            0 :         raw_timeline
     112            0 :             .freeze_and_flush()
     113            0 :             .await
     114            0 :             .context("Failed to flush after basebackup import")?;
     115              : 
     116              :         // All the data has been imported. Insert the Timeline into the tenant's timelines map
     117            0 :         let tl = self.finish_creation()?;
     118            0 :         tl.activate(tenant, broker_client, None, ctx);
     119            0 :         Ok(tl)
     120            0 :     }
     121              : 
     122          151 :     pub(crate) fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
     123          151 :         Ok(&self
     124          151 :             .raw_timeline
     125          151 :             .as_ref()
     126          151 :             .with_context(|| {
     127            0 :                 format!(
     128            0 :                     "No raw timeline {}/{} found",
     129            0 :                     self.owning_tenant.tenant_shard_id, self.timeline_id
     130            0 :                 )
     131          151 :             })?
     132              :             .0)
     133          151 :     }
     134              : }
     135              : 
     136              : impl Drop for UninitializedTimeline<'_> {
     137          373 :     fn drop(&mut self) {
     138          373 :         if let Some((_, create_guard)) = self.raw_timeline.take() {
     139            7 :             let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();
     140            7 :             error!("Timeline got dropped without initializing, cleaning its files");
     141            7 :             cleanup_timeline_directory(create_guard);
     142          366 :         }
     143          373 :     }
     144              : }
     145              : 
     146            8 : pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
     147            8 :     let timeline_path = &create_guard.timeline_path;
     148            8 :     match fs_ext::ignore_absent_files(|| fs::remove_dir_all(timeline_path)) {
     149              :         Ok(()) => {
     150            8 :             info!("Timeline dir {timeline_path:?} removed successfully")
     151              :         }
     152            0 :         Err(e) => {
     153            0 :             error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
     154              :         }
     155              :     }
     156              :     // Having cleaned up, we can release this TimelineId in `[Tenant::timelines_creating]` to allow other
     157              :     // timeline creation attempts under this TimelineId to proceed
     158            8 :     drop(create_guard);
     159            8 : }
     160              : 
     161              : /// A guard for timeline creations in process: as long as this object exists, the timeline ID
     162              : /// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
     163              : #[must_use]
     164              : pub(crate) struct TimelineCreateGuard<'t> {
     165              :     owning_tenant: &'t Tenant,
     166              :     timeline_id: TimelineId,
     167              :     pub(crate) timeline_path: Utf8PathBuf,
     168              : }
     169              : 
     170              : /// Errors when acquiring exclusive access to a timeline ID for creation
     171            2 : #[derive(thiserror::Error, Debug)]
     172              : pub(crate) enum TimelineExclusionError {
     173              :     #[error("Already exists")]
     174              :     AlreadyExists(Arc<Timeline>),
     175              :     #[error("Already creating")]
     176              :     AlreadyCreating,
     177              : 
     178              :     // e.g. I/O errors, or some failure deep in postgres initdb
     179              :     #[error(transparent)]
     180              :     Other(#[from] anyhow::Error),
     181              : }
     182              : 
     183              : impl<'t> TimelineCreateGuard<'t> {
     184          382 :     pub(crate) fn new(
     185          382 :         owning_tenant: &'t Tenant,
     186          382 :         timeline_id: TimelineId,
     187          382 :         timeline_path: Utf8PathBuf,
     188          382 :     ) -> Result<Self, TimelineExclusionError> {
     189          382 :         // Lock order: this is the only place we take both locks.  During drop() we only
     190          382 :         // lock creating_timelines
     191          382 :         let timelines = owning_tenant.timelines.lock().unwrap();
     192          382 :         let mut creating_timelines: std::sync::MutexGuard<
     193          382 :             '_,
     194          382 :             std::collections::HashSet<TimelineId>,
     195          382 :         > = owning_tenant.timelines_creating.lock().unwrap();
     196              : 
     197          382 :         if let Some(existing) = timelines.get(&timeline_id) {
     198            2 :             Err(TimelineExclusionError::AlreadyExists(existing.clone()))
     199          380 :         } else if creating_timelines.contains(&timeline_id) {
     200            0 :             Err(TimelineExclusionError::AlreadyCreating)
     201              :         } else {
     202          380 :             creating_timelines.insert(timeline_id);
     203          380 :             Ok(Self {
     204          380 :                 owning_tenant,
     205          380 :                 timeline_id,
     206          380 :                 timeline_path,
     207          380 :             })
     208              :         }
     209          382 :     }
     210              : }
     211              : 
     212              : impl Drop for TimelineCreateGuard<'_> {
     213          378 :     fn drop(&mut self) {
     214          378 :         self.owning_tenant
     215          378 :             .timelines_creating
     216          378 :             .lock()
     217          378 :             .unwrap()
     218          378 :             .remove(&self.timeline_id);
     219          378 :     }
     220              : }
        

Generated by: LCOV version 2.1-beta