LCOV - differential code coverage report
Current view: top level - pageserver/src/tenant/timeline - uninit.rs (source / functions) Coverage Total Hit UBC CBC
Current: cd44433dd675caa99df17a61b18949c8387e2242.info Lines: 86.1 % 173 149 24 149
Current Date: 2024-01-09 02:06:09 Functions: 51.6 % 31 16 15 16
Baseline: 66c52a629a0f4a503e193045e0df4c77139e344b.info
Baseline Date: 2024-01-08 15:34:46

           TLA  Line data    Source code
       1                 : use std::{collections::hash_map::Entry, fs, sync::Arc};
       2                 : 
       3                 : use anyhow::Context;
       4                 : use camino::Utf8PathBuf;
       5                 : use tracing::{error, info, info_span, warn};
       6                 : use utils::{crashsafe, fs_ext, id::TimelineId, lsn::Lsn};
       7                 : 
       8                 : use crate::{context::RequestContext, import_datadir, tenant::Tenant};
       9                 : 
      10                 : use super::Timeline;
      11                 : 
      12                 : /// A timeline with some of its files on disk, being initialized.
      13                 : /// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
      14                 : /// its local files are removed. In the worst case of a crash, an uninit mark file is left behind, which causes the directory
      15                 : /// to be removed on next restart.
      16                 : ///
      17                 : /// The caller is responsible for proper timeline data filling before the final init.
      18                 : #[must_use]
      19                 : pub struct UninitializedTimeline<'t> {
      20                 :     pub(crate) owning_tenant: &'t Tenant,
      21                 :     timeline_id: TimelineId,
      22                 :     raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
      23                 : }
      24                 : 
      25                 : impl<'t> UninitializedTimeline<'t> {
      26 CBC         920 :     pub(crate) fn new(
      27             920 :         owning_tenant: &'t Tenant,
      28             920 :         timeline_id: TimelineId,
      29             920 :         raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
      30             920 :     ) -> Self {
      31             920 :         Self {
      32             920 :             owning_tenant,
      33             920 :             timeline_id,
      34             920 :             raw_timeline,
      35             920 :         }
      36             920 :     }
      37                 : 
      38                 :     /// Finish timeline creation: insert it into the Tenant's timelines map and remove the
      39                 :     /// uninit mark file.
      40                 :     ///
      41                 :     /// This function launches the flush loop if not already done.
      42                 :     ///
      43                 :     /// The caller is responsible for activating the timeline (function `.activate()`).
      44             913 :     pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
      45             913 :         let timeline_id = self.timeline_id;
      46             913 :         let tenant_shard_id = self.owning_tenant.tenant_shard_id;
      47             913 : 
      48             913 :         if self.raw_timeline.is_none() {
      49 UBC           0 :             return Err(anyhow::anyhow!(
      50               0 :                 "No timeline for initialization found for {tenant_shard_id}/{timeline_id}"
      51               0 :             ));
      52 CBC         913 :         }
      53             913 : 
      54             913 :         // Check that the caller initialized disk_consistent_lsn
      55             913 :         let new_disk_consistent_lsn = self
      56             913 :             .raw_timeline
      57             913 :             .as_ref()
      58             913 :             .expect("checked above")
      59             913 :             .0
      60             913 :             .get_disk_consistent_lsn();
      61             913 : 
      62             913 :         anyhow::ensure!(
      63             913 :             new_disk_consistent_lsn.is_valid(),
      64               1 :             "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
      65                 :         );
      66                 : 
      67             912 :         let mut timelines = self.owning_tenant.timelines.lock().unwrap();
      68             912 :         match timelines.entry(timeline_id) {
      69 UBC           0 :             Entry::Occupied(_) => anyhow::bail!(
      70               0 :                 "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map"
      71               0 :             ),
      72 CBC         912 :             Entry::Vacant(v) => {
      73             912 :                 // after taking here should be no fallible operations, because the drop guard will not
      74             912 :                 // cleanup after and would block for example the tenant deletion
      75             912 :                 let (new_timeline, uninit_mark) =
      76             912 :                     self.raw_timeline.take().expect("already checked");
      77             912 : 
      78             912 :                 // this is the mutual exclusion between different retries to create the timeline;
      79             912 :                 // this should be an assertion.
      80             912 :                 uninit_mark.remove_uninit_mark().with_context(|| {
      81 UBC           0 :                     format!(
      82               0 :                         "Failed to remove uninit mark file for timeline {tenant_shard_id}/{timeline_id}"
      83               0 :                     )
      84 CBC         912 :                 })?;
      85             912 :                 v.insert(Arc::clone(&new_timeline));
      86             912 : 
      87             912 :                 new_timeline.maybe_spawn_flush_loop();
      88             912 : 
      89             912 :                 Ok(new_timeline)
      90                 :             }
      91                 :         }
      92             913 :     }
      93                 : 
      94                 :     /// Prepares timeline data by loading it from the basebackup archive.
      95              11 :     pub(crate) async fn import_basebackup_from_tar(
      96              11 :         self,
      97              11 :         copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
      98              11 :         base_lsn: Lsn,
      99              11 :         broker_client: storage_broker::BrokerClientChannel,
     100              11 :         ctx: &RequestContext,
     101              11 :     ) -> anyhow::Result<Arc<Timeline>> {
     102              11 :         let raw_timeline = self.raw_timeline()?;
     103                 : 
     104              11 :         import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx)
     105           10168 :             .await
     106              11 :             .context("Failed to import basebackup")?;
     107                 : 
     108                 :         // Flush the new layer files to disk, before we make the timeline as available to
     109                 :         // the outside world.
     110                 :         //
     111                 :         // Flush loop needs to be spawned in order to be able to flush.
     112               9 :         raw_timeline.maybe_spawn_flush_loop();
     113               9 : 
     114               9 :         fail::fail_point!("before-checkpoint-new-timeline", |_| {
     115 UBC           0 :             anyhow::bail!("failpoint before-checkpoint-new-timeline");
     116 CBC           9 :         });
     117                 : 
     118               9 :         raw_timeline
     119               9 :             .freeze_and_flush()
     120               9 :             .await
     121               9 :             .context("Failed to flush after basebackup import")?;
     122                 : 
     123                 :         // All the data has been imported. Insert the Timeline into the tenant's timelines
     124                 :         // map and remove the uninit mark file.
     125               9 :         let tl = self.finish_creation()?;
     126               9 :         tl.activate(broker_client, None, ctx);
     127               9 :         Ok(tl)
     128              11 :     }
     129                 : 
     130             571 :     pub(crate) fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
     131             571 :         Ok(&self
     132             571 :             .raw_timeline
     133             571 :             .as_ref()
     134             571 :             .with_context(|| {
     135 UBC           0 :                 format!(
     136               0 :                     "No raw timeline {}/{} found",
     137               0 :                     self.owning_tenant.tenant_shard_id, self.timeline_id
     138               0 :                 )
     139 CBC         571 :             })?
     140                 :             .0)
     141             571 :     }
     142                 : }
     143                 : 
     144                 : impl Drop for UninitializedTimeline<'_> {
     145             918 :     fn drop(&mut self) {
     146             918 :         if let Some((_, uninit_mark)) = self.raw_timeline.take() {
     147               6 :             let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();
     148               6 :             error!("Timeline got dropped without initializing, cleaning its files");
     149               6 :             cleanup_timeline_directory(uninit_mark);
     150             912 :         }
     151             918 :     }
     152                 : }
     153                 : 
     154               7 : pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) {
     155               7 :     let timeline_path = &uninit_mark.timeline_path;
     156               7 :     match fs_ext::ignore_absent_files(|| fs::remove_dir_all(timeline_path)) {
     157                 :         Ok(()) => {
     158               7 :             info!("Timeline dir {timeline_path:?} removed successfully, removing the uninit mark")
     159                 :         }
     160 UBC           0 :         Err(e) => {
     161               0 :             error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
     162                 :         }
     163                 :     }
     164 CBC           7 :     drop(uninit_mark); // mark handles its deletion on drop, gets retained if timeline dir exists
     165               7 : }
     166                 : 
     167                 : /// An uninit mark file, created along the timeline dir to ensure the timeline either gets fully initialized and loaded into pageserver's memory,
     168                 : /// or gets removed eventually.
     169                 : ///
     170                 : /// XXX: it's important to create it near the timeline dir, not inside it to ensure timeline dir gets removed first.
     171                 : #[must_use]
     172                 : pub(crate) struct TimelineUninitMark<'t> {
     173                 :     owning_tenant: &'t Tenant,
     174                 :     timeline_id: TimelineId,
     175                 :     uninit_mark_deleted: bool,
     176                 :     uninit_mark_path: Utf8PathBuf,
     177                 :     pub(crate) timeline_path: Utf8PathBuf,
     178                 : }
     179                 : 
     180                 : /// Errors when acquiring exclusive access to a timeline ID for creation
     181               1 : #[derive(thiserror::Error, Debug)]
     182                 : pub(crate) enum TimelineExclusionError {
     183                 :     #[error("Already exists")]
     184                 :     AlreadyExists(Arc<Timeline>),
     185                 :     #[error("Already creating")]
     186                 :     AlreadyCreating,
     187                 : 
     188                 :     // e.g. I/O errors, or some failure deep in postgres initdb
     189                 :     #[error(transparent)]
     190                 :     Other(#[from] anyhow::Error),
     191                 : }
     192                 : 
     193                 : impl<'t> TimelineUninitMark<'t> {
     194             939 :     pub(crate) fn new(
     195             939 :         owning_tenant: &'t Tenant,
     196             939 :         timeline_id: TimelineId,
     197             939 :         uninit_mark_path: Utf8PathBuf,
     198             939 :         timeline_path: Utf8PathBuf,
     199             939 :     ) -> Result<Self, TimelineExclusionError> {
     200             939 :         // Lock order: this is the only place we take both locks.  During drop() we only
     201             939 :         // lock creating_timelines
     202             939 :         let timelines = owning_tenant.timelines.lock().unwrap();
     203             939 :         let mut creating_timelines: std::sync::MutexGuard<
     204             939 :             '_,
     205             939 :             std::collections::HashSet<TimelineId>,
     206             939 :         > = owning_tenant.timelines_creating.lock().unwrap();
     207                 : 
     208             939 :         if let Some(existing) = timelines.get(&timeline_id) {
     209               1 :             Err(TimelineExclusionError::AlreadyExists(existing.clone()))
     210             938 :         } else if creating_timelines.contains(&timeline_id) {
     211 UBC           0 :             Err(TimelineExclusionError::AlreadyCreating)
     212                 :         } else {
     213 CBC         938 :             creating_timelines.insert(timeline_id);
     214             938 :             Ok(Self {
     215             938 :                 owning_tenant,
     216             938 :                 timeline_id,
     217             938 :                 uninit_mark_deleted: false,
     218             938 :                 uninit_mark_path,
     219             938 :                 timeline_path,
     220             938 :             })
     221                 :         }
     222             939 :     }
     223                 : 
     224             912 :     fn remove_uninit_mark(mut self) -> anyhow::Result<()> {
     225             912 :         if !self.uninit_mark_deleted {
     226             912 :             self.delete_mark_file_if_present()?;
     227 UBC           0 :         }
     228                 : 
     229 CBC         912 :         Ok(())
     230             912 :     }
     231                 : 
     232             935 :     fn delete_mark_file_if_present(&mut self) -> anyhow::Result<()> {
     233             935 :         let uninit_mark_file = &self.uninit_mark_path;
     234             935 :         let uninit_mark_parent = uninit_mark_file
     235             935 :             .parent()
     236             935 :             .with_context(|| format!("Uninit mark file {uninit_mark_file:?} has no parent"))?;
     237             935 :         fs_ext::ignore_absent_files(|| fs::remove_file(uninit_mark_file)).with_context(|| {
     238 UBC           0 :             format!("Failed to remove uninit mark file at path {uninit_mark_file:?}")
     239 CBC         935 :         })?;
     240             935 :         crashsafe::fsync(uninit_mark_parent).context("Failed to fsync uninit mark parent")?;
     241             935 :         self.uninit_mark_deleted = true;
     242             935 : 
     243             935 :         Ok(())
     244             935 :     }
     245                 : }
     246                 : 
     247                 : impl Drop for TimelineUninitMark<'_> {
     248             935 :     fn drop(&mut self) {
     249             935 :         if !self.uninit_mark_deleted {
     250              23 :             if self.timeline_path.exists() {
     251 UBC           0 :                 error!(
     252               0 :                     "Uninit mark {} is not removed, timeline {} stays uninitialized",
     253               0 :                     self.uninit_mark_path, self.timeline_path
     254               0 :                 )
     255                 :             } else {
     256                 :                 // unblock later timeline creation attempts
     257 CBC          23 :                 warn!(
     258              23 :                     "Removing intermediate uninit mark file {}",
     259              23 :                     self.uninit_mark_path
     260              23 :                 );
     261              23 :                 if let Err(e) = self.delete_mark_file_if_present() {
     262 UBC           0 :                     error!("Failed to remove the uninit mark file: {e}")
     263 CBC          23 :                 }
     264                 :             }
     265             912 :         }
     266                 : 
     267             935 :         self.owning_tenant
     268             935 :             .timelines_creating
     269             935 :             .lock()
     270             935 :             .unwrap()
     271             935 :             .remove(&self.timeline_id);
     272             935 :     }
     273                 : }
        

Generated by: LCOV version 2.1-beta