LCOV - code coverage report
Current view: top level - pageserver/src/tenant/timeline - uninit.rs (source / functions) Coverage Total Hit
Test: 32f4a56327bc9da697706839ed4836b2a00a408f.info Lines: 86.7 % 173 150
Test Date: 2024-02-07 07:37:29 Functions: 51.6 % 31 16

            Line data    Source code
       1              : use std::{collections::hash_map::Entry, fs, sync::Arc};
       2              : 
       3              : use anyhow::Context;
       4              : use camino::Utf8PathBuf;
       5              : use tracing::{error, info, info_span, warn};
       6              : use utils::{crashsafe, fs_ext, id::TimelineId, lsn::Lsn};
       7              : 
       8              : use crate::{context::RequestContext, import_datadir, tenant::Tenant};
       9              : 
      10              : use super::Timeline;
      11              : 
/// A timeline with some of its files on disk, being initialized.
/// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or
/// its local files are removed. In the worst case of a crash, an uninit mark file is left behind, which causes the directory
/// to be removed on next restart.
///
/// The caller is responsible for proper timeline data filling before the final init.
#[must_use]
pub struct UninitializedTimeline<'t> {
    /// Tenant whose `timelines` map receives the timeline in `finish_creation`.
    pub(crate) owning_tenant: &'t Tenant,
    /// Id of the timeline being created; used as the key in the tenant's `timelines` map.
    timeline_id: TimelineId,
    /// The timeline under construction together with its uninit mark.
    ///
    /// `finish_creation` takes the pair out once the slot in the tenant map is known to be
    /// vacant. If it is still `Some` when this struct is dropped, the `Drop` impl hands the
    /// mark to `cleanup_timeline_directory` to remove the timeline directory.
    raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
}
      24              : 
      25              : impl<'t> UninitializedTimeline<'t> {
      26         1143 :     pub(crate) fn new(
      27         1143 :         owning_tenant: &'t Tenant,
      28         1143 :         timeline_id: TimelineId,
      29         1143 :         raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
      30         1143 :     ) -> Self {
      31         1143 :         Self {
      32         1143 :             owning_tenant,
      33         1143 :             timeline_id,
      34         1143 :             raw_timeline,
      35         1143 :         }
      36         1143 :     }
      37              : 
      38              :     /// Finish timeline creation: insert it into the Tenant's timelines map and remove the
      39              :     /// uninit mark file.
      40              :     ///
      41              :     /// This function launches the flush loop if not already done.
      42              :     ///
      43              :     /// The caller is responsible for activating the timeline (function `.activate()`).
      44         1134 :     pub(crate) fn finish_creation(mut self) -> anyhow::Result<Arc<Timeline>> {
      45         1134 :         let timeline_id = self.timeline_id;
      46         1134 :         let tenant_shard_id = self.owning_tenant.tenant_shard_id;
      47         1134 : 
      48         1134 :         if self.raw_timeline.is_none() {
      49            0 :             return Err(anyhow::anyhow!(
      50            0 :                 "No timeline for initialization found for {tenant_shard_id}/{timeline_id}"
      51            0 :             ));
      52         1134 :         }
      53         1134 : 
      54         1134 :         // Check that the caller initialized disk_consistent_lsn
      55         1134 :         let new_disk_consistent_lsn = self
      56         1134 :             .raw_timeline
      57         1134 :             .as_ref()
      58         1134 :             .expect("checked above")
      59         1134 :             .0
      60         1134 :             .get_disk_consistent_lsn();
      61         1134 : 
      62         1134 :         anyhow::ensure!(
      63         1134 :             new_disk_consistent_lsn.is_valid(),
      64            1 :             "new timeline {tenant_shard_id}/{timeline_id} has invalid disk_consistent_lsn"
      65              :         );
      66              : 
      67         1133 :         let mut timelines = self.owning_tenant.timelines.lock().unwrap();
      68         1133 :         match timelines.entry(timeline_id) {
      69            0 :             Entry::Occupied(_) => anyhow::bail!(
      70            0 :                 "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map"
      71            0 :             ),
      72         1133 :             Entry::Vacant(v) => {
      73         1133 :                 // after taking here should be no fallible operations, because the drop guard will not
      74         1133 :                 // cleanup after and would block for example the tenant deletion
      75         1133 :                 let (new_timeline, uninit_mark) =
      76         1133 :                     self.raw_timeline.take().expect("already checked");
      77         1133 : 
      78         1133 :                 // this is the mutual exclusion between different retries to create the timeline;
      79         1133 :                 // this should be an assertion.
      80         1133 :                 uninit_mark.remove_uninit_mark().with_context(|| {
      81            0 :                     format!(
      82            0 :                         "Failed to remove uninit mark file for timeline {tenant_shard_id}/{timeline_id}"
      83            0 :                     )
      84         1133 :                 })?;
      85         1133 :                 v.insert(Arc::clone(&new_timeline));
      86         1133 : 
      87         1133 :                 new_timeline.maybe_spawn_flush_loop();
      88         1133 : 
      89         1133 :                 Ok(new_timeline)
      90              :             }
      91              :         }
      92         1134 :     }
      93              : 
      94              :     /// Prepares timeline data by loading it from the basebackup archive.
      95           12 :     pub(crate) async fn import_basebackup_from_tar(
      96           12 :         self,
      97           12 :         copyin_read: &mut (impl tokio::io::AsyncRead + Send + Sync + Unpin),
      98           12 :         base_lsn: Lsn,
      99           12 :         broker_client: storage_broker::BrokerClientChannel,
     100           12 :         ctx: &RequestContext,
     101           12 :     ) -> anyhow::Result<Arc<Timeline>> {
     102           12 :         let raw_timeline = self.raw_timeline()?;
     103              : 
     104           12 :         import_datadir::import_basebackup_from_tar(raw_timeline, copyin_read, base_lsn, ctx)
     105         7696 :             .await
     106           12 :             .context("Failed to import basebackup")?;
     107              : 
     108              :         // Flush the new layer files to disk, before we make the timeline as available to
     109              :         // the outside world.
     110              :         //
     111              :         // Flush loop needs to be spawned in order to be able to flush.
     112           10 :         raw_timeline.maybe_spawn_flush_loop();
     113           10 : 
     114           10 :         fail::fail_point!("before-checkpoint-new-timeline", |_| {
     115            0 :             anyhow::bail!("failpoint before-checkpoint-new-timeline");
     116           10 :         });
     117              : 
     118           10 :         raw_timeline
     119           10 :             .freeze_and_flush()
     120           11 :             .await
     121           10 :             .context("Failed to flush after basebackup import")?;
     122              : 
     123              :         // All the data has been imported. Insert the Timeline into the tenant's timelines
     124              :         // map and remove the uninit mark file.
     125           10 :         let tl = self.finish_creation()?;
     126           10 :         tl.activate(broker_client, None, ctx);
     127           10 :         Ok(tl)
     128           12 :     }
     129              : 
     130          677 :     pub(crate) fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
     131          677 :         Ok(&self
     132          677 :             .raw_timeline
     133          677 :             .as_ref()
     134          677 :             .with_context(|| {
     135            0 :                 format!(
     136            0 :                     "No raw timeline {}/{} found",
     137            0 :                     self.owning_tenant.tenant_shard_id, self.timeline_id
     138            0 :                 )
     139          677 :             })?
     140              :             .0)
     141          677 :     }
     142              : }
     143              : 
     144              : impl Drop for UninitializedTimeline<'_> {
     145         1140 :     fn drop(&mut self) {
     146         1140 :         if let Some((_, uninit_mark)) = self.raw_timeline.take() {
     147            7 :             let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_shard_id.tenant_id, shard_id = %self.owning_tenant.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id).entered();
     148            7 :             error!("Timeline got dropped without initializing, cleaning its files");
     149            7 :             cleanup_timeline_directory(uninit_mark);
     150         1133 :         }
     151         1140 :     }
     152              : }
     153              : 
     154            8 : pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) {
     155            8 :     let timeline_path = &uninit_mark.timeline_path;
     156            8 :     match fs_ext::ignore_absent_files(|| fs::remove_dir_all(timeline_path)) {
     157              :         Ok(()) => {
     158            8 :             info!("Timeline dir {timeline_path:?} removed successfully, removing the uninit mark")
     159              :         }
     160            0 :         Err(e) => {
     161            0 :             error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}")
     162              :         }
     163              :     }
     164            8 :     drop(uninit_mark); // mark handles its deletion on drop, gets retained if timeline dir exists
     165            8 : }
     166              : 
/// An uninit mark file, created along the timeline dir to ensure the timeline either gets fully initialized and loaded into pageserver's memory,
/// or gets removed eventually.
///
/// XXX: it's important to create it near the timeline dir, not inside it to ensure timeline dir gets removed first.
#[must_use]
pub(crate) struct TimelineUninitMark<'t> {
    /// Tenant whose `timelines_creating` set holds `timeline_id` for as long as this mark
    /// is alive; the entry is removed again in `Drop`.
    owning_tenant: &'t Tenant,
    /// Id of the timeline this mark guards.
    timeline_id: TimelineId,
    /// Set to `true` once the mark file has been removed and its parent directory fsynced.
    uninit_mark_deleted: bool,
    /// Path of the mark file itself.
    uninit_mark_path: Utf8PathBuf,
    /// Path of the timeline directory. On drop the mark file is only removed if this
    /// directory no longer exists.
    pub(crate) timeline_path: Utf8PathBuf,
}
     179              : 
/// Errors when acquiring exclusive access to a timeline ID for creation
#[derive(thiserror::Error, Debug)]
pub(crate) enum TimelineExclusionError {
    /// The tenant's `timelines` map already has this timeline id; carries the existing timeline.
    #[error("Already exists")]
    AlreadyExists(Arc<Timeline>),
    /// The timeline id is already present in the tenant's `timelines_creating` set.
    #[error("Already creating")]
    AlreadyCreating,

    /// Any other failure,
    // e.g. I/O errors, or some failure deep in postgres initdb
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}
     192              : 
     193              : impl<'t> TimelineUninitMark<'t> {
     194         1195 :     pub(crate) fn new(
     195         1195 :         owning_tenant: &'t Tenant,
     196         1195 :         timeline_id: TimelineId,
     197         1195 :         uninit_mark_path: Utf8PathBuf,
     198         1195 :         timeline_path: Utf8PathBuf,
     199         1195 :     ) -> Result<Self, TimelineExclusionError> {
     200         1195 :         // Lock order: this is the only place we take both locks.  During drop() we only
     201         1195 :         // lock creating_timelines
     202         1195 :         let timelines = owning_tenant.timelines.lock().unwrap();
     203         1195 :         let mut creating_timelines: std::sync::MutexGuard<
     204         1195 :             '_,
     205         1195 :             std::collections::HashSet<TimelineId>,
     206         1195 :         > = owning_tenant.timelines_creating.lock().unwrap();
     207              : 
     208         1195 :         if let Some(existing) = timelines.get(&timeline_id) {
     209           29 :             Err(TimelineExclusionError::AlreadyExists(existing.clone()))
     210         1166 :         } else if creating_timelines.contains(&timeline_id) {
     211            1 :             Err(TimelineExclusionError::AlreadyCreating)
     212              :         } else {
     213         1165 :             creating_timelines.insert(timeline_id);
     214         1165 :             Ok(Self {
     215         1165 :                 owning_tenant,
     216         1165 :                 timeline_id,
     217         1165 :                 uninit_mark_deleted: false,
     218         1165 :                 uninit_mark_path,
     219         1165 :                 timeline_path,
     220         1165 :             })
     221              :         }
     222         1195 :     }
     223              : 
     224         1133 :     fn remove_uninit_mark(mut self) -> anyhow::Result<()> {
     225         1133 :         if !self.uninit_mark_deleted {
     226         1133 :             self.delete_mark_file_if_present()?;
     227            0 :         }
     228              : 
     229         1133 :         Ok(())
     230         1133 :     }
     231              : 
     232         1161 :     fn delete_mark_file_if_present(&mut self) -> anyhow::Result<()> {
     233         1161 :         let uninit_mark_file = &self.uninit_mark_path;
     234         1161 :         let uninit_mark_parent = uninit_mark_file
     235         1161 :             .parent()
     236         1161 :             .with_context(|| format!("Uninit mark file {uninit_mark_file:?} has no parent"))?;
     237         1161 :         fs_ext::ignore_absent_files(|| fs::remove_file(uninit_mark_file)).with_context(|| {
     238            0 :             format!("Failed to remove uninit mark file at path {uninit_mark_file:?}")
     239         1161 :         })?;
     240         1161 :         crashsafe::fsync(uninit_mark_parent).context("Failed to fsync uninit mark parent")?;
     241         1161 :         self.uninit_mark_deleted = true;
     242         1161 : 
     243         1161 :         Ok(())
     244         1161 :     }
     245              : }
     246              : 
     247              : impl Drop for TimelineUninitMark<'_> {
     248         1161 :     fn drop(&mut self) {
     249         1161 :         if !self.uninit_mark_deleted {
     250           28 :             if self.timeline_path.exists() {
     251            0 :                 error!(
     252            0 :                     "Uninit mark {} is not removed, timeline {} stays uninitialized",
     253            0 :                     self.uninit_mark_path, self.timeline_path
     254            0 :                 )
     255              :             } else {
     256              :                 // unblock later timeline creation attempts
     257           28 :                 warn!(
     258           28 :                     "Removing intermediate uninit mark file {}",
     259           28 :                     self.uninit_mark_path
     260           28 :                 );
     261           28 :                 if let Err(e) = self.delete_mark_file_if_present() {
     262            0 :                     error!("Failed to remove the uninit mark file: {e}")
     263           28 :                 }
     264              :             }
     265         1133 :         }
     266              : 
     267         1161 :         self.owning_tenant
     268         1161 :             .timelines_creating
     269         1161 :             .lock()
     270         1161 :             .unwrap()
     271         1161 :             .remove(&self.timeline_id);
     272         1161 :     }
     273              : }
        

Generated by: LCOV version 2.1-beta