LCOV - code coverage report
Current view: top level - libs/utils/src - crashsafe.rs (source / functions) Coverage Total Hit
Test: 2e3a7638747e564a4f6d1af1cc0c3b3438fbb740.info Lines: 87.9 % 190 167
Test Date: 2024-11-20 01:36:58 Functions: 50.0 % 42 21

            Line data    Source code
       1              : use std::os::fd::AsRawFd;
       2              : use std::{
       3              :     borrow::Cow,
       4              :     fs::{self, File},
       5              :     io::{self, Write},
       6              : };
       7              : 
       8              : use camino::{Utf8Path, Utf8PathBuf};
       9              : 
      10              : /// Similar to [`std::fs::create_dir`], except we fsync the
      11              : /// created directory and its parent.
      12          415 : pub fn create_dir(path: impl AsRef<Utf8Path>) -> io::Result<()> {
      13          415 :     let path = path.as_ref();
      14          415 : 
      15          415 :     fs::create_dir(path)?;
      16          413 :     fsync_file_and_parent(path)?;
      17          413 :     Ok(())
      18          415 : }
      19              : 
      20              : /// Similar to [`std::fs::create_dir_all`], except we fsync all
      21              : /// newly created directories and the pre-existing parent.
      22            5 : pub fn create_dir_all(path: impl AsRef<Utf8Path>) -> io::Result<()> {
      23            5 :     let mut path = path.as_ref();
      24            5 : 
      25            5 :     let mut dirs_to_create = Vec::new();
      26              : 
      27              :     // Figure out which directories we need to create.
      28              :     loop {
      29            8 :         match path.metadata() {
      30            4 :             Ok(metadata) if metadata.is_dir() => break,
      31              :             Ok(_) => {
      32            1 :                 return Err(io::Error::new(
      33            1 :                     io::ErrorKind::AlreadyExists,
      34            1 :                     format!("non-directory found in path: {path}"),
      35            1 :                 ));
      36              :             }
      37            4 :             Err(ref e) if e.kind() == io::ErrorKind::NotFound => {}
      38            1 :             Err(e) => return Err(e),
      39              :         }
      40              : 
      41            3 :         dirs_to_create.push(path);
      42            3 : 
      43            3 :         match path.parent() {
      44            3 :             Some(parent) => path = parent,
      45              :             None => {
      46            0 :                 return Err(io::Error::new(
      47            0 :                     io::ErrorKind::InvalidInput,
      48            0 :                     format!("can't find parent of path '{path}'"),
      49            0 :                 ));
      50              :             }
      51              :         }
      52              :     }
      53              : 
      54              :     // Create directories from parent to child.
      55            3 :     for &path in dirs_to_create.iter().rev() {
      56            3 :         fs::create_dir(path)?;
      57              :     }
      58              : 
      59              :     // Fsync the created directories from child to parent.
      60            3 :     for &path in dirs_to_create.iter() {
      61            3 :         fsync(path)?;
      62              :     }
      63              : 
      64              :     // If we created any new directories, fsync the parent.
      65            3 :     if !dirs_to_create.is_empty() {
      66            2 :         fsync(path)?;
      67            1 :     }
      68              : 
      69            3 :     Ok(())
      70            5 : }
      71              : 
      72              : /// Adds a suffix to the file(directory) name, either appending the suffix to the end of its extension,
      73              : /// or if there's no extension, creates one and puts a suffix there.
      74         2918 : pub fn path_with_suffix_extension(
      75         2918 :     original_path: impl AsRef<Utf8Path>,
      76         2918 :     suffix: &str,
      77         2918 : ) -> Utf8PathBuf {
      78         2918 :     let new_extension = match original_path.as_ref().extension() {
      79         1478 :         Some(extension) => Cow::Owned(format!("{extension}.{suffix}")),
      80         1440 :         None => Cow::Borrowed(suffix),
      81              :     };
      82         2918 :     original_path.as_ref().with_extension(new_extension)
      83         2918 : }
      84              : 
      85          413 : pub fn fsync_file_and_parent(file_path: &Utf8Path) -> io::Result<()> {
      86          413 :     let parent = file_path.parent().ok_or_else(|| {
      87            0 :         io::Error::new(
      88            0 :             io::ErrorKind::Other,
      89            0 :             format!("File {file_path:?} has no parent"),
      90            0 :         )
      91          413 :     })?;
      92              : 
      93          413 :     fsync(file_path)?;
      94          413 :     fsync(parent)?;
      95          413 :     Ok(())
      96          413 : }
      97              : 
      98          831 : pub fn fsync(path: &Utf8Path) -> io::Result<()> {
      99          831 :     File::open(path)
     100          831 :         .map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
     101          831 :         .and_then(|file| {
     102          831 :             file.sync_all().map_err(|e| {
     103            0 :                 io::Error::new(
     104            0 :                     e.kind(),
     105            0 :                     format!("Failed to sync file {path:?} data and metadata: {e}"),
     106            0 :                 )
     107          831 :             })
     108          831 :         })
     109          831 :         .map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}")))
     110          831 : }
     111              : 
     112            2 : pub async fn fsync_async(path: impl AsRef<Utf8Path>) -> Result<(), std::io::Error> {
     113            2 :     tokio::fs::File::open(path.as_ref()).await?.sync_all().await
     114            2 : }
     115              : 
     116            8 : pub async fn fsync_async_opt(
     117            8 :     path: impl AsRef<Utf8Path>,
     118            8 :     do_fsync: bool,
     119            8 : ) -> Result<(), std::io::Error> {
     120            8 :     if do_fsync {
     121            0 :         fsync_async(path.as_ref()).await?;
     122            8 :     }
     123            8 :     Ok(())
     124            8 : }
     125              : 
     126              : /// Like postgres' durable_rename, renames a file and issues fsyncs to make it durable. After
     127              : /// returning, both the file and rename are guaranteed to be persisted. Both paths must be on the
     128              : /// same file system.
     129              : ///
     130              : /// Unlike postgres, it only fsyncs 1) the file to make contents durable, and 2) the directory to
     131              : /// make the rename durable. This sequence ensures the target file will never be incomplete.
     132              : ///
     133              : /// Postgres also:
     134              : ///
     135              : /// * Fsyncs the target file, if it exists, before the rename, to ensure either the new or existing
     136              : ///   file survives a crash. Current callers don't need this as it should already be fsynced if
     137              : ///   durability is needed.
     138              : ///
     139              : /// * Fsyncs the file after the rename. This can be required with certain OSes or file systems (e.g.
     140              : ///   NFS), but not on Linux with most common file systems like ext4 (which we currently use).
     141              : ///
     142              : /// An audit of 8 other databases found that none fsynced the file after a rename:
     143              : /// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837180535>
     144              : ///
     145              : /// eBPF probes confirmed that this is sufficient with ext4, XFS, and ZFS, but possibly not Btrfs:
     146              : /// <https://github.com/neondatabase/neon/pull/9686#discussion_r1837926218>
     147              : ///
     148              : /// virtual_file.rs has similar code, but it doesn't use vfs.
     149              : ///
     150              : /// Useful links: <https://lwn.net/Articles/457667/>
     151              : /// <https://www.postgresql.org/message-id/flat/56583BDD.9060302%402ndquadrant.com>
     152              : /// <https://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/>
     153            4 : pub async fn durable_rename(
     154            4 :     old_path: impl AsRef<Utf8Path>,
     155            4 :     new_path: impl AsRef<Utf8Path>,
     156            4 :     do_fsync: bool,
     157            4 : ) -> io::Result<()> {
     158            4 :     // first fsync the file
     159            4 :     fsync_async_opt(old_path.as_ref(), do_fsync).await?;
     160              : 
     161              :     // Time to do the real deal.
     162            4 :     tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;
     163              : 
     164              :     // Now fsync the parent
     165            4 :     let parent = match new_path.as_ref().parent() {
     166            4 :         Some(p) => p,
     167            0 :         None => Utf8Path::new("./"), // assume current dir if there is no parent
     168              :     };
     169            4 :     fsync_async_opt(parent, do_fsync).await?;
     170              : 
     171            4 :     Ok(())
     172            4 : }
     173              : 
     174              : /// Writes a file to the specified `final_path` in a crash safe fashion, using [`std::fs`].
     175              : ///
     176              : /// The file is first written to the specified `tmp_path`, and in a second
     177              : /// step, the `tmp_path` is renamed to the `final_path`. Intermediary fsync
     178              : /// and atomic rename guarantee that, if we crash at any point, there will never
     179              : /// be a partially written file at `final_path` (but maybe at `tmp_path`).
     180              : ///
     181              : /// Callers are responsible for serializing calls of this function for a given `final_path`.
     182              : /// If they don't, there may be an error due to conflicting `tmp_path`, or there will
     183              : /// be no error and the content of `final_path` will be the "winner" caller's `content`.
     184              : /// I.e., the atomicity guarantees still hold.
     185           28 : pub fn overwrite(
     186           28 :     final_path: &Utf8Path,
     187           28 :     tmp_path: &Utf8Path,
     188           28 :     content: &[u8],
     189           28 : ) -> std::io::Result<()> {
     190           28 :     let Some(final_path_parent) = final_path.parent() else {
     191            0 :         return Err(std::io::Error::from_raw_os_error(
     192            0 :             nix::errno::Errno::EINVAL as i32,
     193            0 :         ));
     194              :     };
     195           28 :     std::fs::remove_file(tmp_path).or_else(crate::fs_ext::ignore_not_found)?;
     196           28 :     let mut file = std::fs::OpenOptions::new()
     197           28 :         .write(true)
     198           28 :         // Use `create_new` so that, if we race with ourselves or something else,
     199           28 :         // we bail out instead of causing damage.
     200           28 :         .create_new(true)
     201           28 :         .open(tmp_path)?;
     202           28 :     file.write_all(content)?;
     203           28 :     file.sync_all()?;
     204           28 :     drop(file); // don't keep the fd open for longer than we have to
     205           28 : 
     206           28 :     std::fs::rename(tmp_path, final_path)?;
     207              : 
     208           28 :     let final_parent_dirfd = std::fs::OpenOptions::new()
     209           28 :         .read(true)
     210           28 :         .open(final_path_parent)?;
     211              : 
     212           28 :     final_parent_dirfd.sync_all()?;
     213           28 :     Ok(())
     214           28 : }
     215              : 
     216              : /// Syncs the filesystem for the given file descriptor.
     217              : #[cfg_attr(target_os = "macos", allow(unused_variables))]
     218            0 : pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
     219              :     // Linux guarantees durability for syncfs.
     220              :     // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
     221              :     #[cfg(target_os = "linux")]
     222            0 :     {
     223            0 :         use anyhow::Context;
     224            0 :         nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
     225              :     }
     226              :     #[cfg(target_os = "macos")]
     227              :     {
     228              :         // macOS is not a production platform for Neon, don't even bother.
     229              :     }
     230              :     #[cfg(not(any(target_os = "linux", target_os = "macos")))]
     231              :     {
     232              :         compile_error!("Unsupported OS");
     233              :     }
     234            0 :     Ok(())
     235            0 : }
     236              : 
     237              : #[cfg(test)]
     238              : mod tests {
     239              : 
     240              :     use super::*;
     241              : 
     242              :     #[test]
     243            1 :     fn test_create_dir_fsyncd() {
     244            1 :         let dir = camino_tempfile::tempdir().unwrap();
     245            1 : 
     246            1 :         let existing_dir_path = dir.path();
     247            1 :         let err = create_dir(existing_dir_path).unwrap_err();
     248            1 :         assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);
     249              : 
     250            1 :         let child_dir = existing_dir_path.join("child");
     251            1 :         create_dir(child_dir).unwrap();
     252            1 : 
     253            1 :         let nested_child_dir = existing_dir_path.join("child1").join("child2");
     254            1 :         let err = create_dir(nested_child_dir).unwrap_err();
     255            1 :         assert_eq!(err.kind(), io::ErrorKind::NotFound);
     256            1 :     }
     257              : 
     258              :     #[test]
     259            1 :     fn test_create_dir_all_fsyncd() {
     260            1 :         let dir = camino_tempfile::tempdir().unwrap();
     261            1 : 
     262            1 :         let existing_dir_path = dir.path();
     263            1 :         create_dir_all(existing_dir_path).unwrap();
     264            1 : 
     265            1 :         let child_dir = existing_dir_path.join("child");
     266            1 :         assert!(!child_dir.exists());
     267            1 :         create_dir_all(&child_dir).unwrap();
     268            1 :         assert!(child_dir.exists());
     269              : 
     270            1 :         let nested_child_dir = existing_dir_path.join("child1").join("child2");
     271            1 :         assert!(!nested_child_dir.exists());
     272            1 :         create_dir_all(&nested_child_dir).unwrap();
     273            1 :         assert!(nested_child_dir.exists());
     274              : 
     275            1 :         let file_path = existing_dir_path.join("file");
     276            1 :         std::fs::write(&file_path, b"").unwrap();
     277            1 : 
     278            1 :         let err = create_dir_all(&file_path).unwrap_err();
     279            1 :         assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);
     280              : 
     281            1 :         let invalid_dir_path = file_path.join("folder");
     282            1 :         create_dir_all(invalid_dir_path).unwrap_err();
     283            1 :     }
     284              : 
     285              :     #[test]
     286            1 :     fn test_path_with_suffix_extension() {
     287            1 :         let p = Utf8PathBuf::from("/foo/bar");
     288            1 :         assert_eq!(
     289            1 :             &path_with_suffix_extension(p, "temp").to_string(),
     290            1 :             "/foo/bar.temp"
     291            1 :         );
     292            1 :         let p = Utf8PathBuf::from("/foo/bar");
     293            1 :         assert_eq!(
     294            1 :             &path_with_suffix_extension(p, "temp.temp").to_string(),
     295            1 :             "/foo/bar.temp.temp"
     296            1 :         );
     297            1 :         let p = Utf8PathBuf::from("/foo/bar.baz");
     298            1 :         assert_eq!(
     299            1 :             &path_with_suffix_extension(p, "temp.temp").to_string(),
     300            1 :             "/foo/bar.baz.temp.temp"
     301            1 :         );
     302            1 :         let p = Utf8PathBuf::from("/foo/bar.baz");
     303            1 :         assert_eq!(
     304            1 :             &path_with_suffix_extension(p, ".temp").to_string(),
     305            1 :             "/foo/bar.baz..temp"
     306            1 :         );
     307            1 :         let p = Utf8PathBuf::from("/foo/bar/dir/");
     308            1 :         assert_eq!(
     309            1 :             &path_with_suffix_extension(p, ".temp").to_string(),
     310            1 :             "/foo/bar/dir..temp"
     311            1 :         );
     312            1 :     }
     313              : }
        

Generated by: LCOV version 2.1-beta