LCOV - code coverage report
Current view: top level - libs/utils/src - pid_file.rs (source / functions) Coverage Total Hit
Test: 8ac049b474321fdc72ddcb56d7165153a1a900e8.info Lines: 86.1 % 36 31
Test Date: 2023-09-06 10:18:01 Functions: 60.0 % 5 3

            Line data    Source code
       1              : //! Abstraction to create & read pidfiles.
       2              : //!
       3              : //! A pidfile is a file in the filesystem that stores a process's PID.
       4              : //! Its purpose is to implement a singleton behavior where only
       5              : //! one process of some "kind" is supposed to be running at a given time.
       6              : //! The "kind" is identified by the pidfile.
       7              : //!
       8              : //! During process startup, the process that is supposed to be a singleton
       9              : //! must [claim][`claim_for_current_process`] the pidfile first.
      10              : //! If that is unsuccessful, the process must not act as the singleton, i.e.,
      11              : //! it must not access any of the resources that only the singleton may access.
      12              : //!
      13              : //! A common need is to signal a running singleton process, e.g., to make
      14              : //! it shut down and exit.
      15              : //! For that, we have to [`read`] the pidfile. The result of the `read` operation
      16              : //! tells us if there is any singleton process, and if so, what PID it has.
      17              : //! We can then proceed to signal it, although some caveats still apply.
      18              : //! Read the function-level documentation of [`read`] for that.
      19              : //!
      20              : //! ## Never Remove Pidfiles
      21              : //!
      22              : //! It would be natural to assume that the process who claimed the pidfile
      23              : //! should remove it upon exit to avoid leaving a stale pidfile in place.
      24              : //! However, we already have a reliable way to detect staleness of the pidfile,
      25              : //! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
      26              : //!
      27              : //! And further, removing pidfiles would introduce a **catastrophic race condition**
      28              : //! where two processes are running that are supposed to be singletons.
      29              : //! Suppose we were to remove our pidfile during process shutdown.
      30              : //! Here is how the race plays out:
      31              : //! - Suppose we have a service called `myservice` with pidfile `myservice.pid`.
      32              : //! - Process `A` starts to shut down.
      33              : //! - Process `B` is just starting up
      34              : //!     - It opens the file: `open("myservice.pid", O_WRONLY|O_CREAT)`
      35              : //!     - It blocks on `flock`
      36              : //! - Process `A` removes the pidfile as the last step of its shutdown procedure
      37              : //!     - `unlink("myservice.pid")`
      38              : //! - Process `A` exits
      39              : //!     - This releases its `flock` and unblocks `B`
      40              : //! - Process `B` still has the file descriptor for `myservice.pid` open
      41              : //! - Process `B` writes its PID into `myservice.pid`.
      42              : //! - But the `myservice.pid` file has been unlinked, so there is no `myservice.pid`
      43              : //!   in the directory anymore.
      44              : //! - Process `C` starts
      45              : //!     - It calls `open("myservice.pid", O_WRONLY|O_CREAT)`, which creates a new file (new inode)
      46              : //!     - It `flock`s the file, which, since it's a different file, does not block
      47              : //!     - It writes its PID into the file
      48              : //!
      49              : //! At this point, `B` and `C` are running, which is hazardous.
      50              : //! Moral of the story: don't unlink pidfiles, ever.
      51              : 
      52              : use std::{ops::Deref, path::Path};
      53              : 
      54              : use anyhow::Context;
      55              : use nix::unistd::Pid;
      56              : 
      57              : use crate::lock_file::{self, LockFileRead};
      58              : 
      59              : /// Keeps a claim on a pidfile alive until it is dropped.
      60              : /// Returned by [`claim_for_current_process`].
      61              : #[must_use]
      62              : pub struct PidFileGuard(lock_file::LockFileGuard);
      63              : 
      64              : impl Deref for PidFileGuard {
      65              :     type Target = lock_file::LockFileGuard;
      66              : 
      67            0 :     fn deref(&self) -> &Self::Target {
      68            0 :         &self.0
      69            0 :     }
      70              : }
      71              : 
      72              : /// Try to claim `path` as a pidfile for the current process.
      73              : ///
      74              : /// If another process has already claimed the pidfile, and it is still running,
      75              : /// this function returns an error.
      76              : /// Otherwise, the function `flock`s the file and updates its contents to the
      77              : /// current process's PID.
      78              : /// If the update fails, the flock is released and an error returned.
      79              : /// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
      80              : ///
      81              : /// ### Maintaining A Claim
      82              : ///
      83              : /// It is the caller's responsibility to maintain the claim.
      84              : /// The claim ends as soon as the returned guard object is dropped.
      85              : /// To maintain the claim for the remaining lifetime of the current process,
      86              : /// use [`std::mem::forget`] or similar.
      87         1092 : pub fn claim_for_current_process(path: &Path) -> anyhow::Result<PidFileGuard> {
      88         1092 :     let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
      89              :     // if any of the next steps fail, we drop the file descriptor and thereby release the lock
      90         1092 :     let guard = unwritten_lock_file
      91         1092 :         .write_content(Pid::this().to_string())
      92         1092 :         .context("write pid to lock file")?;
      93         1092 :     Ok(PidFileGuard(guard))
      94         1092 : }
      95              : 
      96              : /// Returned by [`read`].
      97              : pub enum PidFileRead {
      98              :     /// No file exists at the given path.
      99              :     NotExist,
     100              :     /// The given pidfile is currently not claimed by any process.
     101              :     /// To determine this, the [`read`] operation acquired
     102              :     /// an exclusive flock on the file. The lock is still held and responsibility
     103              :     /// to release it is returned through the guard object.
     104              :     /// Before releasing it, other [`claim_for_current_process`] or [`read`] calls
     105              :     /// will fail.
     106              :     ///
     107              :     /// ### Caveats
     108              :     ///
     109              :     /// Do not unlink the pidfile from the filesystem. See module-comment for why.
     110              :     NotHeldByAnyProcess(PidFileGuard),
     111              :     /// The given pidfile is still claimed by another process whose PID is given
     112              :     /// as part of this variant.
     113              :     ///
     114              :     /// ### Caveats
     115              :     ///
     116              :     /// 1. The other process might exit at any time, turning the given PID stale.
     117              :     /// 2. There is a small window in which `claim_for_current_process` has already
     118              :     ///    locked the file but not yet updated its contents. [`read`] will return
     119              :     ///    this variant here, but with the old file contents, i.e., a stale PID.
     120              :     ///
     121              :     /// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by
     122              :     /// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
     123              :     /// system call on it, bears the risk of killing an unrelated process.
     124              :     /// This is an inherent limitation of using pidfiles.
     125              :     /// The only race-free solution is to have a supervisor-process with a lifetime
     126              :     /// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
     127              :     LockedByOtherProcess(Pid),
     128              : }
     129              : 
     130              : /// Try to read the file at the given path as a pidfile that was previously created
     131              : /// through [`claim_for_current_process`].
     132              : ///
     133              : /// On success, this function returns a [`PidFileRead`].
     134              : /// Check its docs for a description of the meaning of its different variants.
     135         2224 : pub fn read(pidfile: &Path) -> anyhow::Result<PidFileRead> {
     136         2224 :     let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
     137         2224 :     let ret = match res {
     138            0 :         LockFileRead::NotExist => PidFileRead::NotExist,
     139           38 :         LockFileRead::NotHeldByAnyProcess(guard, _) => {
     140           38 :             PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
     141              :         }
     142              :         LockFileRead::LockedByOtherProcess {
     143         2186 :             not_locked_file: _not_locked_file,
     144         2186 :             content,
     145         2186 :         } => {
     146         2186 :             // XXX the read races with the write in claim_for_current_process().
     147         2186 :             // But pids are smaller than a page, so the kernel page cache will lock for us.
     148         2186 :             // The only problem is that we might get the old contents here.
     149         2186 :             // Can only fix that by implementing some scheme that downgrades the
     150         2186 :             // exclusive lock to shared lock in claim_for_current_process().
     151         2186 :             PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
     152              :         }
     153              :     };
     154         2224 :     Ok(ret)
     155         2224 : }
     156              : 
     157         2186 : fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
     158         2186 :     let pid: i32 = content
     159         2186 :         .parse()
     160         2186 :         .map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
     161         2186 :     if pid < 1 {
     162            0 :         anyhow::bail!("bad value in pidfile '{pid}'");
     163         2186 :     }
     164         2186 :     Ok(Pid::from_raw(pid))
     165         2186 : }
        

Generated by: LCOV version 2.1-beta