LCOV - code coverage report
Current view: top level - libs/utils/src - pid_file.rs (source / functions) Coverage Total Hit
Test: e5024a5c05016c30dec7897aca22d1040a340f63.info Lines: 0.0 % 36 0
Test Date: 2024-11-20 11:45:54 Functions: 0.0 % 5 0

            Line data    Source code
       1              : //! Abstraction to create & read pidfiles.
       2              : //!
       3              : //! A pidfile is a file in the filesystem that stores a process's PID.
       4              : //! Its purpose is to implement a singleton behavior where only
       5              : //! one process of some "kind" is supposed to be running at a given time.
       6              : //! The "kind" is identified by the pidfile.
       7              : //!
       8              : //! During process startup, the process that is supposed to be a singleton
       9              : //! must [claim][`claim_for_current_process`] the pidfile first.
      10              : //! If that is unsuccessful, the process must not act as the singleton, i.e.,
      11              : //! it must not access any of the resources that only the singleton may access.
      12              : //!
      13              : //! A common need is to signal a running singleton process, e.g., to make
      14              : //! it shut down and exit.
      15              : //! For that, we have to [`read`] the pidfile. The result of the `read` operation
      16              : //! tells us if there is any singleton process, and if so, what PID it has.
      17              : //! We can then proceed to signal it, although some caveats still apply.
      18              : //! Read the function-level documentation of [`read`] for that.
      19              : //!
      20              : //! ## Never Remove Pidfiles
      21              : //!
      22              : //! It would be natural to assume that the process who claimed the pidfile
      23              : //! should remove it upon exit to avoid leaving a stale pidfile in place.
      24              : //! However, we already have a reliable way to detect staleness of the pidfile,
      25              : //! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
      26              : //!
      27              : //! And further, removing pidfiles would introduce a **catastrophic race condition**
      28              : //! where two processes are running that are supposed to be singletons.
      29              : //! Suppose we were to remove our pidfile during process shutdown.
      30              : //! Here is how the race plays out:
      31              : //! - Suppose we have a service called `myservice` with pidfile `myservice.pid`.
      32              : //! - Process `A` starts to shut down.
      33              : //! - Process `B` is just starting up
      34              : //!     - It `open("myservice.pid", O_WRONLY|O_CREAT)` the file
      35              : //!     - It blocks on `flock`
      36              : //! - Process `A` removes the pidfile as the last step of its shutdown procedure
      37              : //!     - `unlink("myservice.pid")`
      38              : //! - Process `A` exits
      39              : //!     - This releases its `flock` and unblocks `B`
      40              : //! - Process `B` still has the file descriptor for `myservice.pid` open
      41              : //! - Process `B` writes its PID into `myservice.pid`.
      42              : //! - But the `myservice.pid` file has been unlinked, so, there is no `myservice.pid`
      43              : //!   in the directory.
      44              : //! - Process `C` starts
      45              : //!     - It `open("myservice.pid", O_WRONLY|O_CREAT)` which creates a new file (new inode)
      46              : //!     - It `flock`s the file, which, since it's a different file, does not block
      47              : //!     - It writes its PID into the file
      48              : //!
      49              : //! At this point, `B` and `C` are running, which is hazardous.
      50              : //! Moral of the story: don't unlink pidfiles, ever.
      51              : 
      52              : use std::ops::Deref;
      53              : 
      54              : use anyhow::Context;
      55              : use camino::Utf8Path;
      56              : use nix::unistd::Pid;
      57              : 
      58              : use crate::lock_file::{self, LockFileRead};
      59              : 
      60              : /// Keeps a claim on a pidfile alive until it is dropped.
      61              : /// Returned by [`claim_for_current_process`].
      62              : #[must_use]
      63              : pub struct PidFileGuard(lock_file::LockFileGuard);
      64              : 
      65              : impl Deref for PidFileGuard {
      66              :     type Target = lock_file::LockFileGuard;
      67              : 
      68            0 :     fn deref(&self) -> &Self::Target {
      69            0 :         &self.0
      70            0 :     }
      71              : }
      72              : 
      73              : /// Try to claim `path` as a pidfile for the current process.
      74              : ///
      75              : /// If another process has already claimed the pidfile, and it is still running,
      76              : /// this function returns an error.
      77              : /// Otherwise, the function `flock`s the file and updates its contents to the
      78              : /// current process's PID.
      79              : /// If the update fails, the flock is released and an error returned.
      80              : /// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
      81              : ///
      82              : /// ### Maintaining A Claim
      83              : ///
      84              : /// It is the caller's responsibility to maintain the claim.
      85              : /// The claim ends as soon as the returned guard object is dropped.
      86              : /// To maintain the claim for the remaining lifetime of the current process,
      87              : /// use [`std::mem::forget`] or similar.
      88            0 : pub fn claim_for_current_process(path: &Utf8Path) -> anyhow::Result<PidFileGuard> {
      89            0 :     let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
      90              :     // if any of the next steps fail, we drop the file descriptor and thereby release the lock
      91            0 :     let guard = unwritten_lock_file
      92            0 :         .write_content(Pid::this().to_string())
      93            0 :         .context("write pid to lock file")?;
      94            0 :     Ok(PidFileGuard(guard))
      95            0 : }
      96              : 
      97              : /// Returned by [`read`].
      98              : pub enum PidFileRead {
      99              :     /// No file exists at the given path.
     100              :     NotExist,
     101              :     /// The given pidfile is currently not claimed by any process.
     102              :     /// To determine this, the [`read`] operation acquired
     103              :     /// an exclusive flock on the file. The lock is still held and responsibility
     104              :     /// to release it is returned through the guard object.
     105              :     /// Before releasing it, other [`claim_for_current_process`] or [`read`] calls
     106              :     /// will fail.
     107              :     ///
     108              :     /// ### Caveats
     109              :     ///
     110              :     /// Do not unlink the pidfile from the filesystem. See module-comment for why.
     111              :     NotHeldByAnyProcess(PidFileGuard),
     112              :     /// The given pidfile is still claimed by another process whose PID is given
     113              :     /// as part of this variant.
     114              :     ///
     115              :     /// ### Caveats
     116              :     ///
     117              :     /// 1. The other process might exit at any time, turning the given PID stale.
     118              :     /// 2. There is a small window in which `claim_for_current_process` has already
     119              : ///    locked the file but not yet updated its contents. [`read`] will return
     120              :     ///    this variant here, but with the old file contents, i.e., a stale PID.
     121              :     ///
     122              : /// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by
     123              :     /// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
     124              :     /// system call on it, bears the risk of killing an unrelated process.
     125              :     /// This is an inherent limitation of using pidfiles.
     126              :     /// The only race-free solution is to have a supervisor-process with a lifetime
     127              :     /// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
     128              :     LockedByOtherProcess(Pid),
     129              : }
     130              : 
     131              : /// Try to read the file at the given path as a pidfile that was previously created
     132              : /// through [`claim_for_current_process`].
     133              : ///
     134              : /// On success, this function returns a [`PidFileRead`].
     135              : /// Check its docs for a description of the meaning of its different variants.
     136            0 : pub fn read(pidfile: &Utf8Path) -> anyhow::Result<PidFileRead> {
     137            0 :     let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
     138            0 :     let ret = match res {
     139            0 :         LockFileRead::NotExist => PidFileRead::NotExist,
     140            0 :         LockFileRead::NotHeldByAnyProcess(guard, _) => {
     141            0 :             PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
     142              :         }
     143              :         LockFileRead::LockedByOtherProcess {
     144            0 :             not_locked_file: _not_locked_file,
     145            0 :             content,
     146            0 :         } => {
     147            0 :             // XXX the read races with the write in claim_for_current_process().
     148            0 :             // But pids are smaller than a page, so the kernel page cache will lock for us.
     149            0 :             // The only problem is that we might get the old contents here.
     150            0 :             // Can only fix that by implementing some scheme that downgrades the
     151            0 :             // exclusive lock to shared lock in claim_for_current_process().
     152            0 :             PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
     153              :         }
     154              :     };
     155            0 :     Ok(ret)
     156            0 : }
     157              : 
     158            0 : fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
     159            0 :     let pid: i32 = content
     160            0 :         .parse()
     161            0 :         .map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
     162            0 :     if pid < 1 {
     163            0 :         anyhow::bail!("bad value in pidfile '{pid}'");
     164            0 :     }
     165            0 :     Ok(Pid::from_raw(pid))
     166            0 : }
        

Generated by: LCOV version 2.1-beta