LCOV - differential code coverage report
Current view: top level - libs/utils/src - pid_file.rs (source / functions) Coverage Total Hit UBC CBC
Current: f6946e90941b557c917ac98cd5a7e9506d180f3e.info Lines: 86.1 % 36 31 5 31
Current Date: 2023-10-19 02:04:12 Functions: 60.0 % 5 3 2 3
Baseline: c8637f37369098875162f194f92736355783b050.info
Baseline Date: 2023-10-18 20:25:20

           TLA  Line data    Source code
       1                 : //! Abstraction to create & read pidfiles.
       2                 : //!
       3                 : //! A pidfile is a file in the filesystem that stores a process's PID.
       4                 : //! Its purpose is to implement a singleton behavior where only
       5                 : //! one process of some "kind" is supposed to be running at a given time.
       6                 : //! The "kind" is identified by the pidfile.
       7                 : //!
       8                 : //! During process startup, the process that is supposed to be a singleton
       9                 : //! must [claim][`claim_for_current_process`] the pidfile first.
      10                 : //! If that is unsuccessful, the process must not act as the singleton, i.e.,
      11                 : //! it must not access any of the resources that only the singleton may access.
      12                 : //!
      13                 : //! A common need is to signal a running singleton process, e.g., to make
      14                 : //! it shut down and exit.
      15                 : //! For that, we have to [`read`] the pidfile. The result of the `read` operation
      16                 : //! tells us if there is any singleton process, and if so, what PID it has.
      17                 : //! We can then proceed to signal it, although some caveats still apply.
      18                 : //! Read the function-level documentation of [`read`] for that.
      19                 : //!
      20                 : //! ## Never Remove Pidfiles
      21                 : //!
      22                 : //! It would be natural to assume that the process who claimed the pidfile
      23                 : //! should remove it upon exit to avoid leaving a stale pidfile in place.
      24                 : //! However, we already have a reliable way to detect staleness of the pidfile,
      25                 : //! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
      26                 : //!
      27                 : //! And further, removing pidfiles would introduce a **catastrophic race condition**
      28                 : //! where two processes are running that are supposed to be singletons.
      29                 : //! Suppose we were to remove our pidfile during process shutdown.
      30                 : //! Here is how the race plays out:
      31                 : //! - Suppose we have a service called `myservice` with pidfile `myservice.pid`.
      32                 : //! - Process `A` starts to shut down.
      33                 : //! - Process `B` is just starting up
      34                 : //!     - It `open("myservice.pid", O_WRONLY|O_CREAT)`s the file
      35                 : //!     - It blocks on `flock`
      36                 : //! - Process `A` removes the pidfile as the last step of its shutdown procedure
      37                 : //!     - `unlink("myservice.pid")`
      38                 : //! - Process `A` exits
      39                 : //!     - This releases its `flock` and unblocks `B`
      40                 : //! - Process `B` still has the file descriptor for `myservice.pid` open
      41                 : //! - Process `B` writes its PID into `myservice.pid`.
      42                 : //! - But the `myservice.pid` file has been unlinked, so, there is no `myservice.pid`
      43                 : //!   in the directory.
      44                 : //! - Process `C` starts
      45                 : //!     - It `open("myservice.pid", O_WRONLY|O_CREAT)` which creates a new file (new inode)
      46                 : //!     - It `flock`s the file, which, since it's a different file, does not block
      47                 : //!     - It writes its PID into the file
      48                 : //!
      49                 : //! At this point, `B` and `C` are running, which is hazardous.
      50                 : //! Moral of the story: don't unlink pidfiles, ever.
      51                 : 
      52                 : use std::ops::Deref;
      53                 : 
      54                 : use anyhow::Context;
      55                 : use camino::Utf8Path;
      56                 : use nix::unistd::Pid;
      57                 : 
      58                 : use crate::lock_file::{self, LockFileRead};
      59                 : 
      60                 : /// Keeps a claim on a pidfile alive until it is dropped.
      61                 : /// Returned by [`claim_for_current_process`].
      62                 : #[must_use]
      63                 : pub struct PidFileGuard(lock_file::LockFileGuard);
      64                 : 
      65                 : impl Deref for PidFileGuard {
      66                 :     type Target = lock_file::LockFileGuard;
      67                 : 
      68 UBC           0 :     fn deref(&self) -> &Self::Target {
      69               0 :         &self.0
      70               0 :     }
      71                 : }
      72                 : 
      73                 : /// Try to claim `path` as a pidfile for the current process.
      74                 : ///
      75                 : /// If another process has already claimed the pidfile, and it is still running,
      76                 : /// this function returns an error.
      77                 : /// Otherwise, the function `flock`s the file and updates its contents to the
      78                 : /// current process's PID.
      79                 : /// If the update fails, the flock is released and an error returned.
      80                 : /// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
      81                 : ///
      82                 : /// ### Maintaining A Claim
      83                 : ///
      84                 : /// It is the caller's responsibility to maintain the claim.
      85                 : /// The claim ends as soon as the returned guard object is dropped.
      86                 : /// To maintain the claim for the remaining lifetime of the current process,
      87                 : /// use [`std::mem::forget`] or similar.
      88 CBC        1060 : pub fn claim_for_current_process(path: &Utf8Path) -> anyhow::Result<PidFileGuard> {
      89            1060 :     let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
      90                 :     // if any of the next steps fail, we drop the file descriptor and thereby release the lock
      91            1060 :     let guard = unwritten_lock_file
      92            1060 :         .write_content(Pid::this().to_string())
      93            1060 :         .context("write pid to lock file")?;
      94            1060 :     Ok(PidFileGuard(guard))
      95            1060 : }
      96                 : 
      97                 : /// Returned by [`read`].
      98                 : pub enum PidFileRead {
      99                 :     /// No file exists at the given path.
     100                 :     NotExist,
     101                 :     /// The given pidfile is currently not claimed by any process.
     102                 :     /// To determine this, the [`read`] operation acquired
     103                 :     /// an exclusive flock on the file. The lock is still held and responsibility
     104                 :     /// to release it is returned through the guard object.
     105                 :     /// Until it is released, other [`claim_for_current_process`] or [`read`] calls
     106                 :     /// will fail.
     107                 :     ///
     108                 :     /// ### Caveats
     109                 :     ///
     110                 :     /// Do not unlink the pidfile from the filesystem. See module-comment for why.
     111                 :     NotHeldByAnyProcess(PidFileGuard),
     112                 :     /// The given pidfile is still claimed by another process whose PID is given
     113                 :     /// as part of this variant.
     114                 :     ///
     115                 :     /// ### Caveats
     116                 :     ///
     117                 :     /// 1. The other process might exit at any time, turning the given PID stale.
     118                 :     /// 2. There is a small window in which `claim_for_current_process` has already
     119                 :     ///    locked the file but not yet updated its contents. [`read`] will return
     120                 :     ///    this variant here, but with the old file contents, i.e., a stale PID.
     121                 :     ///
     122                 :     /// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by
     123                 :     /// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
     124                 :     /// system call on it, bears the risk of killing an unrelated process.
     125                 :     /// This is an inherent limitation of using pidfiles.
     126                 :     /// The only race-free solution is to have a supervisor-process with a lifetime
     127                 :     /// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
     128                 :     LockedByOtherProcess(Pid),
     129                 : }
     130                 : 
     131                 : /// Try to read the file at the given path as a pidfile that was previously created
     132                 : /// through [`claim_for_current_process`].
     133                 : ///
     134                 : /// On success, this function returns a [`PidFileRead`].
     135                 : /// Check its docs for a description of the meaning of its different variants.
     136            2190 : pub fn read(pidfile: &Utf8Path) -> anyhow::Result<PidFileRead> {
     137            2190 :     let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
     138            2190 :     let ret = match res {
     139 UBC           0 :         LockFileRead::NotExist => PidFileRead::NotExist,
     140 CBC          44 :         LockFileRead::NotHeldByAnyProcess(guard, _) => {
     141              44 :             PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
     142                 :         }
     143                 :         LockFileRead::LockedByOtherProcess {
     144            2146 :             not_locked_file: _not_locked_file,
     145            2146 :             content,
     146            2146 :         } => {
     147            2146 :             // XXX the read races with the write in claim_for_current_process().
     148            2146 :             // But pids are smaller than a page, so the kernel page cache will lock for us.
     149            2146 :             // The only problem is that we might get the old contents here.
     150            2146 :             // Can only fix that by implementing some scheme that downgrades the
     151            2146 :             // exclusive lock to shared lock in claim_for_current_process().
     152            2146 :             PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
     153                 :         }
     154                 :     };
     155            2190 :     Ok(ret)
     156            2190 : }
     157                 : 
     158            2146 : fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
     159            2146 :     let pid: i32 = content
     160            2146 :         .parse()
     161            2146 :         .map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
     162            2146 :     if pid < 1 {
     163 UBC           0 :         anyhow::bail!("bad value in pidfile '{pid}'");
     164 CBC        2146 :     }
     165            2146 :     Ok(Pid::from_raw(pid))
     166            2146 : }
        

Generated by: LCOV version 2.1-beta