LCOV - code coverage report
Current view: top level - libs/utils/src - logging.rs (source / functions) Coverage Total Hit
Test: 6df3fc19ec669bcfbbf9aba41d1338898d24eaa0.info Lines: 64.4 % 194 125
Test Date: 2025-03-12 18:28:53 Functions: 53.1 % 32 17

            Line data    Source code
       1              : use std::future::Future;
       2              : use std::str::FromStr;
       3              : use std::time::Duration;
       4              : 
       5              : use anyhow::Context;
       6              : use metrics::{IntCounter, IntCounterVec};
       7              : use once_cell::sync::Lazy;
       8              : use strum_macros::{EnumString, VariantNames};
       9              : use tokio::time::Instant;
      10              : use tracing::info;
      11              : 
      12              : /// Logs a critical error, similarly to `tracing::error!`. This will:
      13              : ///
      14              : /// * Emit an ERROR log message with prefix "CRITICAL:" and a backtrace.
      15              : /// * Trigger a pageable alert (via the metric below).
      16              : /// * Increment libmetrics_tracing_event_count{level="critical"}, and indirectly level="error".
      17              : /// * In debug builds, panic the process.
      18              : ///
      19              : /// When including errors in the message, please use {err:?} to include the error cause and original
      20              : /// backtrace.
      21              : #[macro_export]
      22              : macro_rules! critical {
      23              :     ($($arg:tt)*) => {{
      24              :         if cfg!(debug_assertions) {
      25              :             panic!($($arg)*);
      26              :         }
      27              :         $crate::logging::TRACING_EVENT_COUNT_METRIC.inc_critical();
      28              :         let backtrace = std::backtrace::Backtrace::capture();
      29              :         tracing::error!("CRITICAL: {}\n{backtrace}", format!($($arg)*));
      30              :     }};
      31              : }
      32              : 
      33            0 : #[derive(EnumString, strum_macros::Display, VariantNames, Eq, PartialEq, Debug, Clone, Copy)]
      34              : #[strum(serialize_all = "snake_case")]
      35              : pub enum LogFormat {
      36              :     Plain,
      37              :     Json,
      38              :     Test,
      39              : }
      40              : 
      41              : impl LogFormat {
      42            0 :     pub fn from_config(s: &str) -> anyhow::Result<LogFormat> {
      43              :         use strum::VariantNames;
      44            0 :         LogFormat::from_str(s).with_context(|| {
      45            0 :             format!(
      46            0 :                 "Unrecognized log format. Please specify one of: {:?}",
      47            0 :                 LogFormat::VARIANTS
      48            0 :             )
      49            0 :         })
      50            0 :     }
      51              : }
      52              : 
      53              : pub struct TracingEventCountMetric {
      54              :     /// CRITICAL is not a `tracing` log level. Instead, we increment it in the `critical!` macro,
      55              :     /// and also emit it as a regular error. These are thus double-counted, but that seems fine.
      56              :     critical: IntCounter,
      57              :     error: IntCounter,
      58              :     warn: IntCounter,
      59              :     info: IntCounter,
      60              :     debug: IntCounter,
      61              :     trace: IntCounter,
      62              : }
      63              : 
      64          548 : pub static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
      65          548 :     let vec = metrics::register_int_counter_vec!(
      66          548 :         "libmetrics_tracing_event_count",
      67          548 :         "Number of tracing events, by level",
      68          548 :         &["level"]
      69          548 :     )
      70          548 :     .expect("failed to define metric");
      71          548 :     TracingEventCountMetric::new(vec)
      72          548 : });
      73              : 
      74              : impl TracingEventCountMetric {
      75          549 :     fn new(vec: IntCounterVec) -> Self {
      76          549 :         Self {
      77          549 :             critical: vec.with_label_values(&["critical"]),
      78          549 :             error: vec.with_label_values(&["error"]),
      79          549 :             warn: vec.with_label_values(&["warn"]),
      80          549 :             info: vec.with_label_values(&["info"]),
      81          549 :             debug: vec.with_label_values(&["debug"]),
      82          549 :             trace: vec.with_label_values(&["trace"]),
      83          549 :         }
      84          549 :     }
      85              : 
      86              :     // Allow public access from `critical!` macro.
      87            0 :     pub fn inc_critical(&self) {
      88            0 :         self.critical.inc();
      89            0 :     }
      90              : 
      91        49664 :     fn inc_for_level(&self, level: tracing::Level) {
      92        49664 :         let counter = match level {
      93           69 :             tracing::Level::ERROR => &self.error,
      94          451 :             tracing::Level::WARN => &self.warn,
      95        49142 :             tracing::Level::INFO => &self.info,
      96            1 :             tracing::Level::DEBUG => &self.debug,
      97            1 :             tracing::Level::TRACE => &self.trace,
      98              :         };
      99        49664 :         counter.inc();
     100        49664 :     }
     101              : }
     102              : 
     103              : struct TracingEventCountLayer(&'static TracingEventCountMetric);
     104              : 
     105              : impl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer
     106              : where
     107              :     S: tracing::Subscriber,
     108              : {
     109        49664 :     fn on_event(
     110        49664 :         &self,
     111        49664 :         event: &tracing::Event<'_>,
     112        49664 :         _ctx: tracing_subscriber::layer::Context<'_, S>,
     113        49664 :     ) {
     114        49664 :         self.0.inc_for_level(*event.metadata().level());
     115        49664 :     }
     116              : }
     117              : 
     118              : /// Whether to add the `tracing_error` crate's `ErrorLayer`
     119              : /// to the global tracing subscriber.
     120              : ///
     121              : pub enum TracingErrorLayerEnablement {
     122              :     /// Do not add the `ErrorLayer`.
     123              :     Disabled,
     124              :     /// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset.
     125              :     EnableWithRustLogFilter,
     126              : }
     127              : 
     128              : /// Where the logging should output to.
     129              : #[derive(Clone, Copy)]
     130              : pub enum Output {
     131              :     Stdout,
     132              :     Stderr,
     133              : }
     134              : 
     135          548 : pub fn init(
     136          548 :     log_format: LogFormat,
     137          548 :     tracing_error_layer_enablement: TracingErrorLayerEnablement,
     138          548 :     output: Output,
     139          548 : ) -> anyhow::Result<()> {
     140          548 :     // We fall back to printing all spans at info-level or above if
     141          548 :     // the RUST_LOG environment variable is not set.
     142         1574 :     let rust_log_env_filter = || {
     143         1574 :         tracing_subscriber::EnvFilter::try_from_default_env()
     144         1574 :             .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"))
     145         1574 :     };
     146              : 
     147              :     // NB: the order of the with() calls does not matter.
     148              :     // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
     149              :     use tracing_subscriber::prelude::*;
     150          548 :     let r = tracing_subscriber::registry();
     151          548 :     let r = r.with({
     152          548 :         let log_layer = tracing_subscriber::fmt::layer()
     153          548 :             .with_target(false)
     154          548 :             .with_ansi(false)
     155          548 :             .with_writer(move || -> Box<dyn std::io::Write> {
     156            0 :                 match output {
     157            0 :                     Output::Stdout => Box::new(std::io::stdout()),
     158            0 :                     Output::Stderr => Box::new(std::io::stderr()),
     159              :                 }
     160          548 :             });
     161          548 :         let log_layer = match log_format {
     162            0 :             LogFormat::Json => log_layer.json().boxed(),
     163            0 :             LogFormat::Plain => log_layer.boxed(),
     164          548 :             LogFormat::Test => log_layer.with_test_writer().boxed(),
     165              :         };
     166          548 :         log_layer.with_filter(rust_log_env_filter())
     167          548 :     });
     168          548 : 
     169          548 :     let r = r.with(
     170          548 :         TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
     171          548 :     );
     172          548 :     match tracing_error_layer_enablement {
     173          478 :         TracingErrorLayerEnablement::EnableWithRustLogFilter => r
     174          478 :             .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
     175          478 :             .init(),
     176           70 :         TracingErrorLayerEnablement::Disabled => r.init(),
     177              :     }
     178              : 
     179          548 :     Ok(())
     180          548 : }
     181              : 
     182              : /// Disable the default rust panic hook by using `set_hook`.
     183              : ///
     184              : /// For neon binaries, the assumption is that tracing is configured before with [`init`], after
     185              : /// that sentry is configured (if needed). sentry will install it's own on top of this, always
     186              : /// processing the panic before we log it.
     187              : ///
     188              : /// When the return value is dropped, the hook is reverted to std default hook (prints to stderr).
     189              : /// If the assumptions about the initialization order are not held, use
     190              : /// [`TracingPanicHookGuard::forget`] but keep in mind, if tracing is stopped, then panics will be
     191              : /// lost.
     192              : #[must_use]
     193            8 : pub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard {
     194            8 :     std::panic::set_hook(Box::new(tracing_panic_hook));
     195            8 :     TracingPanicHookGuard::new()
     196            8 : }
     197              : 
     198              : /// Drop guard which restores the std panic hook on drop.
     199              : ///
     200              : /// Tracing should not be used when it's not configured, but we cannot really latch on to any
     201              : /// imaginary lifetime of tracing.
     202              : pub struct TracingPanicHookGuard {
     203              :     act: bool,
     204              : }
     205              : 
     206              : impl TracingPanicHookGuard {
     207            8 :     fn new() -> Self {
     208            8 :         TracingPanicHookGuard { act: true }
     209            8 :     }
     210              : 
     211              :     /// Make this hook guard not do anything when dropped.
     212            8 :     pub fn forget(&mut self) {
     213            8 :         self.act = false;
     214            8 :     }
     215              : }
     216              : 
     217              : impl Drop for TracingPanicHookGuard {
     218            8 :     fn drop(&mut self) {
     219            8 :         if self.act {
     220            0 :             let _ = std::panic::take_hook();
     221            8 :         }
     222            8 :     }
     223              : }
     224              : 
     225              : /// Named symbol for our panic hook, which logs the panic.
     226           12 : fn tracing_panic_hook(info: &std::panic::PanicHookInfo) {
     227           12 :     // following rust 1.66.1 std implementation:
     228           12 :     // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
     229           12 :     let location = info.location();
     230              : 
     231           12 :     let msg = match info.payload().downcast_ref::<&'static str>() {
     232            0 :         Some(s) => *s,
     233           12 :         None => match info.payload().downcast_ref::<String>() {
     234           12 :             Some(s) => &s[..],
     235            0 :             None => "Box<dyn Any>",
     236              :         },
     237              :     };
     238              : 
     239           12 :     let thread = std::thread::current();
     240           12 :     let thread = thread.name().unwrap_or("<unnamed>");
     241           12 :     let backtrace = std::backtrace::Backtrace::capture();
     242              : 
     243           12 :     let _entered = if let Some(location) = location {
     244           12 :         tracing::error_span!("panic", %thread, location = %PrettyLocation(location))
     245              :     } else {
     246              :         // very unlikely to hit here, but the guarantees of std could change
     247            0 :         tracing::error_span!("panic", %thread)
     248              :     }
     249           12 :     .entered();
     250           12 : 
     251           12 :     if backtrace.status() == std::backtrace::BacktraceStatus::Captured {
     252              :         // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really
     253              :         // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to
     254              :         // string, maybe even to a TLS one but tracing already does that.
     255           12 :         tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}");
     256              :     } else {
     257            0 :         tracing::error!("{msg}");
     258              :     }
     259              : 
     260              :     // ensure that we log something on the panic if this hook is left after tracing has been
     261              :     // unconfigured. worst case when teardown is racing the panic is to log the panic twice.
     262           12 :     tracing::dispatcher::get_default(|d| {
     263           12 :         if let Some(_none) = d.downcast_ref::<tracing::subscriber::NoSubscriber>() {
     264            0 :             let location = location.map(PrettyLocation);
     265            0 :             log_panic_to_stderr(thread, msg, location, &backtrace);
     266           12 :         }
     267           12 :     });
     268           12 : }
     269              : 
     270              : #[cold]
     271            0 : fn log_panic_to_stderr(
     272            0 :     thread: &str,
     273            0 :     msg: &str,
     274            0 :     location: Option<PrettyLocation<'_, '_>>,
     275            0 :     backtrace: &std::backtrace::Backtrace,
     276            0 : ) {
     277            0 :     eprintln!(
     278            0 :         "panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}"
     279            0 :     );
     280            0 : }
     281              : 
     282              : struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>);
     283              : 
     284              : impl std::fmt::Display for PrettyLocation<'_, '_> {
     285           12 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     286           12 :         write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column())
     287           12 :     }
     288              : }
     289              : 
     290              : impl std::fmt::Debug for PrettyLocation<'_, '_> {
     291            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     292            0 :         <Self as std::fmt::Display>::fmt(self, f)
     293            0 :     }
     294              : }
     295              : 
     296              : /// When you will store a secret but want to make sure it won't
     297              : /// be accidentally logged, wrap it in a SecretString, whose Debug
     298              : /// implementation does not expose the contents.
     299              : #[derive(Clone, Eq, PartialEq)]
     300              : pub struct SecretString(String);
     301              : 
     302              : impl SecretString {
     303            0 :     pub fn get_contents(&self) -> &str {
     304            0 :         self.0.as_str()
     305            0 :     }
     306              : }
     307              : 
     308              : impl From<String> for SecretString {
     309            0 :     fn from(s: String) -> Self {
     310            0 :         Self(s)
     311            0 :     }
     312              : }
     313              : 
     314              : impl FromStr for SecretString {
     315              :     type Err = std::convert::Infallible;
     316              : 
     317            0 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     318            0 :         Ok(Self(s.to_string()))
     319            0 :     }
     320              : }
     321              : 
     322              : impl std::fmt::Debug for SecretString {
     323            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     324            0 :         write!(f, "[SECRET]")
     325            0 :     }
     326              : }
     327              : 
     328              : /// Logs a periodic message if a future is slow to complete.
     329              : ///
     330              : /// This is performance-sensitive as it's used on the GetPage read path.
     331              : ///
     332              : /// TODO: consider upgrading this to a warning, but currently it fires too often.
     333              : #[inline]
     334            0 : pub async fn log_slow<O>(name: &str, threshold: Duration, f: impl Future<Output = O>) -> O {
     335            0 :     // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and
     336            0 :     // won't fit on the stack.
     337            0 :     let mut f = Box::pin(f);
     338            0 : 
     339            0 :     let started = Instant::now();
     340            0 :     let mut attempt = 1;
     341              : 
     342              :     loop {
     343              :         // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common
     344              :         // case where the timeout doesn't fire.
     345            0 :         let deadline = started + attempt * threshold;
     346            0 :         if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await {
     347              :             // NB: we check if we exceeded the threshold even if the timeout never fired, because
     348              :             // scheduling or execution delays may cause the future to succeed even if it exceeds the
     349              :             // timeout. This costs an extra unconditional clock reading, but seems worth it to avoid
     350              :             // false negatives.
     351            0 :             let elapsed = started.elapsed();
     352            0 :             if elapsed >= threshold {
     353            0 :                 info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64());
     354            0 :             }
     355            0 :             return output;
     356            0 :         }
     357            0 : 
     358            0 :         let elapsed = started.elapsed().as_secs_f64();
     359            0 :         info!("slow {name} still running after {elapsed:.3}s",);
     360              : 
     361            0 :         attempt += 1;
     362              :     }
     363            0 : }
     364              : 
     365              : #[cfg(test)]
     366              : mod tests {
     367              :     use metrics::IntCounterVec;
     368              :     use metrics::core::Opts;
     369              : 
     370              :     use crate::logging::{TracingEventCountLayer, TracingEventCountMetric};
     371              : 
     372              :     #[test]
     373            1 :     fn tracing_event_count_metric() {
     374            1 :         let counter_vec =
     375            1 :             IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap();
     376            1 :         let metric = Box::leak(Box::new(TracingEventCountMetric::new(counter_vec.clone())));
     377            1 :         let layer = TracingEventCountLayer(metric);
     378              :         use tracing_subscriber::prelude::*;
     379              : 
     380            1 :         tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {
     381            1 :             tracing::trace!("foo");
     382            1 :             tracing::debug!("foo");
     383            1 :             tracing::info!("foo");
     384            1 :             tracing::warn!("foo");
     385            1 :             tracing::error!("foo");
     386            1 :         });
     387            1 : 
     388            1 :         assert_eq!(counter_vec.with_label_values(&["trace"]).get(), 1);
     389            1 :         assert_eq!(counter_vec.with_label_values(&["debug"]).get(), 1);
     390            1 :         assert_eq!(counter_vec.with_label_values(&["info"]).get(), 1);
     391            1 :         assert_eq!(counter_vec.with_label_values(&["warn"]).get(), 1);
     392            1 :         assert_eq!(counter_vec.with_label_values(&["error"]).get(), 1);
     393            1 :     }
     394              : }
        

Generated by: LCOV version 2.1-beta