LCOV - differential code coverage report
Current view: top level - pageserver/src/tenant - storage_layer.rs (source / functions) Coverage Total Hit UBC CBC
Current: cd44433dd675caa99df17a61b18949c8387e2242.info Lines: 66.2 % 154 102 52 102
Current Date: 2024-01-09 02:06:09 Functions: 46.2 % 39 18 21 18
Baseline: 66c52a629a0f4a503e193045e0df4c77139e344b.info
Baseline Date: 2024-01-08 15:34:46

           TLA  Line data    Source code
       1                 : //! Common traits and structs for layers
       2                 : 
       3                 : pub mod delta_layer;
       4                 : mod filename;
       5                 : pub mod image_layer;
       6                 : mod inmemory_layer;
       7                 : pub(crate) mod layer;
       8                 : mod layer_desc;
       9                 : 
      10                 : use crate::context::{AccessStatsBehavior, RequestContext};
      11                 : use crate::task_mgr::TaskKind;
      12                 : use crate::walrecord::NeonWalRecord;
      13                 : use bytes::Bytes;
      14                 : use enum_map::EnumMap;
      15                 : use enumset::EnumSet;
      16                 : use once_cell::sync::Lazy;
      17                 : use pageserver_api::models::{
      18                 :     LayerAccessKind, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
      19                 : };
      20                 : use std::ops::Range;
      21                 : use std::sync::Mutex;
      22                 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
      23                 : use tracing::warn;
      24                 : use utils::history_buffer::HistoryBufferWithDropCounter;
      25                 : use utils::rate_limit::RateLimit;
      26                 : 
      27                 : use utils::{id::TimelineId, lsn::Lsn};
      28                 : 
      29                 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
      30                 : pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
      31                 : pub use image_layer::{ImageLayer, ImageLayerWriter};
      32                 : pub use inmemory_layer::InMemoryLayer;
      33                 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
      34                 : 
      35                 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
      36                 : 
      37 UBC           0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
      38               0 : where
      39               0 :     T: PartialOrd<T>,
      40               0 : {
      41               0 :     if a.start < b.start {
      42               0 :         a.end > b.start
      43                 :     } else {
      44               0 :         b.end > a.start
      45                 :     }
      46               0 : }
      47                 : 
      48                 : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
      49                 : ///
      50                 : /// Before first call, you can fill in 'page_img' if you have an older cached
      51                 : /// version of the page available. That can save work in
      52                 : /// 'get_value_reconstruct_data', as it can stop searching for page versions
      53                 : /// when all the WAL records going back to the cached image have been collected.
      54                 : ///
      55                 : /// When get_value_reconstruct_data returns Complete, 'img' is set to an image
      56                 : /// of the page, or the oldest WAL record in 'records' is a will_init-type
      57                 : /// record that initializes the page without requiring a previous image.
      58                 : ///
      59                 : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
      60                 : /// been collected, but there are more records outside the current layer. Pass
      61                 : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
      62                 : /// call, to collect more records.
      63                 : ///
      64               0 : #[derive(Debug)]
      65                 : pub struct ValueReconstructState {
      66                 :     pub records: Vec<(Lsn, NeonWalRecord)>,
      67                 :     pub img: Option<(Lsn, Bytes)>,
      68                 : }
      69                 : 
      70                 : /// Return value from [`Layer::get_value_reconstruct_data`]
      71 CBC        1217 : #[derive(Clone, Copy, Debug)]
      72                 : pub enum ValueReconstructResult {
      73                 :     /// Got all the data needed to reconstruct the requested page
      74                 :     Complete,
      75                 :     /// This layer didn't contain all the required data, the caller should look up
      76                 :     /// the predecessor layer at the returned LSN and collect more data from there.
      77                 :     Continue,
      78                 : 
      79                 :     /// This layer didn't contain data needed to reconstruct the page version at
      80                 :     /// the returned LSN. This is usually considered an error, but might be OK
      81                 :     /// in some circumstances.
      82                 :     Missing,
      83                 : }
      84                 : 
      85 UBC           0 : #[derive(Debug)]
      86                 : pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
      87                 : 
      88                 : /// This struct holds two instances of [`LayerAccessStatsInner`].
      89                 : /// Accesses are recorded to both instances.
      90                 : /// The `for_scraping_api` instance can be reset from the management API via [`LayerAccessStatsReset`].
      91                 : /// The `for_eviction_policy` is never reset.
      92 CBC       78253 : #[derive(Debug, Default, Clone)]
      93                 : struct LayerAccessStatsLocked {
      94                 :     for_scraping_api: LayerAccessStatsInner,
      95                 :     for_eviction_policy: LayerAccessStatsInner,
      96                 : }
      97                 : 
      98                 : impl LayerAccessStatsLocked {
      99        15230436 :     fn iter_mut(&mut self) -> impl Iterator<Item = &mut LayerAccessStatsInner> {
     100        15230436 :         [&mut self.for_scraping_api, &mut self.for_eviction_policy].into_iter()
     101        15230436 :     }
     102                 : }
     103                 : 
     104          156506 : #[derive(Debug, Default, Clone)]
     105                 : struct LayerAccessStatsInner {
     106                 :     first_access: Option<LayerAccessStatFullDetails>,
     107                 :     count_by_access_kind: EnumMap<LayerAccessKind, u64>,
     108                 :     task_kind_flag: EnumSet<TaskKind>,
     109                 :     last_accesses: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
     110                 :     last_residence_changes: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
     111                 : }
     112                 : 
     113 UBC           0 : #[derive(Debug, Clone, Copy)]
     114                 : pub(crate) struct LayerAccessStatFullDetails {
     115                 :     pub(crate) when: SystemTime,
     116                 :     pub(crate) task_kind: TaskKind,
     117                 :     pub(crate) access_kind: LayerAccessKind,
     118                 : }
     119                 : 
     120               0 : #[derive(Clone, Copy, strum_macros::EnumString)]
     121                 : pub enum LayerAccessStatsReset {
     122                 :     NoReset,
     123                 :     JustTaskKindFlags,
     124                 :     AllStats,
     125                 : }
     126                 : 
     127 CBC       10218 : fn system_time_to_millis_since_epoch(ts: &SystemTime) -> u64 {
     128           10218 :     ts.duration_since(UNIX_EPOCH)
     129           10218 :         .expect("better to die in this unlikely case than report false stats")
     130           10218 :         .as_millis()
     131           10218 :         .try_into()
     132           10218 :         .expect("64 bits is enough for few more years")
     133           10218 : }
     134                 : 
     135                 : impl LayerAccessStatFullDetails {
     136           10218 :     fn as_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
     137           10218 :         let Self {
     138           10218 :             when,
     139           10218 :             task_kind,
     140           10218 :             access_kind,
     141           10218 :         } = self;
     142           10218 :         pageserver_api::models::LayerAccessStatFullDetails {
     143           10218 :             when_millis_since_epoch: system_time_to_millis_since_epoch(when),
     144           10218 :             task_kind: task_kind.into(), // into static str, powered by strum_macros
     145           10218 :             access_kind: *access_kind,
     146           10218 :         }
     147           10218 :     }
     148                 : }
     149                 : 
     150                 : impl LayerAccessStats {
     151                 :     /// Create an empty stats object.
     152                 :     ///
     153                 :     /// The caller is responsible for recording a residence event
     154                 :     /// using [`record_residence_event`] before calling `latest_activity`.
     155                 :     /// If they don't, [`latest_activity`] will return `None`.
     156                 :     ///
     157                 :     /// [`record_residence_event`]: Self::record_residence_event
     158                 :     /// [`latest_activity`]: Self::latest_activity
     159           20448 :     pub(crate) fn empty_will_record_residence_event_later() -> Self {
     160           20448 :         LayerAccessStats(Mutex::default())
     161           20448 :     }
     162                 : 
     163                 :     /// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
     164                 :     ///
     165                 :     /// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
     166                 :     ///
     167                 :     /// [`LayerLoad`]: LayerResidenceEventReason::LayerLoad
     168                 :     /// [`record_residence_event`]: Self::record_residence_event
     169           57805 :     pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
     170           57805 :         let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
     171           57805 :         new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
     172           57805 :         new
     173           57805 :     }
     174                 : 
     175                 :     /// Record a change in layer residency.
     176                 :     ///
     177                 :     /// Recording the event must happen while holding the layer map lock to
     178                 :     /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
     179                 :     /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
     180                 :     ///
     181                 :     /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
     182                 :     /// the following race could happen:
     183                 :     ///
     184                 :     /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
     185                 :     /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
     186                 :     /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
     187                 :     /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
     188                 :     ///
     189           90187 :     pub(crate) fn record_residence_event(
     190           90187 :         &self,
     191           90187 :         status: LayerResidenceStatus,
     192           90187 :         reason: LayerResidenceEventReason,
     193           90187 :     ) {
     194           90187 :         let mut locked = self.0.lock().unwrap();
     195          180374 :         locked.iter_mut().for_each(|inner| {
     196          180374 :             inner
     197          180374 :                 .last_residence_changes
     198          180374 :                 .write(LayerResidenceEvent::new(status, reason))
     199          180374 :         });
     200           90187 :     }
     201                 : 
     202        15458514 :     fn record_access(&self, access_kind: LayerAccessKind, ctx: &RequestContext) {
     203        15458514 :         if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
     204          318265 :             return;
     205        15140249 :         }
     206        15140249 : 
     207        15140249 :         let this_access = LayerAccessStatFullDetails {
     208        15140249 :             when: SystemTime::now(),
     209        15140249 :             task_kind: ctx.task_kind(),
     210        15140249 :             access_kind,
     211        15140249 :         };
     212        15140249 : 
     213        15140249 :         let mut locked = self.0.lock().unwrap();
     214        30280540 :         locked.iter_mut().for_each(|inner| {
     215        30280540 :             inner.first_access.get_or_insert(this_access);
     216        30280540 :             inner.count_by_access_kind[access_kind] += 1;
     217        30280540 :             inner.task_kind_flag |= ctx.task_kind();
     218        30280540 :             inner.last_accesses.write(this_access);
     219        30280540 :         })
     220        15458514 :     }
     221                 : 
     222            2974 :     fn as_api_model(
     223            2974 :         &self,
     224            2974 :         reset: LayerAccessStatsReset,
     225            2974 :     ) -> pageserver_api::models::LayerAccessStats {
     226            2974 :         let mut locked = self.0.lock().unwrap();
     227            2974 :         let inner = &mut locked.for_scraping_api;
     228            2974 :         let LayerAccessStatsInner {
     229            2974 :             first_access,
     230            2974 :             count_by_access_kind,
     231            2974 :             task_kind_flag,
     232            2974 :             last_accesses,
     233            2974 :             last_residence_changes,
     234            2974 :         } = inner;
     235            2974 :         let ret = pageserver_api::models::LayerAccessStats {
     236            2974 :             access_count_by_access_kind: count_by_access_kind
     237            2974 :                 .iter()
     238           11896 :                 .map(|(kind, count)| (kind, *count))
     239            2974 :                 .collect(),
     240            2974 :             task_kind_access_flag: task_kind_flag
     241            2974 :                 .iter()
     242            2974 :                 .map(|task_kind| task_kind.into()) // into static str, powered by strum_macros
     243            2974 :                 .collect(),
     244            2974 :             first: first_access.as_ref().map(|a| a.as_api_model()),
     245            9459 :             accesses_history: last_accesses.map(|m| m.as_api_model()),
     246            2974 :             residence_events_history: last_residence_changes.clone(),
     247            2974 :         };
     248            2974 :         match reset {
     249            2974 :             LayerAccessStatsReset::NoReset => (),
     250 UBC           0 :             LayerAccessStatsReset::JustTaskKindFlags => {
     251               0 :                 inner.task_kind_flag.clear();
     252               0 :             }
     253               0 :             LayerAccessStatsReset::AllStats => {
     254               0 :                 *inner = LayerAccessStatsInner::default();
     255               0 :             }
     256                 :         }
     257 CBC        2974 :         ret
     258            2974 :     }
     259                 : 
     260                 :     /// Get the latest access timestamp, falling back to latest residence event.
     261                 :     ///
     262                 :     /// This function can only return `None` if there has not yet been a call to the
     263                 :     /// [`record_residence_event`] method. That would generally be considered an
     264                 :     /// implementation error. This function logs a rate-limited warning in that case.
     265                 :     ///
     266                 :     /// TODO: use type system to avoid the need for `fallback`.
     267                 :     /// The approach in <https://github.com/neondatabase/neon/pull/3775>
     268                 :     /// could be used to enforce that a residence event is recorded
     269                 :     /// before a layer is added to the layer map. We could also have
     270                 :     /// a layer wrapper type that holds the LayerAccessStats, and ensure
     271                 :     /// that that type can only be produced by inserting into the layer map.
     272                 :     ///
     273                 :     /// [`record_residence_event`]: Self::record_residence_event
     274            4170 :     pub(crate) fn latest_activity(&self) -> Option<SystemTime> {
     275            4170 :         let locked = self.0.lock().unwrap();
     276            4170 :         let inner = &locked.for_eviction_policy;
     277            4170 :         match inner.last_accesses.recent() {
     278            2118 :             Some(a) => Some(a.when),
     279            2052 :             None => match inner.last_residence_changes.recent() {
     280            2052 :                 Some(e) => Some(e.timestamp),
     281                 :                 None => {
     282                 :                     static WARN_RATE_LIMIT: Lazy<Mutex<(usize, RateLimit)>> =
     283 UBC           0 :                         Lazy::new(|| Mutex::new((0, RateLimit::new(Duration::from_secs(10)))));
     284               0 :                     let mut guard = WARN_RATE_LIMIT.lock().unwrap();
     285               0 :                     guard.0 += 1;
     286               0 :                     let occurences = guard.0;
     287               0 :                     guard.1.call(move || {
     288               0 :                         warn!(parent: None, occurences, "latest_activity not available, this is an implementation bug, using fallback value");
     289               0 :                     });
     290               0 :                     None
     291                 :                 }
     292                 :             },
     293                 :         }
     294 CBC        4170 :     }
     295                 : }
     296                 : 
     297                 : /// Get a layer descriptor from a layer.
     298                 : pub trait AsLayerDesc {
     299                 :     /// Get the layer descriptor.
     300                 :     fn layer_desc(&self) -> &PersistentLayerDesc;
     301                 : }
     302                 : 
     303                 : pub mod tests {
     304                 :     use pageserver_api::shard::TenantShardId;
     305                 : 
     306                 :     use super::*;
     307                 : 
     308                 :     impl From<DeltaFileName> for PersistentLayerDesc {
     309 UBC           0 :         fn from(value: DeltaFileName) -> Self {
     310               0 :             PersistentLayerDesc::new_delta(
     311               0 :                 TenantShardId::from([0; 18]),
     312               0 :                 TimelineId::from_array([0; 16]),
     313               0 :                 value.key_range,
     314               0 :                 value.lsn_range,
     315               0 :                 233,
     316               0 :             )
     317               0 :         }
     318                 :     }
     319                 : 
     320                 :     impl From<ImageFileName> for PersistentLayerDesc {
     321               0 :         fn from(value: ImageFileName) -> Self {
     322               0 :             PersistentLayerDesc::new_img(
     323               0 :                 TenantShardId::from([0; 18]),
     324               0 :                 TimelineId::from_array([0; 16]),
     325               0 :                 value.key_range,
     326               0 :                 value.lsn,
     327               0 :                 233,
     328               0 :             )
     329               0 :         }
     330                 :     }
     331                 : 
     332                 :     impl From<LayerFileName> for PersistentLayerDesc {
     333               0 :         fn from(value: LayerFileName) -> Self {
     334               0 :             match value {
     335               0 :                 LayerFileName::Delta(d) => Self::from(d),
     336               0 :                 LayerFileName::Image(i) => Self::from(i),
     337                 :             }
     338               0 :         }
     339                 :     }
     340                 : }
     341                 : 
     342                 : /// Range wrapping newtype, which uses display to render Debug.
     343                 : ///
     344                 : /// Useful with `Key`, whose `{:?}` output is too verbose for printing multiple layers.
     345                 : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
     346                 : 
     347                 : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
     348               0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     349               0 :         write!(f, "{}..{}", self.0.start, self.0.end)
     350               0 :     }
     351                 : }
        

Generated by: LCOV version 2.1-beta