LCOV - code coverage report
Current view: top level - pageserver/src/tenant - storage_layer.rs (source / functions) Coverage Total Hit
Test: 32f4a56327bc9da697706839ed4836b2a00a408f.info Lines: 66.2 % 154 102
Test Date: 2024-02-07 07:37:29 Functions: 46.2 % 39 18

            Line data    Source code
       1              : //! Common traits and structs for layers
       2              : 
       3              : pub mod delta_layer;
       4              : mod filename;
       5              : pub mod image_layer;
       6              : mod inmemory_layer;
       7              : pub(crate) mod layer;
       8              : mod layer_desc;
       9              : 
      10              : use crate::context::{AccessStatsBehavior, RequestContext};
      11              : use crate::task_mgr::TaskKind;
      12              : use crate::walrecord::NeonWalRecord;
      13              : use bytes::Bytes;
      14              : use enum_map::EnumMap;
      15              : use enumset::EnumSet;
      16              : use once_cell::sync::Lazy;
      17              : use pageserver_api::models::{
      18              :     LayerAccessKind, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
      19              : };
      20              : use std::ops::Range;
      21              : use std::sync::Mutex;
      22              : use std::time::{Duration, SystemTime, UNIX_EPOCH};
      23              : use tracing::warn;
      24              : use utils::history_buffer::HistoryBufferWithDropCounter;
      25              : use utils::rate_limit::RateLimit;
      26              : 
      27              : use utils::{id::TimelineId, lsn::Lsn};
      28              : 
      29              : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
      30              : pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
      31              : pub use image_layer::{ImageLayer, ImageLayerWriter};
      32              : pub use inmemory_layer::InMemoryLayer;
      33              : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
      34              : 
      35              : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
      36              : 
      37            0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
      38            0 : where
      39            0 :     T: PartialOrd<T>,
      40            0 : {
      41            0 :     if a.start < b.start {
      42            0 :         a.end > b.start
      43              :     } else {
      44            0 :         b.end > a.start
      45              :     }
      46            0 : }
      47              : 
      48              : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
      49              : ///
      50              : /// Before first call, you can fill in 'page_img' if you have an older cached
      51              : /// version of the page available. That can save work in
      52              : /// 'get_value_reconstruct_data', as it can stop searching for page versions
      53              : /// when all the WAL records going back to the cached image have been collected.
      54              : ///
      55              : /// When get_value_reconstruct_data returns Complete, 'img' is set to an image
      56              : /// of the page, or the oldest WAL record in 'records' is a will_init-type
      57              : /// record that initializes the page without requiring a previous image.
      58              : ///
      59              : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
      60              : /// been collected, but there are more records outside the current layer. Pass
      61              : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
      62              : /// call, to collect more records.
      63              : ///
      64            0 : #[derive(Debug)]
      65              : pub struct ValueReconstructState {
      66              :     pub records: Vec<(Lsn, NeonWalRecord)>,
      67              :     pub img: Option<(Lsn, Bytes)>,
      68              : }
      69              : 
      70              : /// Return value from [`Layer::get_value_reconstruct_data`]
      71         1388 : #[derive(Clone, Copy, Debug)]
      72              : pub enum ValueReconstructResult {
      73              :     /// Got all the data needed to reconstruct the requested page
      74              :     Complete,
      75              :     /// This layer didn't contain all the required data, the caller should look up
      76              :     /// the predecessor layer at the returned LSN and collect more data from there.
      77              :     Continue,
      78              : 
      79              :     /// This layer didn't contain data needed to reconstruct the page version at
      80              :     /// the returned LSN. This is usually considered an error, but might be OK
      81              :     /// in some circumstances.
      82              :     Missing,
      83              : }
      84              : 
      85            0 : #[derive(Debug)]
      86              : pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
      87              : 
      88              : /// This struct holds two instances of [`LayerAccessStatsInner`].
      89              : /// Accesses are recorded to both instances.
      90              : /// The `for_scraping_api` instance can be reset from the management API via [`LayerAccessStatsReset`].
      91              : /// The `for_eviction_policy` is never reset.
      92        78780 : #[derive(Debug, Default, Clone)]
      93              : struct LayerAccessStatsLocked {
      94              :     for_scraping_api: LayerAccessStatsInner,
      95              :     for_eviction_policy: LayerAccessStatsInner,
      96              : }
      97              : 
      98              : impl LayerAccessStatsLocked {
      99     23735825 :     fn iter_mut(&mut self) -> impl Iterator<Item = &mut LayerAccessStatsInner> {
     100     23735825 :         [&mut self.for_scraping_api, &mut self.for_eviction_policy].into_iter()
     101     23735825 :     }
     102              : }
     103              : 
     104       157560 : #[derive(Debug, Default, Clone)]
     105              : struct LayerAccessStatsInner {
     106              :     first_access: Option<LayerAccessStatFullDetails>,
     107              :     count_by_access_kind: EnumMap<LayerAccessKind, u64>,
     108              :     task_kind_flag: EnumSet<TaskKind>,
     109              :     last_accesses: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
     110              :     last_residence_changes: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
     111              : }
     112              : 
     113            0 : #[derive(Debug, Clone, Copy)]
     114              : pub(crate) struct LayerAccessStatFullDetails {
     115              :     pub(crate) when: SystemTime,
     116              :     pub(crate) task_kind: TaskKind,
     117              :     pub(crate) access_kind: LayerAccessKind,
     118              : }
     119              : 
     120            0 : #[derive(Clone, Copy, strum_macros::EnumString)]
     121              : pub enum LayerAccessStatsReset {
     122              :     NoReset,
     123              :     JustTaskKindFlags,
     124              :     AllStats,
     125              : }
     126              : 
     127        18200 : fn system_time_to_millis_since_epoch(ts: &SystemTime) -> u64 {
     128        18200 :     ts.duration_since(UNIX_EPOCH)
     129        18200 :         .expect("better to die in this unlikely case than report false stats")
     130        18200 :         .as_millis()
     131        18200 :         .try_into()
     132        18200 :         .expect("64 bits is enough for few more years")
     133        18200 : }
     134              : 
     135              : impl LayerAccessStatFullDetails {
     136        18200 :     fn as_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
     137        18200 :         let Self {
     138        18200 :             when,
     139        18200 :             task_kind,
     140        18200 :             access_kind,
     141        18200 :         } = self;
     142        18200 :         pageserver_api::models::LayerAccessStatFullDetails {
     143        18200 :             when_millis_since_epoch: system_time_to_millis_since_epoch(when),
     144        18200 :             task_kind: task_kind.into(), // into static str, powered by strum_macros
     145        18200 :             access_kind: *access_kind,
     146        18200 :         }
     147        18200 :     }
     148              : }
     149              : 
     150              : impl LayerAccessStats {
     151              :     /// Create an empty stats object.
     152              :     ///
     153              :     /// The caller is responsible for recording a residence event
     154              :     /// using [`record_residence_event`] before calling `latest_activity`.
     155              :     /// If they don't, [`latest_activity`] will return `None`.
     156              :     ///
     157              :     /// [`record_residence_event`]: Self::record_residence_event
     158              :     /// [`latest_activity`]: Self::latest_activity
     159        21944 :     pub(crate) fn empty_will_record_residence_event_later() -> Self {
     160        21944 :         LayerAccessStats(Mutex::default())
     161        21944 :     }
     162              : 
     163              :     /// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
     164              :     ///
     165              :     /// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
     166              :     ///
     167              :     /// [`LayerLoad`]: LayerResidenceEventReason::LayerLoad
     168              :     /// [`record_residence_event`]: Self::record_residence_event
     169        56836 :     pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
     170        56836 :         let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
     171        56836 :         new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
     172        56836 :         new
     173        56836 :     }
     174              : 
     175              :     /// Record a change in layer residency.
     176              :     ///
     177              :     /// Recording the event must happen while holding the layer map lock to
     178              :     /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
     179              :     /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
     180              :     ///
     181              :     /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
     182              :     /// the following race could happen:
     183              :     ///
     184              :     /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
     185              :     /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
     186              :     /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
     187              :     /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
     188              :     ///
     189        91109 :     pub(crate) fn record_residence_event(
     190        91109 :         &self,
     191        91109 :         status: LayerResidenceStatus,
     192        91109 :         reason: LayerResidenceEventReason,
     193        91109 :     ) {
     194        91109 :         let mut locked = self.0.lock().unwrap();
     195       182218 :         locked.iter_mut().for_each(|inner| {
     196       182218 :             inner
     197       182218 :                 .last_residence_changes
     198       182218 :                 .write(LayerResidenceEvent::new(status, reason))
     199       182218 :         });
     200        91109 :     }
     201              : 
     202     23938768 :     fn record_access(&self, access_kind: LayerAccessKind, ctx: &RequestContext) {
     203     23938768 :         if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
     204       294052 :             return;
     205     23644716 :         }
     206     23644716 : 
     207     23644716 :         let this_access = LayerAccessStatFullDetails {
     208     23644716 :             when: SystemTime::now(),
     209     23644716 :             task_kind: ctx.task_kind(),
     210     23644716 :             access_kind,
     211     23644716 :         };
     212     23644716 : 
     213     23644716 :         let mut locked = self.0.lock().unwrap();
     214     47289436 :         locked.iter_mut().for_each(|inner| {
     215     47289436 :             inner.first_access.get_or_insert(this_access);
     216     47289436 :             inner.count_by_access_kind[access_kind] += 1;
     217     47289436 :             inner.task_kind_flag |= ctx.task_kind();
     218     47289436 :             inner.last_accesses.write(this_access);
     219     47289436 :         })
     220     23938768 :     }
     221              : 
     222         3014 :     fn as_api_model(
     223         3014 :         &self,
     224         3014 :         reset: LayerAccessStatsReset,
     225         3014 :     ) -> pageserver_api::models::LayerAccessStats {
     226         3014 :         let mut locked = self.0.lock().unwrap();
     227         3014 :         let inner = &mut locked.for_scraping_api;
     228         3014 :         let LayerAccessStatsInner {
     229         3014 :             first_access,
     230         3014 :             count_by_access_kind,
     231         3014 :             task_kind_flag,
     232         3014 :             last_accesses,
     233         3014 :             last_residence_changes,
     234         3014 :         } = inner;
     235         3014 :         let ret = pageserver_api::models::LayerAccessStats {
     236         3014 :             access_count_by_access_kind: count_by_access_kind
     237         3014 :                 .iter()
     238        12056 :                 .map(|(kind, count)| (kind, *count))
     239         3014 :                 .collect(),
     240         3014 :             task_kind_access_flag: task_kind_flag
     241         3014 :                 .iter()
     242         3014 :                 .map(|task_kind| task_kind.into()) // into static str, powered by strum_macros
     243         3014 :                 .collect(),
     244         3014 :             first: first_access.as_ref().map(|a| a.as_api_model()),
     245        16959 :             accesses_history: last_accesses.map(|m| m.as_api_model()),
     246         3014 :             residence_events_history: last_residence_changes.clone(),
     247         3014 :         };
     248         3014 :         match reset {
     249         3014 :             LayerAccessStatsReset::NoReset => (),
     250            0 :             LayerAccessStatsReset::JustTaskKindFlags => {
     251            0 :                 inner.task_kind_flag.clear();
     252            0 :             }
     253            0 :             LayerAccessStatsReset::AllStats => {
     254            0 :                 *inner = LayerAccessStatsInner::default();
     255            0 :             }
     256              :         }
     257         3014 :         ret
     258         3014 :     }
     259              : 
     260              :     /// Get the latest access timestamp, falling back to latest residence event.
     261              :     ///
     262              :     /// This function can only return `None` if there has not yet been a call to the
     263              :     /// [`record_residence_event`] method. That would generally be considered an
     264              :     /// implementation error. This function logs a rate-limited warning in that case.
     265              :     ///
     266              :     /// TODO: use type system to avoid the need for `fallback`.
     267              :     /// The approach in <https://github.com/neondatabase/neon/pull/3775>
     268              :     /// could be used to enforce that a residence event is recorded
     269              :     /// before a layer is added to the layer map. We could also have
     270              :     /// a layer wrapper type that holds the LayerAccessStats, and ensure
     271              :     /// that that type can only be produced by inserting into the layer map.
     272              :     ///
     273              :     /// [`record_residence_event`]: Self::record_residence_event
     274         4148 :     pub(crate) fn latest_activity(&self) -> Option<SystemTime> {
     275         4148 :         let locked = self.0.lock().unwrap();
     276         4148 :         let inner = &locked.for_eviction_policy;
     277         4148 :         match inner.last_accesses.recent() {
     278         2171 :             Some(a) => Some(a.when),
     279         1977 :             None => match inner.last_residence_changes.recent() {
     280         1977 :                 Some(e) => Some(e.timestamp),
     281              :                 None => {
     282              :                     static WARN_RATE_LIMIT: Lazy<Mutex<(usize, RateLimit)>> =
     283            0 :                         Lazy::new(|| Mutex::new((0, RateLimit::new(Duration::from_secs(10)))));
     284            0 :                     let mut guard = WARN_RATE_LIMIT.lock().unwrap();
     285            0 :                     guard.0 += 1;
     286            0 :                     let occurences = guard.0;
     287            0 :                     guard.1.call(move || {
     288            0 :                         warn!(parent: None, occurences, "latest_activity not available, this is an implementation bug, using fallback value");
     289            0 :                     });
     290            0 :                     None
     291              :                 }
     292              :             },
     293              :         }
     294         4148 :     }
     295              : }
     296              : 
     297              : /// Get a layer descriptor from a layer.
     298              : pub trait AsLayerDesc {
     299              :     /// Get the layer descriptor.
     300              :     fn layer_desc(&self) -> &PersistentLayerDesc;
     301              : }
     302              : 
     303              : pub mod tests {
     304              :     use pageserver_api::shard::TenantShardId;
     305              : 
     306              :     use super::*;
     307              : 
     308              :     impl From<DeltaFileName> for PersistentLayerDesc {
     309            0 :         fn from(value: DeltaFileName) -> Self {
     310            0 :             PersistentLayerDesc::new_delta(
     311            0 :                 TenantShardId::from([0; 18]),
     312            0 :                 TimelineId::from_array([0; 16]),
     313            0 :                 value.key_range,
     314            0 :                 value.lsn_range,
     315            0 :                 233,
     316            0 :             )
     317            0 :         }
     318              :     }
     319              : 
     320              :     impl From<ImageFileName> for PersistentLayerDesc {
     321            0 :         fn from(value: ImageFileName) -> Self {
     322            0 :             PersistentLayerDesc::new_img(
     323            0 :                 TenantShardId::from([0; 18]),
     324            0 :                 TimelineId::from_array([0; 16]),
     325            0 :                 value.key_range,
     326            0 :                 value.lsn,
     327            0 :                 233,
     328            0 :             )
     329            0 :         }
     330              :     }
     331              : 
     332              :     impl From<LayerFileName> for PersistentLayerDesc {
     333            0 :         fn from(value: LayerFileName) -> Self {
     334            0 :             match value {
     335            0 :                 LayerFileName::Delta(d) => Self::from(d),
     336            0 :                 LayerFileName::Image(i) => Self::from(i),
     337              :             }
     338            0 :         }
     339              :     }
     340              : }
     341              : 
     342              : /// Range wrapping newtype, which uses display to render Debug.
     343              : ///
     344              : /// Useful with `Key`, whose `{:?}` output is too verbose for printing multiple layers.
     345              : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
     346              : 
     347              : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
     348            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     349            0 :         write!(f, "{}..{}", self.0.start, self.0.end)
     350            0 :     }
     351              : }
        

Generated by: LCOV version 2.1-beta