LCOV - code coverage report
Current view: top level - pageserver/src/tenant - storage_layer.rs (source / functions)
Test:      42f947419473a288706e86ecdf7c2863d760d5d7.info
Test Date: 2024-08-02 21:34:27
Coverage:  Lines: 74.9 % (254 of 339)    Functions: 76.0 % (38 of 50)

            Line data    Source code
       1              : //! Common traits and structs for layers
       2              : 
       3              : pub mod delta_layer;
       4              : pub mod image_layer;
       5              : pub(crate) mod inmemory_layer;
       6              : pub(crate) mod layer;
       7              : mod layer_desc;
       8              : mod layer_name;
       9              : pub mod merge_iterator;
      10              : 
      11              : use crate::context::{AccessStatsBehavior, RequestContext};
      12              : use crate::repository::Value;
      13              : use crate::walrecord::NeonWalRecord;
      14              : use bytes::Bytes;
      15              : use pageserver_api::key::Key;
      16              : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
      17              : use std::cmp::{Ordering, Reverse};
      18              : use std::collections::hash_map::Entry;
      19              : use std::collections::{BinaryHeap, HashMap};
      20              : use std::ops::Range;
      21              : use std::sync::Arc;
      22              : use std::time::{Duration, SystemTime, UNIX_EPOCH};
      23              : 
      24              : use utils::lsn::Lsn;
      25              : 
      26              : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
      27              : pub use image_layer::{ImageLayer, ImageLayerWriter};
      28              : pub use inmemory_layer::InMemoryLayer;
      29              : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
      30              : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
      31              : 
      32              : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
      33              : 
      34              : use self::inmemory_layer::InMemoryLayerFileId;
      35              : 
      36              : use super::timeline::GetVectoredError;
      37              : use super::PageReconstructError;
      38              : 
      39            0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
      40            0 : where
      41            0 :     T: PartialOrd<T>,
      42            0 : {
      43            0 :     if a.start < b.start {
      44            0 :         a.end > b.start
      45              :     } else {
      46            0 :         b.end > a.start
      47              :     }
      48            0 : }
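
Both branches test whether the later-starting range begins before the other one ends. A hypothetical unit test (not part of this coverage run) illustrating the expected behavior:

    #[test]
    fn range_overlaps_examples() {
        // Touching endpoints do not overlap: 0..5 ends exactly where 5..10 begins.
        assert!(!range_overlaps(&(0..5), &(5..10)));
        // Any shared interior point is an overlap, in either argument order.
        assert!(range_overlaps(&(0..6), &(5..10)));
        assert!(range_overlaps(&(5..10), &(0..6)));
        // Containment counts as overlap too.
        assert!(range_overlaps(&(0..100), &(40..60)));
    }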
      49              : 
      50              : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
      51              : ///
       52              : /// Before the first call, you can fill in 'img' if you have an older cached
       53              : /// version of the page available. That can save work in
       54              : /// 'get_value_reconstruct_data', as it can stop searching for page versions
       55              : /// when all the WAL records going back to the cached image have been collected.
      56              : ///
       57              : /// When 'get_value_reconstruct_data' returns Complete, either 'img' is set to an
       58              : /// image of the page, or the oldest WAL record in 'records' is a will_init-type
       59              : /// record that initializes the page without requiring a previous image.
       60              : ///
       61              : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
       62              : /// been collected, but there are more records outside the current layer. Pass
       63              : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
       64              : /// call, to collect more records.
      65              : ///
      66              : #[derive(Debug, Default)]
      67              : pub(crate) struct ValueReconstructState {
      68              :     pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
      69              :     pub(crate) img: Option<(Lsn, Bytes)>,
      70              : }
      71              : 
      72              : #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
      73              : pub(crate) enum ValueReconstructSituation {
      74              :     Complete,
      75              :     #[default]
      76              :     Continue,
      77              : }
      78              : 
      79              : /// Reconstruct data accumulated for a single key during a vectored get
      80              : #[derive(Debug, Default, Clone)]
      81              : pub(crate) struct VectoredValueReconstructState {
      82              :     pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
      83              :     pub(crate) img: Option<(Lsn, Bytes)>,
      84              : 
      85              :     situation: ValueReconstructSituation,
      86              : }
      87              : 
      88              : impl VectoredValueReconstructState {
      89        40229 :     fn get_cached_lsn(&self) -> Option<Lsn> {
      90        40229 :         self.img.as_ref().map(|img| img.0)
      91        40229 :     }
      92              : }
      93              : 
      94              : impl From<VectoredValueReconstructState> for ValueReconstructState {
      95       666556 :     fn from(mut state: VectoredValueReconstructState) -> Self {
      96       666556 :         // walredo expects the records to be descending in terms of Lsn
      97       666556 :         state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
      98       666556 : 
      99       666556 :         ValueReconstructState {
     100       666556 :             records: state.records,
     101       666556 :             img: state.img,
     102       666556 :         }
     103       666556 :     }
     104              : }
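
`Reverse` flips the comparison key, so an ascending sort yields descending LSNs. A minimal standalone sketch of the same idiom, with u64 standing in for Lsn:

    use std::cmp::Reverse;

    fn main() {
        let mut records = vec![(10u64, "b"), (30, "c"), (20, "a")];
        // Ascending sort on Reverse(lsn) == descending sort on lsn.
        records.sort_by_key(|(lsn, _)| Reverse(*lsn));
        let lsns: Vec<u64> = records.iter().map(|(lsn, _)| *lsn).collect();
        assert_eq!(lsns, vec![30, 20, 10]);
    }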
     105              : 
      106              : /// Bag of data accumulated during a vectored get.
     107              : pub(crate) struct ValuesReconstructState {
     108              :     /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
     109              :     /// should not expect to get anything from this hashmap.
     110              :     pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
      111              :     /// The keys which have already been retrieved
     112              :     keys_done: KeySpaceRandomAccum,
     113              : 
     114              :     /// The keys covered by the image layers
     115              :     keys_with_image_coverage: Option<Range<Key>>,
     116              : 
      117              :     // Statistics that remain accessible to the caller of `get_vectored_impl`.
     118              :     layers_visited: u32,
     119              :     delta_layers_visited: u32,
     120              : }
     121              : 
     122              : impl ValuesReconstructState {
     123       626258 :     pub(crate) fn new() -> Self {
     124       626258 :         Self {
     125       626258 :             keys: HashMap::new(),
     126       626258 :             keys_done: KeySpaceRandomAccum::new(),
     127       626258 :             keys_with_image_coverage: None,
     128       626258 :             layers_visited: 0,
     129       626258 :             delta_layers_visited: 0,
     130       626258 :         }
     131       626258 :     }
     132              : 
     133              :     /// Associate a key with the error which it encountered and mark it as done
     134            0 :     pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
     135            0 :         let previous = self.keys.insert(key, Err(err));
     136            0 :         if let Some(Ok(state)) = previous {
     137            0 :             if state.situation == ValueReconstructSituation::Continue {
     138            0 :                 self.keys_done.add_key(key);
     139            0 :             }
     140            0 :         }
     141            0 :     }
     142              : 
     143       818094 :     pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
     144       818094 :         self.layers_visited += 1;
     145       818094 :         if let ReadableLayer::PersistentLayer(layer) = layer {
     146       211897 :             if layer.layer_desc().is_delta() {
     147       204299 :                 self.delta_layers_visited += 1;
     148       204299 :             }
     149       606197 :         }
     150       818094 :     }
     151              : 
     152           24 :     pub(crate) fn get_delta_layers_visited(&self) -> u32 {
     153           24 :         self.delta_layers_visited
     154           24 :     }
     155              : 
     156       626032 :     pub(crate) fn get_layers_visited(&self) -> u32 {
     157       626032 :         self.layers_visited
     158       626032 :     }
     159              : 
     160              :     /// This function is called after reading a keyspace from a layer.
     161              :     /// It checks if the read path has now moved past the cached Lsn for any keys.
     162              :     ///
     163              :     /// Implementation note: We intentionally iterate over the keys for which we've
     164              :     /// already collected some reconstruct data. This avoids scaling complexity with
     165              :     /// the size of the search space.
     166       810496 :     pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
     167       810496 :         for (key, value) in self.keys.iter_mut() {
     168       691183 :             if !keyspace.contains(key) {
     169        42240 :                 continue;
     170       648943 :             }
     171              : 
     172       648943 :             if let Ok(state) = value {
     173       648943 :                 if state.situation != ValueReconstructSituation::Complete
     174          236 :                     && state.get_cached_lsn() >= Some(advanced_to)
     175            0 :                 {
     176            0 :                     state.situation = ValueReconstructSituation::Complete;
     177            0 :                     self.keys_done.add_key(*key);
     178       648943 :                 }
     179            0 :             }
     180              :         }
     181       810496 :     }
     182              : 
      183              :     /// On hitting an image layer, we can mark all keys in this range as done, because
      184              :     /// if the image layer does not contain a key, the key was deleted or never written.
     185         7606 :     pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
     186         7606 :         let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
     187         7606 :         assert_eq!(
     188              :             prev_val, None,
     189            0 :             "should consume the keyspace before the next iteration"
     190              :         );
     191         7606 :     }
     192              : 
     193              :     /// Update the state collected for a given key.
      194              :     /// Returns the key's new reconstruct situation: Complete if this was the last value needed, Continue otherwise.
     195              :     ///
     196              :     /// If the key is done after the update, mark it as such.
     197       666936 :     pub(crate) fn update_key(
     198       666936 :         &mut self,
     199       666936 :         key: &Key,
     200       666936 :         lsn: Lsn,
     201       666936 :         value: Value,
     202       666936 :     ) -> ValueReconstructSituation {
     203       666936 :         let state = self
     204       666936 :             .keys
     205       666936 :             .entry(*key)
     206       666936 :             .or_insert(Ok(VectoredValueReconstructState::default()));
     207              : 
     208       666936 :         if let Ok(state) = state {
     209       666936 :             let key_done = match state.situation {
     210            0 :                 ValueReconstructSituation::Complete => unreachable!(),
     211       666936 :                 ValueReconstructSituation::Continue => match value {
     212       666628 :                     Value::Image(img) => {
     213       666628 :                         state.img = Some((lsn, img));
     214       666628 :                         true
     215              :                     }
     216          308 :                     Value::WalRecord(rec) => {
     217          308 :                         debug_assert!(
     218          308 :                             Some(lsn) > state.get_cached_lsn(),
     219            0 :                             "Attempt to collect a record below cached LSN for walredo: {} < {}",
     220            0 :                             lsn,
     221            0 :                             state
     222            0 :                                 .get_cached_lsn()
     223            0 :                                 .expect("Assertion can only fire if a cached lsn is present")
     224              :                         );
     225              : 
     226          308 :                         let will_init = rec.will_init();
     227          308 :                         state.records.push((lsn, rec));
     228          308 :                         will_init
     229              :                     }
     230              :                 },
     231              :             };
     232              : 
     233       666936 :             if key_done && state.situation == ValueReconstructSituation::Continue {
     234       666628 :                 state.situation = ValueReconstructSituation::Complete;
     235       666628 :                 self.keys_done.add_key(*key);
     236       666628 :             }
     237              : 
     238       666936 :             state.situation
     239              :         } else {
     240            0 :             ValueReconstructSituation::Complete
     241              :         }
     242       666936 :     }
     243              : 
     244              :     /// Returns the Lsn at which this key is cached if one exists.
     245              :     /// The read path should go no further than this Lsn for the given key.
     246      1082185 :     pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
     247      1082185 :         self.keys
     248      1082185 :             .get(key)
     249      1082185 :             .and_then(|k| k.as_ref().ok())
     250      1082185 :             .and_then(|state| state.get_cached_lsn())
     251      1082185 :     }
     252              : 
     253              :     /// Returns the key space describing the keys that have
     254              :     /// been marked as completed since the last call to this function.
     255              :     /// Returns individual keys done, and the image layer coverage.
     256      1669354 :     pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
     257      1669354 :         (
     258      1669354 :             self.keys_done.consume_keyspace(),
     259      1669354 :             self.keys_with_image_coverage.take(),
     260      1669354 :         )
     261      1669354 :     }
     262              : }
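
A hypothetical in-crate test of the completion semantics described above (Key::from_i128 is assumed from pageserver_api::key; not part of this coverage run):

    #[test]
    fn image_value_completes_a_key() {
        let mut state = ValuesReconstructState::new();
        let key = Key::from_i128(0);
        // A full image needs nothing older, so the key completes immediately.
        let situation = state.update_key(&key, Lsn(0x10), Value::Image(Bytes::from_static(b"img")));
        assert_eq!(situation, ValueReconstructSituation::Complete);
        // The completed key is reported (once) by consume_done_keys().
        let (done, image_coverage) = state.consume_done_keys();
        assert!(done.contains(&key));
        assert!(image_coverage.is_none());
    }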
     263              : 
     264              : impl Default for ValuesReconstructState {
     265           66 :     fn default() -> Self {
     266           66 :         Self::new()
     267           66 :     }
     268              : }
     269              : 
     270              : /// A key that uniquely identifies a layer in a timeline
     271              : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
     272              : pub(crate) enum LayerId {
      273              :     PersistentLayerId(PersistentLayerKey),
     274              :     InMemoryLayerId(InMemoryLayerFileId),
     275              : }
     276              : 
     277              : /// Layer wrapper for the read path. Note that it is valid
     278              : /// to use these layers even after external operations have
     279              : /// been performed on them (compaction, freeze, etc.).
     280              : #[derive(Debug)]
     281              : pub(crate) enum ReadableLayer {
     282              :     PersistentLayer(Layer),
     283              :     InMemoryLayer(Arc<InMemoryLayer>),
     284              : }
     285              : 
     286              : /// A partial description of a read to be done.
     287              : #[derive(Debug, Clone)]
     288              : struct ReadDesc {
     289              :     /// An id used to resolve the readable layer within the fringe
     290              :     layer_id: LayerId,
     291              :     /// Lsn range for the read, used for selecting the next read
     292              :     lsn_range: Range<Lsn>,
     293              : }
     294              : 
     295              : /// Data structure which maintains a fringe of layers for the
     296              : /// read path. The fringe is the set of layers which intersects
     297              : /// the current keyspace that the search is descending on.
     298              : /// Each layer tracks the keyspace that intersects it.
     299              : ///
     300              : /// The fringe must appear sorted by Lsn. Hence, it uses
      301              : /// a two-layer indexing scheme.
     302              : #[derive(Debug)]
     303              : pub(crate) struct LayerFringe {
     304              :     planned_reads_by_lsn: BinaryHeap<ReadDesc>,
     305              :     layers: HashMap<LayerId, LayerKeyspace>,
     306              : }
     307              : 
     308              : #[derive(Debug)]
     309              : struct LayerKeyspace {
     310              :     layer: ReadableLayer,
     311              :     target_keyspace: KeySpaceRandomAccum,
     312              : }
     313              : 
     314              : impl LayerFringe {
     315       851260 :     pub(crate) fn new() -> Self {
     316       851260 :         LayerFringe {
     317       851260 :             planned_reads_by_lsn: BinaryHeap::new(),
     318       851260 :             layers: HashMap::new(),
     319       851260 :         }
     320       851260 :     }
     321              : 
     322      1669354 :     pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
     323      1669354 :         let read_desc = match self.planned_reads_by_lsn.pop() {
     324       818094 :             Some(desc) => desc,
     325       851260 :             None => return None,
     326              :         };
     327              : 
     328       818094 :         let removed = self.layers.remove_entry(&read_desc.layer_id);
     329       818094 : 
     330       818094 :         match removed {
     331              :             Some((
     332              :                 _,
     333              :                 LayerKeyspace {
     334       818094 :                     layer,
     335       818094 :                     mut target_keyspace,
     336       818094 :                 },
     337       818094 :             )) => Some((
     338       818094 :                 layer,
     339       818094 :                 target_keyspace.consume_keyspace(),
     340       818094 :                 read_desc.lsn_range,
     341       818094 :             )),
     342            0 :             None => unreachable!("fringe internals are always consistent"),
     343              :         }
     344      1669354 :     }
     345              : 
     346       881910 :     pub(crate) fn update(
     347       881910 :         &mut self,
     348       881910 :         layer: ReadableLayer,
     349       881910 :         keyspace: KeySpace,
     350       881910 :         lsn_range: Range<Lsn>,
     351       881910 :     ) {
     352       881910 :         let layer_id = layer.id();
     353       881910 :         let entry = self.layers.entry(layer_id.clone());
     354       881910 :         match entry {
     355        63816 :             Entry::Occupied(mut entry) => {
     356        63816 :                 entry.get_mut().target_keyspace.add_keyspace(keyspace);
     357        63816 :             }
     358       818094 :             Entry::Vacant(entry) => {
     359       818094 :                 self.planned_reads_by_lsn.push(ReadDesc {
     360       818094 :                     lsn_range,
     361       818094 :                     layer_id: layer_id.clone(),
     362       818094 :                 });
     363       818094 :                 let mut accum = KeySpaceRandomAccum::new();
     364       818094 :                 accum.add_keyspace(keyspace);
     365       818094 :                 entry.insert(LayerKeyspace {
     366       818094 :                     layer,
     367       818094 :                     target_keyspace: accum,
     368       818094 :                 });
     369       818094 :             }
     370              :         }
     371       881910 :     }
     372              : }
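
The read path drains the fringe in planned-read order, pushing any deeper layers back in as it goes. A schematic of that loop (simplified; `reconstruct_state`, `ctx`, the fringe seeding, and error handling are assumed from the surrounding Timeline code):

    let mut fringe = LayerFringe::new();
    // ... seed with fringe.update(layer, keyspace, lsn_range) for each
    // layer intersecting the requested keyspace ...
    while let Some((layer, keyspace, lsn_range)) = fringe.next_layer() {
        layer
            .get_values_reconstruct_data(keyspace, lsn_range, &mut reconstruct_state, ctx)
            .await?;
        reconstruct_state.on_layer_visited(&layer);
        // Completed keys drop out; older layers still needed re-enter
        // the fringe via fringe.update(...).
    }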
     373              : 
     374              : impl Default for LayerFringe {
     375            0 :     fn default() -> Self {
     376            0 :         Self::new()
     377            0 :     }
     378              : }
     379              : 
     380              : impl Ord for ReadDesc {
     381           16 :     fn cmp(&self, other: &Self) -> Ordering {
     382           16 :         let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
     383           16 :         if ord == std::cmp::Ordering::Equal {
     384           16 :             self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
     385              :         } else {
     386            0 :             ord
     387              :         }
     388           16 :     }
     389              : }
     390              : 
     391              : impl PartialOrd for ReadDesc {
     392           16 :     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
     393           16 :         Some(self.cmp(other))
     394           16 :     }
     395              : }
     396              : 
     397              : impl PartialEq for ReadDesc {
     398            0 :     fn eq(&self, other: &Self) -> bool {
     399            0 :         self.lsn_range == other.lsn_range
     400            0 :     }
     401              : }
     402              : 
     403              : impl Eq for ReadDesc {}
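
In a max-heap this ordering pops the read with the highest end Lsn first and, on a tie, prefers the read whose range starts lower. A standalone illustration with u64 ranges standing in for Lsn:

    use std::cmp::Ordering;
    use std::collections::BinaryHeap;
    use std::ops::Range;

    #[derive(Eq, PartialEq)]
    struct Desc(Range<u64>);

    impl Ord for Desc {
        // Same comparator as ReadDesc above.
        fn cmp(&self, other: &Self) -> Ordering {
            let ord = self.0.end.cmp(&other.0.end);
            if ord == Ordering::Equal {
                self.0.start.cmp(&other.0.start).reverse()
            } else {
                ord
            }
        }
    }

    impl PartialOrd for Desc {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    fn main() {
        let mut heap = BinaryHeap::from([Desc(5..10), Desc(0..10), Desc(0..20)]);
        assert_eq!(heap.pop().unwrap().0, 0..20); // highest end pops first
        assert_eq!(heap.pop().unwrap().0, 0..10); // tie on end: lower start wins
        assert_eq!(heap.pop().unwrap().0, 5..10);
    }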
     404              : 
     405              : impl ReadableLayer {
     406       881910 :     pub(crate) fn id(&self) -> LayerId {
     407       881910 :         match self {
      408              :             Self::PersistentLayer(layer) => LayerId::PersistentLayerId(layer.layer_desc().key()),
     409       606197 :             Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
     410              :         }
     411       881910 :     }
     412              : 
     413       818094 :     pub(crate) async fn get_values_reconstruct_data(
     414       818094 :         &self,
     415       818094 :         keyspace: KeySpace,
     416       818094 :         lsn_range: Range<Lsn>,
     417       818094 :         reconstruct_state: &mut ValuesReconstructState,
     418       818094 :         ctx: &RequestContext,
     419       818094 :     ) -> Result<(), GetVectoredError> {
     420       818094 :         match self {
     421       211897 :             ReadableLayer::PersistentLayer(layer) => {
     422       211897 :                 layer
     423       211897 :                     .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
     424        98410 :                     .await
     425              :             }
     426       606197 :             ReadableLayer::InMemoryLayer(layer) => {
     427       606197 :                 layer
     428       606197 :                     .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
     429         9405 :                     .await
     430              :             }
     431              :         }
     432       818094 :     }
     433              : }
     434              : 
     435              : /// Return value from [`Layer::get_value_reconstruct_data`]
     436              : #[derive(Clone, Copy, Debug)]
     437              : pub enum ValueReconstructResult {
     438              :     /// Got all the data needed to reconstruct the requested page
     439              :     Complete,
     440              :     /// This layer didn't contain all the required data, the caller should look up
     441              :     /// the predecessor layer at the returned LSN and collect more data from there.
     442              :     Continue,
     443              : 
     444              :     /// This layer didn't contain data needed to reconstruct the page version at
     445              :     /// the returned LSN. This is usually considered an error, but might be OK
     446              :     /// in some circumstances.
     447              :     Missing,
     448              : }
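
Together with ValueReconstructState above, these variants drive a newest-to-oldest descent over layers. A schematic sketch of that loop (`layers_newest_to_oldest()` and the error handling are hypothetical stand-ins, not Timeline's real traversal):

    let mut state = ValueReconstructState::default();
    for layer in layers_newest_to_oldest() {
        match layer.get_value_reconstruct_data(key, lsn_range.clone(), &mut state, ctx).await? {
            ValueReconstructResult::Complete => break,    // img + records fully determine the page
            ValueReconstructResult::Continue => continue, // keep collecting from older layers
            ValueReconstructResult::Missing => break,     // gap in history; usually an error
        }
    }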
     449              : 
     450              : /// Layers contain a hint indicating whether they are likely to be used for reads.  This is a hint rather
     451              : /// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
     452              : /// of layers (for example when creating a branch that makes some previously covered layers visible).  It should
     453              : /// be used for cache management but not for correctness-critical checks.
     454              : #[derive(Debug, Clone, PartialEq, Eq)]
     455              : pub enum LayerVisibilityHint {
     456              :     /// A Visible layer might be read while serving a read, because there is not an image layer between it
     457              :     /// and a readable LSN (the tip of the branch or a child's branch point)
     458              :     Visible,
     459              :     /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
     460              :     /// a branch or ephemeral endpoint at an LSN below the layer that covers this.
     461              :     Covered,
     462              : }
     463              : 
     464              : pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
     465              : 
     466            0 : #[derive(Clone, Copy, strum_macros::EnumString)]
     467              : pub(crate) enum LayerAccessStatsReset {
     468              :     NoReset,
     469              :     AllStats,
     470              : }
     471              : 
     472              : impl Default for LayerAccessStats {
     473         1664 :     fn default() -> Self {
     474         1664 :         // Default value is to assume resident since creation time, and visible.
     475         1664 :         let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
     476         1664 :         value |= 0x1 << Self::VISIBILITY_SHIFT;
     477         1664 : 
     478         1664 :         Self(std::sync::atomic::AtomicU64::new(value))
     479         1664 :     }
     480              : }
     481              : 
     482              : // Efficient store of two very-low-resolution timestamps and some bits.  Used for storing last access time and
     483              : // last residence change time.
     484              : impl LayerAccessStats {
     485              :     // How many high bits to drop from a u32 timestamp?
     486              :     // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
     487              :     //   after that, this software has been very successful!)
     488              :     // - Dropping the top bit is implicitly safe because unix timestamps are meant to be
     489              :     // stored in an i32, so they never used it.
     490              :     // - Dropping the next two bits is safe because this code is only running on systems in
     491              :     // years >= 2024, and these bits have been 1 since 2021
     492              :     //
      493              :     // Therefore we may store only 29 bits for a timestamp with one-second resolution.  We do
     494              :     // this truncation to make space for some flags in the high bits of our u64.
     495              :     const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
     496              :     const TS_MASK: u32 = 0x1f_ff_ff_ff;
     497              :     const TS_ONES: u32 = 0x60_00_00_00;
     498              : 
     499              :     const ATIME_SHIFT: u32 = 0;
     500              :     const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
     501              :     const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
     502              : 
     503       214875 :     fn write_bits(&self, mask: u64, value: u64) -> u64 {
     504       214875 :         self.0
     505       214875 :             .fetch_update(
     506       214875 :                 // TODO: decide what orderings are correct
     507       214875 :                 std::sync::atomic::Ordering::Relaxed,
     508       214875 :                 std::sync::atomic::Ordering::Relaxed,
     509       214875 :                 |v| Some((v & !mask) | (value & mask)),
     510       214875 :             )
     511       214875 :             .expect("Inner function is infallible")
     512       214875 :     }
     513              : 
     514       214047 :     fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
      515       214047 :         // Mask the timestamp down to its low 29 bits; read_low_res_timestamp() restores the dropped high bits
     516       214047 :         let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
     517       214047 : 
     518       214047 :         ((Self::TS_MASK as u64) << shift, timestamp << shift)
     519       214047 :     }
     520              : 
     521           46 :     fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
     522           46 :         let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
     523           46 : 
     524           46 :         let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
     525           46 :         if ts_bits == 0 {
     526           16 :             None
     527              :         } else {
     528           30 :             Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
     529              :         }
     530           46 :     }
     531              : 
     532              :     /// Record a change in layer residency.
     533              :     ///
     534              :     /// Recording the event must happen while holding the layer map lock to
     535              :     /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
     536              :     /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
     537              :     ///
     538              :     /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
     539              :     /// the following race could happen:
     540              :     ///
     541              :     /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
     542              :     /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
     543              :     /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
     544              :     /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
     545           26 :     pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
     546           26 :         let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
     547           26 :         self.write_bits(mask, value);
     548           26 :     }
     549              : 
     550           24 :     pub(crate) fn record_residence_event(&self) {
     551           24 :         self.record_residence_event_at(SystemTime::now())
     552           24 :     }
     553              : 
     554       212357 :     pub(crate) fn record_access_at(&self, now: SystemTime) {
     555       212357 :         let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
     556       212357 : 
     557       212357 :         // A layer which is accessed must be visible.
     558       212357 :         mask |= 0x1 << Self::VISIBILITY_SHIFT;
     559       212357 :         value |= 0x1 << Self::VISIBILITY_SHIFT;
     560       212357 : 
     561       212357 :         self.write_bits(mask, value);
     562       212357 :     }
     563              : 
     564       212631 :     pub(crate) fn record_access(&self, ctx: &RequestContext) {
     565       212631 :         if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
     566          276 :             return;
     567       212355 :         }
     568       212355 : 
     569       212355 :         self.record_access_at(SystemTime::now())
     570       212631 :     }
     571              : 
     572            0 :     fn as_api_model(
     573            0 :         &self,
     574            0 :         reset: LayerAccessStatsReset,
     575            0 :     ) -> pageserver_api::models::LayerAccessStats {
     576            0 :         let ret = pageserver_api::models::LayerAccessStats {
     577            0 :             access_time: self
     578            0 :                 .read_low_res_timestamp(Self::ATIME_SHIFT)
     579            0 :                 .unwrap_or(UNIX_EPOCH),
     580            0 :             residence_time: self
     581            0 :                 .read_low_res_timestamp(Self::RTIME_SHIFT)
     582            0 :                 .unwrap_or(UNIX_EPOCH),
     583            0 :             visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
     584              :         };
     585            0 :         match reset {
     586            0 :             LayerAccessStatsReset::NoReset => {}
     587            0 :             LayerAccessStatsReset::AllStats => {
     588            0 :                 self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
     589            0 :                 self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
     590            0 :             }
     591              :         }
     592            0 :         ret
     593            0 :     }
     594              : 
     595              :     /// Get the latest access timestamp, falling back to latest residence event.  The latest residence event
     596              :     /// will be this Layer's construction time, if its residence hasn't changed since then.
     597            8 :     pub(crate) fn latest_activity(&self) -> SystemTime {
     598            8 :         if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
     599            6 :             t
     600              :         } else {
     601            2 :             self.read_low_res_timestamp(Self::RTIME_SHIFT)
     602            2 :                 .expect("Residence time is set on construction")
     603              :         }
     604            8 :     }
     605              : 
     606              :     /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).
     607              :     ///
     608              :     /// This indicates whether the layer has been used for some purpose that would motivate
     609              :     /// us to keep it on disk, such as for serving a getpage request.
     610           18 :     fn accessed(&self) -> bool {
     611           18 :         // Consider it accessed if the most recent access is more recent than
     612           18 :         // the most recent change in residence status.
     613           18 :         match (
     614           18 :             self.read_low_res_timestamp(Self::ATIME_SHIFT),
     615           18 :             self.read_low_res_timestamp(Self::RTIME_SHIFT),
     616              :         ) {
     617           14 :             (None, _) => false,
     618            0 :             (Some(_), None) => true,
     619            4 :             (Some(a), Some(r)) => a >= r,
     620              :         }
     621           18 :     }
     622              : 
     623              :     /// Helper for extracting the visibility hint from the literal value of our inner u64
     624         3002 :     fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
     625         3002 :         match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
     626         2970 :             1 => LayerVisibilityHint::Visible,
     627           32 :             0 => LayerVisibilityHint::Covered,
     628            0 :             _ => unreachable!(),
     629              :         }
     630         3002 :     }
     631              : 
     632              :     /// Returns the old value which has been replaced
     633         2492 :     pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
     634         2492 :         let value = match visibility {
     635         2434 :             LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
     636           58 :             LayerVisibilityHint::Covered => 0x0,
     637              :         };
     638              : 
     639         2492 :         let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
     640         2492 :         self.decode_visibility(old_bits)
     641         2492 :     }
     642              : 
     643          510 :     pub(crate) fn visibility(&self) -> LayerVisibilityHint {
     644          510 :         let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
     645          510 :         self.decode_visibility(read)
     646          510 :     }
     647              : }
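
A standalone sketch of the resulting u64 layout and the timestamp round trip (constants copied from above; the example timestamp is arbitrary):

    fn main() {
        const TS_ONES: u32 = 0x60_00_00_00;
        const TS_DROP_HIGH_BITS: u32 = TS_ONES.count_ones() + 1; // 3
        const TS_MASK: u32 = 0x1f_ff_ff_ff; // low 29 bits
        const RTIME_SHIFT: u32 = 32 - TS_DROP_HIGH_BITS; // 29
        const VISIBILITY_SHIFT: u32 = 64 - 2 * TS_DROP_HIGH_BITS; // 58

        // u64 layout: [63..59 unused][58 visibility][57..29 rtime][28..0 atime]
        let now: u64 = 1_722_633_267; // unix seconds, mid-2024
        let atime = now & TS_MASK as u64;
        let rtime = 1_722_000_000u64 & TS_MASK as u64;
        let packed = atime | (rtime << RTIME_SHIFT) | (1 << VISIBILITY_SHIFT);

        // Reads restore the dropped high bits by OR-ing TS_ONES back in,
        // mirroring read_low_res_timestamp().
        let atime_restored = (packed & TS_MASK as u64) | TS_ONES as u64;
        assert_eq!(atime_restored, now);
    }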
     648              : 
     649              : /// Get a layer descriptor from a layer.
     650              : pub(crate) trait AsLayerDesc {
     651              :     /// Get the layer descriptor.
     652              :     fn layer_desc(&self) -> &PersistentLayerDesc;
     653              : }
     654              : 
     655              : pub mod tests {
     656              :     use pageserver_api::shard::TenantShardId;
     657              :     use utils::id::TimelineId;
     658              : 
     659              :     use super::*;
     660              : 
     661              :     impl From<DeltaLayerName> for PersistentLayerDesc {
     662            0 :         fn from(value: DeltaLayerName) -> Self {
     663            0 :             PersistentLayerDesc::new_delta(
     664            0 :                 TenantShardId::from([0; 18]),
     665            0 :                 TimelineId::from_array([0; 16]),
     666            0 :                 value.key_range,
     667            0 :                 value.lsn_range,
     668            0 :                 233,
     669            0 :             )
     670            0 :         }
     671              :     }
     672              : 
     673              :     impl From<ImageLayerName> for PersistentLayerDesc {
     674            0 :         fn from(value: ImageLayerName) -> Self {
     675            0 :             PersistentLayerDesc::new_img(
     676            0 :                 TenantShardId::from([0; 18]),
     677            0 :                 TimelineId::from_array([0; 16]),
     678            0 :                 value.key_range,
     679            0 :                 value.lsn,
     680            0 :                 233,
     681            0 :             )
     682            0 :         }
     683              :     }
     684              : 
     685              :     impl From<LayerName> for PersistentLayerDesc {
     686            0 :         fn from(value: LayerName) -> Self {
     687            0 :             match value {
     688            0 :                 LayerName::Delta(d) => Self::from(d),
     689            0 :                 LayerName::Image(i) => Self::from(i),
     690              :             }
     691            0 :         }
     692              :     }
     693              : }
     694              : 
      695              : /// Range wrapping newtype, which uses Display to render Debug.
      696              : ///
      697              : /// Useful with `Key`, whose `{:?}` output is too verbose when printing multiple layers.
     698              : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
     699              : 
     700              : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
     701            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     702            0 :         write!(f, "{}..{}", self.0.start, self.0.end)
     703            0 :     }
     704              : }
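
A hypothetical test, with u32 standing in for `Key` (both implement Display):

    #[test]
    fn range_display_debug_uses_display() {
        let r = 3u32..9;
        assert_eq!(format!("{:?}", RangeDisplayDebug(&r)), "3..9");
    }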
        

Generated by: LCOV version 2.1-beta