Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod delta_layer;
4 : pub mod image_layer;
5 : pub(crate) mod inmemory_layer;
6 : pub(crate) mod layer;
7 : mod layer_desc;
8 : mod layer_name;
9 : pub mod merge_iterator;
10 :
11 : #[cfg(test)]
12 : pub mod split_writer;
13 :
14 : use crate::context::{AccessStatsBehavior, RequestContext};
15 : use crate::repository::Value;
16 : use crate::walrecord::NeonWalRecord;
17 : use bytes::Bytes;
18 : use pageserver_api::key::Key;
19 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
20 : use std::cmp::{Ordering, Reverse};
21 : use std::collections::hash_map::Entry;
22 : use std::collections::{BinaryHeap, HashMap};
23 : use std::ops::Range;
24 : use std::sync::Arc;
25 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
26 :
27 : use utils::lsn::Lsn;
28 :
29 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
30 : pub use image_layer::{ImageLayer, ImageLayerWriter};
31 : pub use inmemory_layer::InMemoryLayer;
32 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
33 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
34 :
35 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
36 :
37 : use self::inmemory_layer::InMemoryLayerFileId;
38 :
39 : use super::timeline::GetVectoredError;
40 : use super::PageReconstructError;
41 :
42 0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
43 0 : where
44 0 : T: PartialOrd<T>,
45 0 : {
46 0 : if a.start < b.start {
47 0 : a.end > b.start
48 : } else {
49 0 : b.end > a.start
50 : }
51 0 : }
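// Minimal usage sketch (added; not part of the original source): `range_overlaps`
// treats both ranges as half-open, so ranges that merely touch do not overlap.
#[cfg(test)]
mod range_overlaps_example {
    use super::range_overlaps;

    #[test]
    fn half_open_semantics() {
        assert!(range_overlaps(&(0..10), &(5..15))); // partial overlap
        assert!(range_overlaps(&(0..10), &(2..3))); // containment counts as overlap
        assert!(!range_overlaps(&(0..10), &(10..20))); // touching endpoints do not overlap
        assert!(!range_overlaps(&(0..5), &(7..9))); // disjoint ranges
    }
}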
52 :
53 : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
54 : ///
55 : /// Before first call, you can fill in 'page_img' if you have an older cached
56 : /// version of the page available. That can save work in
57 : /// 'get_value_reconstruct_data', as it can stop searching for page versions
58 : /// when all the WAL records going back to the cached image have been collected.
59 : ///
60 : /// When 'get_value_reconstruct_data' returns Complete, either 'img' is set to an
61 : /// image of the page, or the oldest WAL record in 'records' is a will_init-type
62 : /// record that initializes the page without requiring a previous image.
63 : ///
64 : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
65 : /// been collected, but there are more records outside the current layer. Pass
66 : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
67 : /// call, to collect more records.
68 : ///
69 : #[derive(Debug, Default)]
70 : pub(crate) struct ValueReconstructState {
71 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
72 : pub(crate) img: Option<(Lsn, Bytes)>,
73 : }
74 :
75 : #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
76 : pub(crate) enum ValueReconstructSituation {
77 : Complete,
78 : #[default]
79 : Continue,
80 : }
81 :
82 : /// Reconstruct data accumulated for a single key during a vectored get
83 : #[derive(Debug, Default, Clone)]
84 : pub(crate) struct VectoredValueReconstructState {
85 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
86 : pub(crate) img: Option<(Lsn, Bytes)>,
87 :
88 : situation: ValueReconstructSituation,
89 : }
90 :
91 : impl VectoredValueReconstructState {
92 40226 : fn get_cached_lsn(&self) -> Option<Lsn> {
93 40226 : self.img.as_ref().map(|img| img.0)
94 40226 : }
95 : }
96 :
97 : impl From<VectoredValueReconstructState> for ValueReconstructState {
98 666707 : fn from(mut state: VectoredValueReconstructState) -> Self {
99 666707 : // walredo expects the records to be descending in terms of Lsn
100 666707 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
101 666707 :
102 666707 : ValueReconstructState {
103 666707 : records: state.records,
104 666707 : img: state.img,
105 666707 : }
106 666707 : }
107 : }
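// Hedged sketch (added; not part of the original source): converting the vectored
// state sorts the collected WAL records so the newest LSN comes first, as walredo
// expects. Assumes the NeonWalRecord::Postgres { will_init, rec } variant is available.
#[cfg(test)]
mod vectored_state_conversion_example {
    use super::{ValueReconstructState, VectoredValueReconstructState};
    use crate::walrecord::NeonWalRecord;
    use bytes::Bytes;
    use utils::lsn::Lsn;

    #[test]
    fn records_are_sorted_newest_first() {
        let mut vectored = VectoredValueReconstructState::default();
        for lsn in [Lsn(0x10), Lsn(0x30), Lsn(0x20)] {
            vectored.records.push((
                lsn,
                NeonWalRecord::Postgres {
                    will_init: false,
                    rec: Bytes::new(),
                },
            ));
        }

        // The From impl above sorts by Reverse(lsn), i.e. descending LSN order.
        let state: ValueReconstructState = vectored.into();
        let lsns: Vec<Lsn> = state.records.iter().map(|(lsn, _)| *lsn).collect();
        assert_eq!(lsns, vec![Lsn(0x30), Lsn(0x20), Lsn(0x10)]);
    }
}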
108 :
109 : /// Bag of data accumulated during a vectored get.
110 : pub(crate) struct ValuesReconstructState {
111 : /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
112 : /// should not expect to get anything from this hashmap.
113 : pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
114 : /// The keys that have already been retrieved
115 : keys_done: KeySpaceRandomAccum,
116 :
117 : /// The keys covered by the image layers
118 : keys_with_image_coverage: Option<Range<Key>>,
119 :
120 : // Statistics that remain accessible to the caller of `get_vectored_impl`.
121 : layers_visited: u32,
122 : delta_layers_visited: u32,
123 : }
124 :
125 : impl ValuesReconstructState {
126 626397 : pub(crate) fn new() -> Self {
127 626397 : Self {
128 626397 : keys: HashMap::new(),
129 626397 : keys_done: KeySpaceRandomAccum::new(),
130 626397 : keys_with_image_coverage: None,
131 626397 : layers_visited: 0,
132 626397 : delta_layers_visited: 0,
133 626397 : }
134 626397 : }
135 :
136 : /// Associate a key with the error it encountered and mark it as done
137 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
138 0 : let previous = self.keys.insert(key, Err(err));
139 0 : if let Some(Ok(state)) = previous {
140 0 : if state.situation == ValueReconstructSituation::Continue {
141 0 : self.keys_done.add_key(key);
142 0 : }
143 0 : }
144 0 : }
145 :
146 818321 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
147 818321 : self.layers_visited += 1;
148 818321 : if let ReadableLayer::PersistentLayer(layer) = layer {
149 212178 : if layer.layer_desc().is_delta() {
150 204452 : self.delta_layers_visited += 1;
151 204452 : }
152 606143 : }
153 818321 : }
154 :
155 24 : pub(crate) fn get_delta_layers_visited(&self) -> u32 {
156 24 : self.delta_layers_visited
157 24 : }
158 :
159 626167 : pub(crate) fn get_layers_visited(&self) -> u32 {
160 626167 : self.layers_visited
161 626167 : }
162 :
163 : /// This function is called after reading a keyspace from a layer.
164 : /// It checks if the read path has now moved past the cached Lsn for any keys.
165 : ///
166 : /// Implementation note: We intentionally iterate over the keys for which we've
167 : /// already collected some reconstruct data. This avoids scaling complexity with
168 : /// the size of the search space.
169 810595 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
170 810595 : for (key, value) in self.keys.iter_mut() {
171 691065 : if !keyspace.contains(key) {
172 42126 : continue;
173 648939 : }
174 :
175 648939 : if let Ok(state) = value {
176 648939 : if state.situation != ValueReconstructSituation::Complete
177 266 : && state.get_cached_lsn() >= Some(advanced_to)
178 0 : {
179 0 : state.situation = ValueReconstructSituation::Complete;
180 0 : self.keys_done.add_key(*key);
181 648939 : }
182 0 : }
183 : }
184 810595 : }
185 :
186 : /// On hitting an image layer, we can mark all keys in its range as done, because
187 : /// if the image layer does not contain a key, that key was deleted or never added.
188 7738 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
189 7738 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
190 7738 : assert_eq!(
191 : prev_val, None,
192 0 : "should consume the keyspace before the next iteration"
193 : );
194 7738 : }
195 :
196 : /// Update the state collected for a given key.
197 : /// Returns `ValueReconstructSituation::Complete` if this was the last value needed
198 : /// for the key, and `Continue` otherwise.
199 : /// If the key is done after the update, it is marked as such.
200 667127 : pub(crate) fn update_key(
201 667127 : &mut self,
202 667127 : key: &Key,
203 667127 : lsn: Lsn,
204 667127 : value: Value,
205 667127 : ) -> ValueReconstructSituation {
206 667127 : let state = self
207 667127 : .keys
208 667127 : .entry(*key)
209 667127 : .or_insert(Ok(VectoredValueReconstructState::default()));
210 :
211 667127 : if let Ok(state) = state {
212 667127 : let key_done = match state.situation {
213 0 : ValueReconstructSituation::Complete => unreachable!(),
214 667127 : ValueReconstructSituation::Continue => match value {
215 666783 : Value::Image(img) => {
216 666783 : state.img = Some((lsn, img));
217 666783 : true
218 : }
219 344 : Value::WalRecord(rec) => {
220 344 : debug_assert!(
221 344 : Some(lsn) > state.get_cached_lsn(),
222 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
223 0 : lsn,
224 0 : state
225 0 : .get_cached_lsn()
226 0 : .expect("Assertion can only fire if a cached lsn is present")
227 : );
228 :
229 344 : let will_init = rec.will_init();
230 344 : state.records.push((lsn, rec));
231 344 : will_init
232 : }
233 : },
234 : };
235 :
236 667127 : if key_done && state.situation == ValueReconstructSituation::Continue {
237 666783 : state.situation = ValueReconstructSituation::Complete;
238 666783 : self.keys_done.add_key(*key);
239 666783 : }
240 :
241 667127 : state.situation
242 : } else {
243 0 : ValueReconstructSituation::Complete
244 : }
245 667127 : }
246 :
247 : /// Returns the Lsn at which this key is cached if one exists.
248 : /// The read path should go no further than this Lsn for the given key.
249 1083210 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
250 1083210 : self.keys
251 1083210 : .get(key)
252 1083210 : .and_then(|k| k.as_ref().ok())
253 1083210 : .and_then(|state| state.get_cached_lsn())
254 1083210 : }
255 :
256 : /// Returns the key space describing the keys that have
257 : /// been marked as completed since the last call to this function.
258 : /// Returns individual keys done, and the image layer coverage.
259 1669453 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
260 1669453 : (
261 1669453 : self.keys_done.consume_keyspace(),
262 1669453 : self.keys_with_image_coverage.take(),
263 1669453 : )
264 1669453 : }
265 : }
266 :
267 : impl Default for ValuesReconstructState {
268 70 : fn default() -> Self {
269 70 : Self::new()
270 70 : }
271 : }
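// Hedged usage sketch (added; not part of the original source): feeding an image
// value into ValuesReconstructState completes the key, and consume_done_keys()
// then returns it exactly once. Key::from_i128 is assumed to be available on
// pageserver_api::key::Key.
#[cfg(test)]
mod values_reconstruct_state_example {
    use super::{ValueReconstructSituation, ValuesReconstructState};
    use crate::repository::Value;
    use bytes::Bytes;
    use pageserver_api::key::Key;
    use utils::lsn::Lsn;

    #[test]
    fn image_value_completes_a_key() {
        let mut state = ValuesReconstructState::new();
        let key = Key::from_i128(0x10);

        // An image needs no further history, so the key is immediately complete.
        let situation =
            state.update_key(&key, Lsn(0x20), Value::Image(Bytes::from_static(b"img")));
        assert_eq!(situation, ValueReconstructSituation::Complete);

        // The completed key is handed back once; no image-layer coverage was recorded.
        let (done, image_coverage) = state.consume_done_keys();
        assert!(done.contains(&key));
        assert!(image_coverage.is_none());
    }
}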
272 :
273 : /// A key that uniquely identifies a layer in a timeline
274 : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
275 : pub(crate) enum LayerId {
276 : PersitentLayerId(PersistentLayerKey),
277 : InMemoryLayerId(InMemoryLayerFileId),
278 : }
279 :
280 : /// Layer wrapper for the read path. Note that it is valid
281 : /// to use these layers even after external operations have
282 : /// been performed on them (compaction, freeze, etc.).
283 : #[derive(Debug)]
284 : pub(crate) enum ReadableLayer {
285 : PersistentLayer(Layer),
286 : InMemoryLayer(Arc<InMemoryLayer>),
287 : }
288 :
289 : /// A partial description of a read to be done.
290 : #[derive(Debug, Clone)]
291 : struct ReadDesc {
292 : /// An id used to resolve the readable layer within the fringe
293 : layer_id: LayerId,
294 : /// Lsn range for the read, used for selecting the next read
295 : lsn_range: Range<Lsn>,
296 : }
297 :
298 : /// Data structure which maintains a fringe of layers for the
299 : /// read path. The fringe is the set of layers that intersect
300 : /// the current keyspace which the search is descending through.
301 : /// Each layer tracks the keyspace that intersects it.
302 : ///
303 : /// Reads are planned in descending Lsn order, so the fringe uses a two-level index:
304 : /// a max-heap of read descriptors ordered by Lsn, plus a per-layer keyspace map.
305 : #[derive(Debug)]
306 : pub(crate) struct LayerFringe {
307 : planned_reads_by_lsn: BinaryHeap<ReadDesc>,
308 : layers: HashMap<LayerId, LayerKeyspace>,
309 : }
310 :
311 : #[derive(Debug)]
312 : struct LayerKeyspace {
313 : layer: ReadableLayer,
314 : target_keyspace: KeySpaceRandomAccum,
315 : }
316 :
317 : impl LayerFringe {
318 851132 : pub(crate) fn new() -> Self {
319 851132 : LayerFringe {
320 851132 : planned_reads_by_lsn: BinaryHeap::new(),
321 851132 : layers: HashMap::new(),
322 851132 : }
323 851132 : }
324 :
325 1669453 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
326 1669453 : let read_desc = match self.planned_reads_by_lsn.pop() {
327 818321 : Some(desc) => desc,
328 851132 : None => return None,
329 : };
330 :
331 818321 : let removed = self.layers.remove_entry(&read_desc.layer_id);
332 818321 :
333 818321 : match removed {
334 : Some((
335 : _,
336 : LayerKeyspace {
337 818321 : layer,
338 818321 : mut target_keyspace,
339 818321 : },
340 818321 : )) => Some((
341 818321 : layer,
342 818321 : target_keyspace.consume_keyspace(),
343 818321 : read_desc.lsn_range,
344 818321 : )),
345 0 : None => unreachable!("fringe internals are always consistent"),
346 : }
347 1669453 : }
348 :
349 881951 : pub(crate) fn update(
350 881951 : &mut self,
351 881951 : layer: ReadableLayer,
352 881951 : keyspace: KeySpace,
353 881951 : lsn_range: Range<Lsn>,
354 881951 : ) {
355 881951 : let layer_id = layer.id();
356 881951 : let entry = self.layers.entry(layer_id.clone());
357 881951 : match entry {
358 63630 : Entry::Occupied(mut entry) => {
359 63630 : entry.get_mut().target_keyspace.add_keyspace(keyspace);
360 63630 : }
361 818321 : Entry::Vacant(entry) => {
362 818321 : self.planned_reads_by_lsn.push(ReadDesc {
363 818321 : lsn_range,
364 818321 : layer_id: layer_id.clone(),
365 818321 : });
366 818321 : let mut accum = KeySpaceRandomAccum::new();
367 818321 : accum.add_keyspace(keyspace);
368 818321 : entry.insert(LayerKeyspace {
369 818321 : layer,
370 818321 : target_keyspace: accum,
371 818321 : });
372 818321 : }
373 : }
374 881951 : }
375 : }
376 :
377 : impl Default for LayerFringe {
378 0 : fn default() -> Self {
379 0 : Self::new()
380 0 : }
381 : }
382 :
383 : impl Ord for ReadDesc {
384 16 : fn cmp(&self, other: &Self) -> Ordering {
385 16 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
386 16 : if ord == std::cmp::Ordering::Equal {
387 16 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
388 : } else {
389 0 : ord
390 : }
391 16 : }
392 : }
393 :
394 : impl PartialOrd for ReadDesc {
395 16 : fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
396 16 : Some(self.cmp(other))
397 16 : }
398 : }
399 :
400 : impl PartialEq for ReadDesc {
401 0 : fn eq(&self, other: &Self) -> bool {
402 0 : self.lsn_range == other.lsn_range
403 0 : }
404 : }
405 :
406 : impl Eq for ReadDesc {}
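// Hedged sketch (added; not part of the original source): the Ord impl above makes
// the fringe's BinaryHeap pop reads with the highest end LSN first, preferring the
// wider range on ties. `PlannedRead` is a hypothetical stand-in that mirrors the
// comparison on a bare Lsn range (the real ReadDesc also carries a LayerId).
#[cfg(test)]
mod read_plan_order_example {
    use std::cmp::Ordering;
    use std::collections::BinaryHeap;
    use std::ops::Range;
    use utils::lsn::Lsn;

    #[derive(Eq, PartialEq)]
    struct PlannedRead(Range<Lsn>);

    impl Ord for PlannedRead {
        fn cmp(&self, other: &Self) -> Ordering {
            // Same rule as ReadDesc: compare by end LSN, then prefer the wider range.
            self.0
                .end
                .cmp(&other.0.end)
                .then_with(|| self.0.start.cmp(&other.0.start).reverse())
        }
    }

    impl PartialOrd for PlannedRead {
        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }

    #[test]
    fn reads_pop_in_descending_lsn_order() {
        let mut heap = BinaryHeap::new();
        heap.push(PlannedRead(Lsn(0x10)..Lsn(0x20)));
        heap.push(PlannedRead(Lsn(0x20)..Lsn(0x40)));
        heap.push(PlannedRead(Lsn(0x30)..Lsn(0x40))); // same end LSN, narrower range

        assert_eq!(heap.pop().unwrap().0, Lsn(0x20)..Lsn(0x40)); // highest end, wider range
        assert_eq!(heap.pop().unwrap().0, Lsn(0x30)..Lsn(0x40));
        assert_eq!(heap.pop().unwrap().0, Lsn(0x10)..Lsn(0x20));
    }
}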
407 :
408 : impl ReadableLayer {
409 881951 : pub(crate) fn id(&self) -> LayerId {
410 881951 : match self {
411 275808 : Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),
412 606143 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
413 : }
414 881951 : }
415 :
416 818321 : pub(crate) async fn get_values_reconstruct_data(
417 818321 : &self,
418 818321 : keyspace: KeySpace,
419 818321 : lsn_range: Range<Lsn>,
420 818321 : reconstruct_state: &mut ValuesReconstructState,
421 818321 : ctx: &RequestContext,
422 818321 : ) -> Result<(), GetVectoredError> {
423 818321 : match self {
424 212178 : ReadableLayer::PersistentLayer(layer) => {
425 212178 : layer
426 212178 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
427 98375 : .await
428 : }
429 606143 : ReadableLayer::InMemoryLayer(layer) => {
430 606143 : layer
431 606143 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
432 9460 : .await
433 : }
434 : }
435 818321 : }
436 : }
437 :
438 : /// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
439 : /// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
440 : /// of layers (for example when creating a branch that makes some previously covered layers visible). It should
441 : /// be used for cache management but not for correctness-critical checks.
442 : #[derive(Debug, Clone, PartialEq, Eq)]
443 : pub enum LayerVisibilityHint {
444 : /// A Visible layer might be read while serving a request, because there is no image layer between it
445 : /// and a readable LSN (the tip of the branch or a child's branch point)
446 : Visible,
447 : /// A Covered layer probably won't be read right now, but _can_ be read in the future if someone creates
448 : /// a branch or ephemeral endpoint at an LSN below the image layer that covers it.
449 : Covered,
450 : }
451 :
452 : pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
453 :
454 0 : #[derive(Clone, Copy, strum_macros::EnumString)]
455 : pub(crate) enum LayerAccessStatsReset {
456 : NoReset,
457 : AllStats,
458 : }
459 :
460 : impl Default for LayerAccessStats {
461 1708 : fn default() -> Self {
462 1708 : // Default value is to assume resident since creation time, and visible.
463 1708 : let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
464 1708 : value |= 0x1 << Self::VISIBILITY_SHIFT;
465 1708 :
466 1708 : Self(std::sync::atomic::AtomicU64::new(value))
467 1708 : }
468 : }
469 :
470 : // Efficient store of two very-low-resolution timestamps and some bits. Used for storing last access time and
471 : // last residence change time.
472 : impl LayerAccessStats {
473 : // How many high bits to drop from a u32 timestamp?
474 : // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
475 : // after that, this software has been very successful!)
476 : // - Dropping the top bit is implicitly safe because unix timestamps are meant to be
477 : // stored in an i32, so they never used it.
478 : // - Dropping the next two bits is safe because this code is only running on systems in
479 : // years >= 2024, and these bits have been 1 since 2021
480 : //
481 : // Therefore we may store only 28 bits for a timestamp with one second resolution. We do
482 : // this truncation to make space for some flags in the high bits of our u64.
483 : const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
484 : const TS_MASK: u32 = 0x1f_ff_ff_ff;
485 : const TS_ONES: u32 = 0x60_00_00_00;
486 :
487 : const ATIME_SHIFT: u32 = 0;
488 : const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
489 : const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
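// Worked layout example (added for clarity; derived from the constants above):
//   TS_DROP_HIGH_BITS = count_ones(0x60_00_00_00) + 1 = 3
//   => each timestamp keeps 32 - 3 = 29 bits (TS_MASK = 0x1f_ff_ff_ff)
//   bits  0..=28 : last access time      (ATIME_SHIFT = 0)
//   bits 29..=57 : last residence change (RTIME_SHIFT = 29)
//   bit  58      : visibility hint       (VISIBILITY_SHIFT = 58)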
490 :
491 214854 : fn write_bits(&self, mask: u64, value: u64) -> u64 {
492 214854 : self.0
493 214854 : .fetch_update(
494 214854 : // TODO: decide what orderings are correct
495 214854 : std::sync::atomic::Ordering::Relaxed,
496 214854 : std::sync::atomic::Ordering::Relaxed,
497 214854 : |v| Some((v & !mask) | (value & mask)),
498 214854 : )
499 214854 : .expect("Inner function is infallible")
500 214854 : }
501 :
502 214052 : fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
502 214052 : // Keep only the low 29 bits of the timestamp (1-second resolution); the dropped high bits are reconstructed via TS_ONES on read.
504 214052 : let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
505 214052 :
506 214052 : ((Self::TS_MASK as u64) << shift, timestamp << shift)
507 214052 : }
508 :
509 62 : fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
510 62 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
511 62 :
512 62 : let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
513 62 : if ts_bits == 0 {
514 24 : None
515 : } else {
516 38 : Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
517 : }
518 62 : }
519 :
520 : /// Record a change in layer residency.
521 : ///
522 : /// Recording the event must happen while holding the layer map lock to
523 : /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
524 : /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
525 : ///
526 : /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
527 : /// the following race could happen:
528 : ///
529 : /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
530 : /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
531 : /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
532 : /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
533 26 : pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
534 26 : let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
535 26 : self.write_bits(mask, value);
536 26 : }
537 :
538 24 : pub(crate) fn record_residence_event(&self) {
539 24 : self.record_residence_event_at(SystemTime::now())
540 24 : }
541 :
542 212318 : fn record_access_at(&self, now: SystemTime) -> bool {
543 212318 : let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
544 212318 :
545 212318 : // A layer which is accessed must be visible.
546 212318 : mask |= 0x1 << Self::VISIBILITY_SHIFT;
547 212318 : value |= 0x1 << Self::VISIBILITY_SHIFT;
548 212318 :
549 212318 : let old_bits = self.write_bits(mask, value);
550 0 : !matches!(
551 212318 : self.decode_visibility(old_bits),
552 : LayerVisibilityHint::Visible
553 : )
554 212318 : }
555 :
556 : /// Returns true if this access implicitly changed the layer's visibility
557 : /// hint to Visible.
558 212592 : pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {
559 212592 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
560 276 : return false;
561 212316 : }
562 212316 :
563 212316 : self.record_access_at(SystemTime::now())
564 212592 : }
565 :
566 0 : fn as_api_model(
567 0 : &self,
568 0 : reset: LayerAccessStatsReset,
569 0 : ) -> pageserver_api::models::LayerAccessStats {
570 0 : let ret = pageserver_api::models::LayerAccessStats {
571 0 : access_time: self
572 0 : .read_low_res_timestamp(Self::ATIME_SHIFT)
573 0 : .unwrap_or(UNIX_EPOCH),
574 0 : residence_time: self
575 0 : .read_low_res_timestamp(Self::RTIME_SHIFT)
576 0 : .unwrap_or(UNIX_EPOCH),
577 0 : visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
578 : };
579 0 : match reset {
580 0 : LayerAccessStatsReset::NoReset => {}
581 0 : LayerAccessStatsReset::AllStats => {
582 0 : self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
583 0 : self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
584 0 : }
585 : }
586 0 : ret
587 0 : }
588 :
589 : /// Get the latest access timestamp, falling back to latest residence event. The latest residence event
590 : /// will be this Layer's construction time, if its residence hasn't changed since then.
591 16 : pub(crate) fn latest_activity(&self) -> SystemTime {
592 16 : if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
593 6 : t
594 : } else {
595 10 : self.read_low_res_timestamp(Self::RTIME_SHIFT)
596 10 : .expect("Residence time is set on construction")
597 : }
598 16 : }
599 :
600 : /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).
601 : ///
602 : /// This indicates whether the layer has been used for some purpose that would motivate
603 : /// us to keep it on disk, such as for serving a getpage request.
604 18 : fn accessed(&self) -> bool {
605 18 : // Consider it accessed if the most recent access is more recent than
606 18 : // the most recent change in residence status.
607 18 : match (
608 18 : self.read_low_res_timestamp(Self::ATIME_SHIFT),
609 18 : self.read_low_res_timestamp(Self::RTIME_SHIFT),
610 : ) {
611 14 : (None, _) => false,
612 0 : (Some(_), None) => true,
613 4 : (Some(a), Some(r)) => a >= r,
614 : }
615 18 : }
616 :
617 : /// Helper for extracting the visibility hint from the literal value of our inner u64
618 215378 : fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
619 215378 : match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
620 215344 : 1 => LayerVisibilityHint::Visible,
621 34 : 0 => LayerVisibilityHint::Covered,
622 0 : _ => unreachable!(),
623 : }
624 215378 : }
625 :
626 : /// Returns the old value which has been replaced
627 2510 : pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
628 2510 : let value = match visibility {
629 2450 : LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
630 60 : LayerVisibilityHint::Covered => 0x0,
631 : };
632 :
633 2510 : let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
634 2510 : self.decode_visibility(old_bits)
635 2510 : }
636 :
637 550 : pub(crate) fn visibility(&self) -> LayerVisibilityHint {
638 550 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
639 550 : self.decode_visibility(read)
640 550 : }
641 : }
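// Hedged usage sketch (added; not part of the original source): an access implies
// visibility, so recording an access on a Covered layer flips it back to Visible
// and record_access_at reports that transition.
#[cfg(test)]
mod layer_access_stats_example {
    use super::{LayerAccessStats, LayerVisibilityHint};
    use std::time::SystemTime;

    #[test]
    fn access_makes_a_covered_layer_visible() {
        let stats = LayerAccessStats::default();
        assert_eq!(stats.visibility(), LayerVisibilityHint::Visible);

        // Cover the layer, then record an access.
        let prev = stats.set_visibility(LayerVisibilityHint::Covered);
        assert_eq!(prev, LayerVisibilityHint::Visible);

        let became_visible = stats.record_access_at(SystemTime::now());
        assert!(became_visible);
        assert_eq!(stats.visibility(), LayerVisibilityHint::Visible);
    }
}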
642 :
643 : /// Get a layer descriptor from a layer.
644 : pub(crate) trait AsLayerDesc {
645 : /// Get the layer descriptor.
646 : fn layer_desc(&self) -> &PersistentLayerDesc;
647 : }
648 :
649 : pub mod tests {
650 : use pageserver_api::shard::TenantShardId;
651 : use utils::id::TimelineId;
652 :
653 : use super::*;
654 :
655 : impl From<DeltaLayerName> for PersistentLayerDesc {
656 0 : fn from(value: DeltaLayerName) -> Self {
657 0 : PersistentLayerDesc::new_delta(
658 0 : TenantShardId::from([0; 18]),
659 0 : TimelineId::from_array([0; 16]),
660 0 : value.key_range,
661 0 : value.lsn_range,
662 0 : 233,
663 0 : )
664 0 : }
665 : }
666 :
667 : impl From<ImageLayerName> for PersistentLayerDesc {
668 0 : fn from(value: ImageLayerName) -> Self {
669 0 : PersistentLayerDesc::new_img(
670 0 : TenantShardId::from([0; 18]),
671 0 : TimelineId::from_array([0; 16]),
672 0 : value.key_range,
673 0 : value.lsn,
674 0 : 233,
675 0 : )
676 0 : }
677 : }
678 :
679 : impl From<LayerName> for PersistentLayerDesc {
680 0 : fn from(value: LayerName) -> Self {
681 0 : match value {
682 0 : LayerName::Delta(d) => Self::from(d),
683 0 : LayerName::Image(i) => Self::from(i),
684 : }
685 0 : }
686 : }
687 : }
688 :
689 : /// Range-wrapping newtype which uses Display to render Debug.
690 : ///
691 : /// Useful with `Key`, whose `{:?}` output is too verbose for printing multiple layers.
692 : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
693 :
694 : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
695 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
696 0 : write!(f, "{}..{}", self.0.start, self.0.end)
697 0 : }
698 : }
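// Minimal sketch (added; not part of the original source): RangeDisplayDebug renders
// the wrapped range through Display, keeping debug output of key ranges compact.
#[cfg(test)]
mod range_display_debug_example {
    use super::RangeDisplayDebug;

    #[test]
    fn renders_via_display() {
        let range = 10u32..20;
        assert_eq!(format!("{:?}", RangeDisplayDebug(&range)), "10..20");
    }
}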