Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod delta_layer;
4 : pub mod image_layer;
5 : pub(crate) mod inmemory_layer;
6 : pub(crate) mod layer;
7 : mod layer_desc;
8 : mod layer_name;
9 : pub mod merge_iterator;
10 :
11 : use crate::context::{AccessStatsBehavior, RequestContext};
12 : use crate::repository::Value;
13 : use crate::walrecord::NeonWalRecord;
14 : use bytes::Bytes;
15 : use pageserver_api::key::Key;
16 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
17 : use std::cmp::{Ordering, Reverse};
18 : use std::collections::hash_map::Entry;
19 : use std::collections::{BinaryHeap, HashMap};
20 : use std::ops::Range;
21 : use std::sync::Arc;
22 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
23 :
24 : use utils::lsn::Lsn;
25 :
26 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
27 : pub use image_layer::{ImageLayer, ImageLayerWriter};
28 : pub use inmemory_layer::InMemoryLayer;
29 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
30 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
31 :
32 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
33 :
34 : use self::inmemory_layer::InMemoryLayerFileId;
35 :
36 : use super::timeline::GetVectoredError;
37 : use super::PageReconstructError;
38 :
39 0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
40 0 : where
41 0 : T: PartialOrd<T>,
42 0 : {
43 0 : if a.start < b.start {
44 0 : a.end > b.start
45 : } else {
46 0 : b.end > a.start
47 : }
48 0 : }
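// A minimal sketch (added test module, not in the original file) illustrating the
// half-open semantics of `range_overlaps`: touching endpoints do not count as overlap.
#[cfg(test)]
mod range_overlaps_example {
    use super::range_overlaps;

    #[test]
    fn half_open_overlap_semantics() {
        // Sharing only an endpoint is not an overlap for half-open ranges.
        assert!(!range_overlaps(&(0..10), &(10..20)));
        // Any shared point is an overlap, regardless of argument order.
        assert!(range_overlaps(&(0..10), &(5..15)));
        assert!(range_overlaps(&(5..15), &(0..10)));
        // Containment is also an overlap.
        assert!(range_overlaps(&(0..10), &(2..3)));
    }
}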
49 :
50 : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
51 : ///
52 : /// Before first call, you can fill in 'page_img' if you have an older cached
53 : /// version of the page available. That can save work in
54 : /// 'get_value_reconstruct_data', as it can stop searching for page versions
55 : /// when all the WAL records going back to the cached image have been collected.
56 : ///
57 : /// When get_value_reconstruct_data returns Complete, either 'img' is set to an image
58 : /// of the page, or the oldest WAL record in 'records' is a will_init-type
59 : /// record that initializes the page without requiring a previous image.
60 : ///
61 : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
62 : /// been collected, but there are more records outside the current layer. Pass
63 : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
64 : /// call, to collect more records.
65 : ///
66 : #[derive(Debug, Default)]
67 : pub(crate) struct ValueReconstructState {
68 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
69 : pub(crate) img: Option<(Lsn, Bytes)>,
70 : }
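// A minimal sketch (hypothetical helper, not in the original file) of the protocol the
// comment above describes: a caller can seed the state with a cached page image before
// the first 'get_value_reconstruct_data' call so the search can stop at that Lsn.
#[allow(dead_code)]
fn seeded_reconstruct_state_sketch(
    cached_lsn: Lsn,
    cached_page: Bytes,
) -> ValueReconstructState {
    ValueReconstructState {
        records: Vec::new(),
        // Only WAL records newer than this cached image need to be collected.
        img: Some((cached_lsn, cached_page)),
    }
}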
71 :
72 : #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
73 : pub(crate) enum ValueReconstructSituation {
74 : Complete,
75 : #[default]
76 : Continue,
77 : }
78 :
79 : /// Reconstruct data accumulated for a single key during a vectored get
80 : #[derive(Debug, Default, Clone)]
81 : pub(crate) struct VectoredValueReconstructState {
82 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
83 : pub(crate) img: Option<(Lsn, Bytes)>,
84 :
85 : situation: ValueReconstructSituation,
86 : }
87 :
88 : impl VectoredValueReconstructState {
89 40229 : fn get_cached_lsn(&self) -> Option<Lsn> {
90 40229 : self.img.as_ref().map(|img| img.0)
91 40229 : }
92 : }
93 :
94 : impl From<VectoredValueReconstructState> for ValueReconstructState {
95 666556 : fn from(mut state: VectoredValueReconstructState) -> Self {
96 666556 : // walredo expects the records to be descending in terms of Lsn
97 666556 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
98 666556 :
99 666556 : ValueReconstructState {
100 666556 : records: state.records,
101 666556 : img: state.img,
102 666556 : }
103 666556 : }
104 : }
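// Sketch (added test module, not in the original file) showing that the conversion
// sorts records by descending Lsn, the order walredo expects. It assumes the
// `NeonWalRecord::Postgres` variant; the record payloads are dummies.
#[cfg(test)]
mod vectored_state_conversion_example {
    use super::*;
    use bytes::Bytes;

    #[test]
    fn records_sorted_descending_by_lsn() {
        let mut vectored = VectoredValueReconstructState::default();
        for lsn in [0x10, 0x30, 0x20] {
            vectored.records.push((
                Lsn(lsn),
                NeonWalRecord::Postgres {
                    will_init: false,
                    rec: Bytes::new(),
                },
            ));
        }
        let state: ValueReconstructState = vectored.into();
        let lsns: Vec<Lsn> = state.records.iter().map(|(lsn, _)| *lsn).collect();
        assert_eq!(lsns, vec![Lsn(0x30), Lsn(0x20), Lsn(0x10)]);
    }
}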
105 :
106 : /// Bag of data accumulated during a vectored get.
107 : pub(crate) struct ValuesReconstructState {
108 : /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
109 : /// should not expect to get anything from this hashmap.
110 : pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
111 : /// The keys which have already been retrieved
112 : keys_done: KeySpaceRandomAccum,
113 :
114 : /// The keys covered by the image layers
115 : keys_with_image_coverage: Option<Range<Key>>,
116 :
117 : // Statistics that remain accessible to the caller of `get_vectored_impl`.
118 : layers_visited: u32,
119 : delta_layers_visited: u32,
120 : }
121 :
122 : impl ValuesReconstructState {
123 626258 : pub(crate) fn new() -> Self {
124 626258 : Self {
125 626258 : keys: HashMap::new(),
126 626258 : keys_done: KeySpaceRandomAccum::new(),
127 626258 : keys_with_image_coverage: None,
128 626258 : layers_visited: 0,
129 626258 : delta_layers_visited: 0,
130 626258 : }
131 626258 : }
132 :
133 : /// Associate a key with the error which it encountered and mark it as done
134 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
135 0 : let previous = self.keys.insert(key, Err(err));
136 0 : if let Some(Ok(state)) = previous {
137 0 : if state.situation == ValueReconstructSituation::Continue {
138 0 : self.keys_done.add_key(key);
139 0 : }
140 0 : }
141 0 : }
142 :
143 818094 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
144 818094 : self.layers_visited += 1;
145 818094 : if let ReadableLayer::PersistentLayer(layer) = layer {
146 211897 : if layer.layer_desc().is_delta() {
147 204299 : self.delta_layers_visited += 1;
148 204299 : }
149 606197 : }
150 818094 : }
151 :
152 24 : pub(crate) fn get_delta_layers_visited(&self) -> u32 {
153 24 : self.delta_layers_visited
154 24 : }
155 :
156 626032 : pub(crate) fn get_layers_visited(&self) -> u32 {
157 626032 : self.layers_visited
158 626032 : }
159 :
160 : /// This function is called after reading a keyspace from a layer.
161 : /// It checks if the read path has now moved past the cached Lsn for any keys.
162 : ///
163 : /// Implementation note: We intentionally iterate over the keys for which we've
164 : /// already collected some reconstruct data. This avoids scaling complexity with
165 : /// the size of the search space.
166 810496 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
167 810496 : for (key, value) in self.keys.iter_mut() {
168 691183 : if !keyspace.contains(key) {
169 42240 : continue;
170 648943 : }
171 :
172 648943 : if let Ok(state) = value {
173 648943 : if state.situation != ValueReconstructSituation::Complete
174 236 : && state.get_cached_lsn() >= Some(advanced_to)
175 0 : {
176 0 : state.situation = ValueReconstructSituation::Complete;
177 0 : self.keys_done.add_key(*key);
178 648943 : }
179 0 : }
180 : }
181 810496 : }
182 :
183 : /// On hitting an image layer, we can mark all keys in this range as done, because
184 : /// if the image layer does not contain a key, the key was deleted or never existed.
185 7606 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
186 7606 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
187 7606 : assert_eq!(
188 : prev_val, None,
189 0 : "should consume the keyspace before the next iteration"
190 : );
191 7606 : }
192 :
193 : /// Update the state collected for a given key.
194 : /// Returns `Complete` if this was the last value needed for the key and `Continue` otherwise.
195 : ///
196 : /// If the key is done after the update, mark it as such.
197 666936 : pub(crate) fn update_key(
198 666936 : &mut self,
199 666936 : key: &Key,
200 666936 : lsn: Lsn,
201 666936 : value: Value,
202 666936 : ) -> ValueReconstructSituation {
203 666936 : let state = self
204 666936 : .keys
205 666936 : .entry(*key)
206 666936 : .or_insert(Ok(VectoredValueReconstructState::default()));
207 :
208 666936 : if let Ok(state) = state {
209 666936 : let key_done = match state.situation {
210 0 : ValueReconstructSituation::Complete => unreachable!(),
211 666936 : ValueReconstructSituation::Continue => match value {
212 666628 : Value::Image(img) => {
213 666628 : state.img = Some((lsn, img));
214 666628 : true
215 : }
216 308 : Value::WalRecord(rec) => {
217 308 : debug_assert!(
218 308 : Some(lsn) > state.get_cached_lsn(),
219 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
220 0 : lsn,
221 0 : state
222 0 : .get_cached_lsn()
223 0 : .expect("Assertion can only fire if a cached lsn is present")
224 : );
225 :
226 308 : let will_init = rec.will_init();
227 308 : state.records.push((lsn, rec));
228 308 : will_init
229 : }
230 : },
231 : };
232 :
233 666936 : if key_done && state.situation == ValueReconstructSituation::Continue {
234 666628 : state.situation = ValueReconstructSituation::Complete;
235 666628 : self.keys_done.add_key(*key);
236 666628 : }
237 :
238 666936 : state.situation
239 : } else {
240 0 : ValueReconstructSituation::Complete
241 : }
242 666936 : }
243 :
244 : /// Returns the Lsn at which this key is cached if one exists.
245 : /// The read path should go no further than this Lsn for the given key.
246 1082185 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
247 1082185 : self.keys
248 1082185 : .get(key)
249 1082185 : .and_then(|k| k.as_ref().ok())
250 1082185 : .and_then(|state| state.get_cached_lsn())
251 1082185 : }
252 :
253 : /// Returns the key space describing the keys that have
254 : /// been marked as completed since the last call to this function.
255 : /// Returns individual keys done, and the image layer coverage.
256 1669354 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
257 1669354 : (
258 1669354 : self.keys_done.consume_keyspace(),
259 1669354 : self.keys_with_image_coverage.take(),
260 1669354 : )
261 1669354 : }
262 : }
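// A minimal sketch (added test module, not in the original file) of how the vectored
// read path drives `ValuesReconstructState`. `Key::MIN` and `Key::MAX` are assumed to
// be the usual keyspace bounds from `pageserver_api::key`.
#[cfg(test)]
mod values_reconstruct_state_example {
    use super::*;
    use bytes::Bytes;

    #[test]
    fn image_value_completes_a_key() {
        let mut state = ValuesReconstructState::new();
        let key = Key::MIN;
        // An image needs no further records, so the key is immediately complete.
        let situation =
            state.update_key(&key, Lsn(0x10), Value::Image(Bytes::from_static(b"page")));
        assert_eq!(situation, ValueReconstructSituation::Complete);

        let (done, image_coverage) = state.consume_done_keys();
        assert!(done.contains(&key));
        assert!(image_coverage.is_none());
    }

    #[test]
    fn image_layer_coverage_is_consumed_once() {
        let mut state = ValuesReconstructState::new();
        state.on_image_layer_visited(&(Key::MIN..Key::MAX));

        let (_done, coverage) = state.consume_done_keys();
        assert_eq!(coverage, Some(Key::MIN..Key::MAX));
        // The coverage was taken above, so a second consume sees none.
        assert!(state.consume_done_keys().1.is_none());
    }
}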
263 :
264 : impl Default for ValuesReconstructState {
265 66 : fn default() -> Self {
266 66 : Self::new()
267 66 : }
268 : }
269 :
270 : /// A key that uniquely identifies a layer in a timeline
271 : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
272 : pub(crate) enum LayerId {
273 : PersitentLayerId(PersistentLayerKey),
274 : InMemoryLayerId(InMemoryLayerFileId),
275 : }
276 :
277 : /// Layer wrapper for the read path. Note that it is valid
278 : /// to use these layers even after external operations have
279 : /// been performed on them (compaction, freeze, etc.).
280 : #[derive(Debug)]
281 : pub(crate) enum ReadableLayer {
282 : PersistentLayer(Layer),
283 : InMemoryLayer(Arc<InMemoryLayer>),
284 : }
285 :
286 : /// A partial description of a read to be done.
287 : #[derive(Debug, Clone)]
288 : struct ReadDesc {
289 : /// An id used to resolve the readable layer within the fringe
290 : layer_id: LayerId,
291 : /// Lsn range for the read, used for selecting the next read
292 : lsn_range: Range<Lsn>,
293 : }
294 :
295 : /// Data structure which maintains a fringe of layers for the
296 : /// read path. The fringe is the set of layers which intersect
297 : /// the current keyspace that the search is descending on.
298 : /// Each layer tracks the keyspace that intersects it.
299 : ///
300 : /// The fringe must appear sorted by Lsn. Hence, it uses
301 : /// a two-layer indexing scheme.
302 : #[derive(Debug)]
303 : pub(crate) struct LayerFringe {
304 : planned_reads_by_lsn: BinaryHeap<ReadDesc>,
305 : layers: HashMap<LayerId, LayerKeyspace>,
306 : }
307 :
308 : #[derive(Debug)]
309 : struct LayerKeyspace {
310 : layer: ReadableLayer,
311 : target_keyspace: KeySpaceRandomAccum,
312 : }
313 :
314 : impl LayerFringe {
315 851260 : pub(crate) fn new() -> Self {
316 851260 : LayerFringe {
317 851260 : planned_reads_by_lsn: BinaryHeap::new(),
318 851260 : layers: HashMap::new(),
319 851260 : }
320 851260 : }
321 :
322 1669354 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
323 1669354 : let read_desc = match self.planned_reads_by_lsn.pop() {
324 818094 : Some(desc) => desc,
325 851260 : None => return None,
326 : };
327 :
328 818094 : let removed = self.layers.remove_entry(&read_desc.layer_id);
329 818094 :
330 818094 : match removed {
331 : Some((
332 : _,
333 : LayerKeyspace {
334 818094 : layer,
335 818094 : mut target_keyspace,
336 818094 : },
337 818094 : )) => Some((
338 818094 : layer,
339 818094 : target_keyspace.consume_keyspace(),
340 818094 : read_desc.lsn_range,
341 818094 : )),
342 0 : None => unreachable!("fringe internals are always consistent"),
343 : }
344 1669354 : }
345 :
346 881910 : pub(crate) fn update(
347 881910 : &mut self,
348 881910 : layer: ReadableLayer,
349 881910 : keyspace: KeySpace,
350 881910 : lsn_range: Range<Lsn>,
351 881910 : ) {
352 881910 : let layer_id = layer.id();
353 881910 : let entry = self.layers.entry(layer_id.clone());
354 881910 : match entry {
355 63816 : Entry::Occupied(mut entry) => {
356 63816 : entry.get_mut().target_keyspace.add_keyspace(keyspace);
357 63816 : }
358 818094 : Entry::Vacant(entry) => {
359 818094 : self.planned_reads_by_lsn.push(ReadDesc {
360 818094 : lsn_range,
361 818094 : layer_id: layer_id.clone(),
362 818094 : });
363 818094 : let mut accum = KeySpaceRandomAccum::new();
364 818094 : accum.add_keyspace(keyspace);
365 818094 : entry.insert(LayerKeyspace {
366 818094 : layer,
367 818094 : target_keyspace: accum,
368 818094 : });
369 818094 : }
370 : }
371 881910 : }
372 : }
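// A minimal sketch (hypothetical helper, not in the original file) of how the read
// path drains a `LayerFringe`: `next_layer` yields layers ordered by descending end
// Lsn, each with the keyspace still outstanding for it. Re-planning further reads for
// keys that remain incomplete (via `LayerFringe::update`) is elided here.
#[allow(dead_code)]
async fn drain_fringe_sketch(
    mut fringe: LayerFringe,
    reconstruct_state: &mut ValuesReconstructState,
    ctx: &RequestContext,
) -> Result<(), GetVectoredError> {
    while let Some((layer, keyspace, lsn_range)) = fringe.next_layer() {
        layer
            .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
            .await?;
        reconstruct_state.on_layer_visited(&layer);
    }
    Ok(())
}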
373 :
374 : impl Default for LayerFringe {
375 0 : fn default() -> Self {
376 0 : Self::new()
377 0 : }
378 : }
379 :
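// `planned_reads_by_lsn` is a max-heap, so this ordering makes `pop` return the read
// with the highest end Lsn first; ties are broken in favour of the smaller start Lsn,
// because the start comparison is reversed.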
380 : impl Ord for ReadDesc {
381 16 : fn cmp(&self, other: &Self) -> Ordering {
382 16 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
383 16 : if ord == std::cmp::Ordering::Equal {
384 16 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
385 : } else {
386 0 : ord
387 : }
388 16 : }
389 : }
390 :
391 : impl PartialOrd for ReadDesc {
392 16 : fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
393 16 : Some(self.cmp(other))
394 16 : }
395 : }
396 :
397 : impl PartialEq for ReadDesc {
398 0 : fn eq(&self, other: &Self) -> bool {
399 0 : self.lsn_range == other.lsn_range
400 0 : }
401 : }
402 :
403 : impl Eq for ReadDesc {}
404 :
405 : impl ReadableLayer {
406 881910 : pub(crate) fn id(&self) -> LayerId {
407 881910 : match self {
408 275713 : Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),
409 606197 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
410 : }
411 881910 : }
412 :
413 818094 : pub(crate) async fn get_values_reconstruct_data(
414 818094 : &self,
415 818094 : keyspace: KeySpace,
416 818094 : lsn_range: Range<Lsn>,
417 818094 : reconstruct_state: &mut ValuesReconstructState,
418 818094 : ctx: &RequestContext,
419 818094 : ) -> Result<(), GetVectoredError> {
420 818094 : match self {
421 211897 : ReadableLayer::PersistentLayer(layer) => {
422 211897 : layer
423 211897 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
424 98410 : .await
425 : }
426 606197 : ReadableLayer::InMemoryLayer(layer) => {
427 606197 : layer
428 606197 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
429 9405 : .await
430 : }
431 : }
432 818094 : }
433 : }
434 :
435 : /// Return value from [`Layer::get_value_reconstruct_data`]
436 : #[derive(Clone, Copy, Debug)]
437 : pub enum ValueReconstructResult {
438 : /// Got all the data needed to reconstruct the requested page
439 : Complete,
440 : /// This layer didn't contain all the required data; the caller should look up
441 : /// the predecessor layer at the returned LSN and collect more data from there.
442 : Continue,
443 :
444 : /// This layer didn't contain data needed to reconstruct the page version at
445 : /// the returned LSN. This is usually considered an error, but might be OK
446 : /// in some circumstances.
447 : Missing,
448 : }
449 :
450 : /// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
451 : /// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
452 : /// of layers (for example when creating a branch that makes some previously covered layers visible). It should
453 : /// be used for cache management but not for correctness-critical checks.
454 : #[derive(Debug, Clone, PartialEq, Eq)]
455 : pub enum LayerVisibilityHint {
456 : /// A Visible layer might be read while serving a read, because there is not an image layer between it
457 : /// and a readable LSN (the tip of the branch or a child's branch point)
458 : Visible,
459 : /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
460 : /// a branch or ephemeral endpoint at an LSN below the layer that covers this.
461 : Covered,
462 : }
463 :
464 : pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
465 :
466 0 : #[derive(Clone, Copy, strum_macros::EnumString)]
467 : pub(crate) enum LayerAccessStatsReset {
468 : NoReset,
469 : AllStats,
470 : }
471 :
472 : impl Default for LayerAccessStats {
473 1664 : fn default() -> Self {
474 1664 : // Default value is to assume resident since creation time, and visible.
475 1664 : let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
476 1664 : value |= 0x1 << Self::VISIBILITY_SHIFT;
477 1664 :
478 1664 : Self(std::sync::atomic::AtomicU64::new(value))
479 1664 : }
480 : }
481 :
482 : // Efficient store of two very-low-resolution timestamps and some bits. Used for storing last access time and
483 : // last residence change time.
484 : impl LayerAccessStats {
485 : // How many high bits to drop from a u32 timestamp?
486 : // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
487 : // after that, this software has been very successful!)
488 : // - Dropping the top bit is implicitly safe because unix timestamps are meant to be
489 : // stored in an i32, so they never used it.
490 : // - Dropping the next two bits is safe because this code is only running on systems in
491 : // years >= 2024, and these bits have been 1 since 2021
492 : //
493 : // Therefore we may store only 28 bits for a timestamp with one second resolution. We do
494 : // this truncation to make space for some flags in the high bits of our u64.
495 : const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
496 : const TS_MASK: u32 = 0x1f_ff_ff_ff;
497 : const TS_ONES: u32 = 0x60_00_00_00;
498 :
499 : const ATIME_SHIFT: u32 = 0;
500 : const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
501 : const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
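// Worked illustration (not in the original file) of the resulting u64 layout, given
// TS_DROP_HIGH_BITS == 3 and therefore 29-bit truncated timestamps:
//
//   bits  0..=28  truncated access timestamp    (ATIME_SHIFT = 0)
//   bits 29..=57  truncated residence timestamp (RTIME_SHIFT = 29)
//   bit  58       visibility flag, 1 = Visible  (VISIBILITY_SHIFT = 58)
//   bits 59..=63  currently unused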
502 :
503 214875 : fn write_bits(&self, mask: u64, value: u64) -> u64 {
504 214875 : self.0
505 214875 : .fetch_update(
506 214875 : // TODO: decide what orderings are correct
507 214875 : std::sync::atomic::Ordering::Relaxed,
508 214875 : std::sync::atomic::Ordering::Relaxed,
509 214875 : |v| Some((v & !mask) | (value & mask)),
510 214875 : )
511 214875 : .expect("Inner function is infallible")
512 214875 : }
513 :
514 214047 : fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
515 214047 : // Keep only the low 29 bits of the timestamp (one second resolution); the dropped high bits are reconstructed on read.
516 214047 : let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
517 214047 :
518 214047 : ((Self::TS_MASK as u64) << shift, timestamp << shift)
519 214047 : }
520 :
521 46 : fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
522 46 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
523 46 :
524 46 : let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
525 46 : if ts_bits == 0 {
526 16 : None
527 : } else {
528 30 : Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
529 : }
530 46 : }
531 :
532 : /// Record a change in layer residency.
533 : ///
534 : /// Recording the event must happen while holding the layer map lock to
535 : /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
536 : /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
537 : ///
538 : /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
539 : /// the following race could happen:
540 : ///
541 : /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
542 : /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
543 : /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
544 : /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
545 26 : pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
546 26 : let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
547 26 : self.write_bits(mask, value);
548 26 : }
549 :
550 24 : pub(crate) fn record_residence_event(&self) {
551 24 : self.record_residence_event_at(SystemTime::now())
552 24 : }
553 :
554 212357 : pub(crate) fn record_access_at(&self, now: SystemTime) {
555 212357 : let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
556 212357 :
557 212357 : // A layer which is accessed must be visible.
558 212357 : mask |= 0x1 << Self::VISIBILITY_SHIFT;
559 212357 : value |= 0x1 << Self::VISIBILITY_SHIFT;
560 212357 :
561 212357 : self.write_bits(mask, value);
562 212357 : }
563 :
564 212631 : pub(crate) fn record_access(&self, ctx: &RequestContext) {
565 212631 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
566 276 : return;
567 212355 : }
568 212355 :
569 212355 : self.record_access_at(SystemTime::now())
570 212631 : }
571 :
572 0 : fn as_api_model(
573 0 : &self,
574 0 : reset: LayerAccessStatsReset,
575 0 : ) -> pageserver_api::models::LayerAccessStats {
576 0 : let ret = pageserver_api::models::LayerAccessStats {
577 0 : access_time: self
578 0 : .read_low_res_timestamp(Self::ATIME_SHIFT)
579 0 : .unwrap_or(UNIX_EPOCH),
580 0 : residence_time: self
581 0 : .read_low_res_timestamp(Self::RTIME_SHIFT)
582 0 : .unwrap_or(UNIX_EPOCH),
583 0 : visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
584 : };
585 0 : match reset {
586 0 : LayerAccessStatsReset::NoReset => {}
587 0 : LayerAccessStatsReset::AllStats => {
588 0 : self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
589 0 : self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
590 0 : }
591 : }
592 0 : ret
593 0 : }
594 :
595 : /// Get the latest access timestamp, falling back to latest residence event. The latest residence event
596 : /// will be this Layer's construction time, if its residence hasn't changed since then.
597 8 : pub(crate) fn latest_activity(&self) -> SystemTime {
598 8 : if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
599 6 : t
600 : } else {
601 2 : self.read_low_res_timestamp(Self::RTIME_SHIFT)
602 2 : .expect("Residence time is set on construction")
603 : }
604 8 : }
605 :
606 : /// Whether this layer has been accessed (accesses recorded with [`AccessStatsBehavior::Skip`] are not counted).
607 : ///
608 : /// This indicates whether the layer has been used for some purpose that would motivate
609 : /// us to keep it on disk, such as for serving a getpage request.
610 18 : fn accessed(&self) -> bool {
611 18 : // Consider it accessed if the most recent access is more recent than
612 18 : // the most recent change in residence status.
613 18 : match (
614 18 : self.read_low_res_timestamp(Self::ATIME_SHIFT),
615 18 : self.read_low_res_timestamp(Self::RTIME_SHIFT),
616 : ) {
617 14 : (None, _) => false,
618 0 : (Some(_), None) => true,
619 4 : (Some(a), Some(r)) => a >= r,
620 : }
621 18 : }
622 :
623 : /// Helper for extracting the visibility hint from the literal value of our inner u64
624 3002 : fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
625 3002 : match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
626 2970 : 1 => LayerVisibilityHint::Visible,
627 32 : 0 => LayerVisibilityHint::Covered,
628 0 : _ => unreachable!(),
629 : }
630 3002 : }
631 :
632 : /// Returns the old value which has been replaced
633 2492 : pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
634 2492 : let value = match visibility {
635 2434 : LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
636 58 : LayerVisibilityHint::Covered => 0x0,
637 : };
638 :
639 2492 : let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
640 2492 : self.decode_visibility(old_bits)
641 2492 : }
642 :
643 510 : pub(crate) fn visibility(&self) -> LayerVisibilityHint {
644 510 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
645 510 : self.decode_visibility(read)
646 510 : }
647 : }
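// A minimal sketch (added test module, not in the original file) of the packed access
// stats: a freshly created layer is visible but not yet accessed, recording an access
// flips `accessed()`, and covering the layer only clears the visibility bit.
#[cfg(test)]
mod layer_access_stats_example {
    use super::*;

    #[test]
    fn access_and_visibility_bits() {
        let stats = LayerAccessStats::default();
        assert_eq!(stats.visibility(), LayerVisibilityHint::Visible);
        assert!(!stats.accessed());

        stats.record_access_at(SystemTime::now());
        assert!(stats.accessed());
        assert_eq!(stats.visibility(), LayerVisibilityHint::Visible);

        // Covering the layer flips only the visibility bit; timestamps are untouched.
        assert_eq!(
            stats.set_visibility(LayerVisibilityHint::Covered),
            LayerVisibilityHint::Visible
        );
        assert_eq!(stats.visibility(), LayerVisibilityHint::Covered);
    }
}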
648 :
649 : /// Get a layer descriptor from a layer.
650 : pub(crate) trait AsLayerDesc {
651 : /// Get the layer descriptor.
652 : fn layer_desc(&self) -> &PersistentLayerDesc;
653 : }
654 :
655 : pub mod tests {
656 : use pageserver_api::shard::TenantShardId;
657 : use utils::id::TimelineId;
658 :
659 : use super::*;
660 :
661 : impl From<DeltaLayerName> for PersistentLayerDesc {
662 0 : fn from(value: DeltaLayerName) -> Self {
663 0 : PersistentLayerDesc::new_delta(
664 0 : TenantShardId::from([0; 18]),
665 0 : TimelineId::from_array([0; 16]),
666 0 : value.key_range,
667 0 : value.lsn_range,
668 0 : 233,
669 0 : )
670 0 : }
671 : }
672 :
673 : impl From<ImageLayerName> for PersistentLayerDesc {
674 0 : fn from(value: ImageLayerName) -> Self {
675 0 : PersistentLayerDesc::new_img(
676 0 : TenantShardId::from([0; 18]),
677 0 : TimelineId::from_array([0; 16]),
678 0 : value.key_range,
679 0 : value.lsn,
680 0 : 233,
681 0 : )
682 0 : }
683 : }
684 :
685 : impl From<LayerName> for PersistentLayerDesc {
686 0 : fn from(value: LayerName) -> Self {
687 0 : match value {
688 0 : LayerName::Delta(d) => Self::from(d),
689 0 : LayerName::Image(i) => Self::from(i),
690 : }
691 0 : }
692 : }
693 : }
694 :
695 : /// Range wrapping newtype, which uses display to render Debug.
696 : ///
697 : /// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
698 : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
699 :
700 : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
701 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
702 0 : write!(f, "{}..{}", self.0.start, self.0.end)
703 0 : }
704 : }
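// Sketch (added test module, not in the original file): `RangeDisplayDebug` renders
// "start..end" via `Display`. An integer range stands in for `Key` here.
#[cfg(test)]
mod range_display_debug_example {
    use super::*;

    #[test]
    fn renders_via_display() {
        assert_eq!(format!("{:?}", RangeDisplayDebug(&(1..5))), "1..5");
    }
}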