Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod batch_split_writer;
4 : pub mod delta_layer;
5 : pub mod filter_iterator;
6 : pub mod image_layer;
7 : pub mod inmemory_layer;
8 : pub(crate) mod layer;
9 : mod layer_desc;
10 : mod layer_name;
11 : pub mod merge_iterator;
12 :
13 : use crate::context::{AccessStatsBehavior, RequestContext};
14 : use bytes::Bytes;
15 : use pageserver_api::key::{Key, NON_INHERITED_SPARSE_RANGE};
16 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
17 : use pageserver_api::record::NeonWalRecord;
18 : use pageserver_api::value::Value;
19 : use std::cmp::{Ordering, Reverse};
20 : use std::collections::hash_map::Entry;
21 : use std::collections::{BinaryHeap, HashMap};
22 : use std::ops::Range;
23 : use std::sync::Arc;
24 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 :
26 : use utils::lsn::Lsn;
27 :
28 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
29 : pub use image_layer::{ImageLayer, ImageLayerWriter};
30 : pub use inmemory_layer::InMemoryLayer;
31 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
32 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
33 :
34 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
35 :
36 : use self::inmemory_layer::InMemoryLayerFileId;
37 :
38 : use super::timeline::GetVectoredError;
39 : use super::PageReconstructError;
40 :
/// Returns `true` if the half-open ranges `a` and `b` share at least one point.
///
/// Ranges follow the usual `start..end` convention: `start` is inclusive and
/// `end` is exclusive, so ranges that merely touch (`0..3` and `3..8`) do not
/// overlap.
///
/// An empty or inverted range (`start >= end`) contains no points and therefore
/// never overlaps anything, not even a range that spans its position.
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
where
    T: PartialOrd<T>,
{
    // Reject empty operands up front: the intersection test below assumes that
    // each range contains at least its own start point.
    let both_non_empty = a.start < a.end && b.start < b.end;

    // Two non-empty half-open ranges intersect exactly when each one starts
    // before the other one ends.
    both_non_empty && a.start < b.end && b.start < a.end
}
51 :
/// Struct used to communicate across calls to 'get_value_reconstruct_data'.
///
/// Before the first call, you can fill in 'img' if you have an older cached
/// version of the page available. That can save work in
/// 'get_value_reconstruct_data', as it can stop searching for page versions
/// when all the WAL records going back to the cached image have been collected.
///
/// When 'get_value_reconstruct_data' returns Complete, 'img' is set to an image
/// of the page, or the oldest WAL record in 'records' is a will_init-type
/// record that initializes the page without requiring a previous image.
///
/// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
/// been collected, but there are more records outside the current layer. Pass
/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
/// call, to collect more records.
#[derive(Debug, Default)]
pub(crate) struct ValueReconstructState {
    /// WAL records collected so far, each paired with its LSN.
    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
    /// Page image, paired with its LSN, if one is available.
    pub(crate) img: Option<(Lsn, Bytes)>,
}
73 :
/// Progress marker for the reconstruct data of a single key.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum ValueReconstructSituation {
    /// Everything needed for the key has been collected.
    Complete,
    /// More data is still needed. This is the state a fresh key starts in.
    #[default]
    Continue,
}
80 :
/// Reconstruct data accumulated for a single key during a vectored get
#[derive(Debug, Default, Clone)]
pub(crate) struct VectoredValueReconstructState {
    /// WAL records collected so far, each paired with its LSN, in collection order.
    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
    /// Page image, paired with its LSN, if one has been found.
    pub(crate) img: Option<(Lsn, Bytes)>,

    /// Whether more data still has to be collected for this key.
    situation: ValueReconstructSituation,
}
89 :
90 : impl VectoredValueReconstructState {
91 73578 : fn get_cached_lsn(&self) -> Option<Lsn> {
92 73578 : self.img.as_ref().map(|img| img.0)
93 73578 : }
94 : }
95 :
96 : impl From<VectoredValueReconstructState> for ValueReconstructState {
97 667727 : fn from(mut state: VectoredValueReconstructState) -> Self {
98 667727 : // walredo expects the records to be descending in terms of Lsn
99 667727 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
100 667727 :
101 667727 : ValueReconstructState {
102 667727 : records: state.records,
103 667727 : img: state.img,
104 667727 : }
105 667727 : }
106 : }
107 :
/// Bag of data accumulated during a vectored get.
pub(crate) struct ValuesReconstructState {
    /// Per-key outcome: either the reconstruct data gathered so far or the error hit for the key.
    ///
    /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
    /// should not expect to get anything from this hashmap.
    pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
    /// The keys which are already retrieved; drained by `consume_done_keys`.
    keys_done: KeySpaceRandomAccum,

    /// The keys covered by the image layers; set by `on_image_layer_visited` and
    /// taken by `consume_done_keys`.
    keys_with_image_coverage: Option<Range<Key>>,

    // Statistics that are still accessible as a caller of `get_vectored_impl`.
    /// Number of layers visited, of any kind.
    layers_visited: u32,
    /// Number of visited persistent layers that were delta layers.
    delta_layers_visited: u32,
}
123 :
124 : impl ValuesReconstructState {
125 627631 : pub(crate) fn new() -> Self {
126 627631 : Self {
127 627631 : keys: HashMap::new(),
128 627631 : keys_done: KeySpaceRandomAccum::new(),
129 627631 : keys_with_image_coverage: None,
130 627631 : layers_visited: 0,
131 627631 : delta_layers_visited: 0,
132 627631 : }
133 627631 : }
134 :
135 : /// Associate a key with the error which it encountered and mark it as done
136 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
137 0 : let previous = self.keys.insert(key, Err(err));
138 0 : if let Some(Ok(state)) = previous {
139 0 : if state.situation == ValueReconstructSituation::Continue {
140 0 : self.keys_done.add_key(key);
141 0 : }
142 0 : }
143 0 : }
144 :
145 847677 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
146 847677 : self.layers_visited += 1;
147 847677 : if let ReadableLayer::PersistentLayer(layer) = layer {
148 241193 : if layer.layer_desc().is_delta() {
149 218681 : self.delta_layers_visited += 1;
150 218681 : }
151 606484 : }
152 847677 : }
153 :
154 204 : pub(crate) fn get_delta_layers_visited(&self) -> u32 {
155 204 : self.delta_layers_visited
156 204 : }
157 :
158 627401 : pub(crate) fn get_layers_visited(&self) -> u32 {
159 627401 : self.layers_visited
160 627401 : }
161 :
162 : /// This function is called after reading a keyspace from a layer.
163 : /// It checks if the read path has now moved past the cached Lsn for any keys.
164 : ///
165 : /// Implementation note: We intentionally iterate over the keys for which we've
166 : /// already collected some reconstruct data. This avoids scaling complexity with
167 : /// the size of the search space.
168 825165 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
169 825165 : for (key, value) in self.keys.iter_mut() {
170 704979 : if !keyspace.contains(key) {
171 120 : continue;
172 704859 : }
173 :
174 704859 : if let Ok(state) = value {
175 704859 : if state.situation != ValueReconstructSituation::Complete
176 494 : && state.get_cached_lsn() >= Some(advanced_to)
177 0 : {
178 0 : state.situation = ValueReconstructSituation::Complete;
179 0 : self.keys_done.add_key(*key);
180 704859 : }
181 0 : }
182 : }
183 825165 : }
184 :
185 : /// On hitting image layer, we can mark all keys in this range as done, because
186 : /// if the image layer does not contain a key, it is deleted/never added.
187 22524 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
188 22524 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
189 22524 : assert_eq!(
190 : prev_val, None,
191 0 : "should consume the keyspace before the next iteration"
192 : );
193 22524 : }
194 :
195 : /// Update the state collected for a given key.
196 : /// Returns true if this was the last value needed for the key and false otherwise.
197 : ///
198 : /// If the key is done after the update, mark it as such.
199 : ///
200 : /// If the key is in the sparse keyspace (i.e., aux files), we do not track them in
201 : /// `key_done`.
202 703867 : pub(crate) fn update_key(
203 703867 : &mut self,
204 703867 : key: &Key,
205 703867 : lsn: Lsn,
206 703867 : value: Value,
207 703867 : ) -> ValueReconstructSituation {
208 703867 : let state = self
209 703867 : .keys
210 703867 : .entry(*key)
211 703867 : .or_insert(Ok(VectoredValueReconstructState::default()));
212 703867 : let is_sparse_key = NON_INHERITED_SPARSE_RANGE.contains(key);
213 703867 : if let Ok(state) = state {
214 703867 : let key_done = match state.situation {
215 : ValueReconstructSituation::Complete => {
216 35414 : if is_sparse_key {
217 : // Sparse keyspace might be visited multiple times because
218 : // we don't track unmapped keyspaces.
219 35414 : return ValueReconstructSituation::Complete;
220 : } else {
221 0 : unreachable!()
222 : }
223 : }
224 668453 : ValueReconstructSituation::Continue => match value {
225 667753 : Value::Image(img) => {
226 667753 : state.img = Some((lsn, img));
227 667753 : true
228 : }
229 700 : Value::WalRecord(rec) => {
230 700 : debug_assert!(
231 700 : Some(lsn) > state.get_cached_lsn(),
232 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
233 0 : lsn,
234 0 : state
235 0 : .get_cached_lsn()
236 0 : .expect("Assertion can only fire if a cached lsn is present")
237 : );
238 :
239 700 : let will_init = rec.will_init();
240 700 : state.records.push((lsn, rec));
241 700 : will_init
242 : }
243 : },
244 : };
245 :
246 668453 : if key_done && state.situation == ValueReconstructSituation::Continue {
247 667797 : state.situation = ValueReconstructSituation::Complete;
248 667797 : if !is_sparse_key {
249 604071 : self.keys_done.add_key(*key);
250 604071 : }
251 656 : }
252 :
253 668453 : state.situation
254 : } else {
255 0 : ValueReconstructSituation::Complete
256 : }
257 703867 : }
258 :
259 : /// Returns the Lsn at which this key is cached if one exists.
260 : /// The read path should go no further than this Lsn for the given key.
261 1126935 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
262 1126935 : self.keys
263 1126935 : .get(key)
264 1126935 : .and_then(|k| k.as_ref().ok())
265 1126935 : .and_then(|state| state.get_cached_lsn())
266 1126935 : }
267 :
268 : /// Returns the key space describing the keys that have
269 : /// been marked as completed since the last call to this function.
270 : /// Returns individual keys done, and the image layer coverage.
271 1703522 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
272 1703522 : (
273 1703522 : self.keys_done.consume_keyspace(),
274 1703522 : self.keys_with_image_coverage.take(),
275 1703522 : )
276 1703522 : }
277 : }
278 :
279 : impl Default for ValuesReconstructState {
280 250 : fn default() -> Self {
281 250 : Self::new()
282 250 : }
283 : }
284 :
/// A key that uniquely identifies a layer in a timeline
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub(crate) enum LayerId {
    /// Identifies a persistent layer by its [`PersistentLayerKey`].
    // NOTE(review): the variant name is misspelled ("Persitent"); renaming it would
    // touch every user of the variant, so it is only flagged here.
    PersitentLayerId(PersistentLayerKey),
    /// Identifies an in-memory layer by its file id.
    InMemoryLayerId(InMemoryLayerFileId),
}
291 :
/// Uniquely identify a layer visit by the layer
/// and LSN floor (or start LSN) of the reads.
/// The layer itself is not enough since we may
/// have different LSN lower bounds for delta layer reads.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
struct LayerToVisitId {
    /// The layer to be read.
    layer_id: LayerId,
    /// Start of the LSN range of the read.
    lsn_floor: Lsn,
}
301 :
/// Layer wrapper for the read path. Note that it is valid
/// to use these layers even after external operations have
/// been performed on them (compaction, freeze, etc.).
#[derive(Debug)]
pub(crate) enum ReadableLayer {
    /// A persistent layer, described by a `PersistentLayerDesc`.
    PersistentLayer(Layer),
    /// A shared handle to an in-memory layer.
    InMemoryLayer(Arc<InMemoryLayer>),
}
310 :
/// A partial description of a read to be done.
///
/// This is the element type of the fringe's priority queue; ordering and
/// equality of visits look at `lsn_range` only.
#[derive(Debug, Clone)]
struct LayerVisit {
    /// An id used to resolve the readable layer within the fringe
    layer_to_visit_id: LayerToVisitId,
    /// Lsn range for the read, used for selecting the next read
    lsn_range: Range<Lsn>,
}
319 :
/// Data structure which maintains a fringe of layers for the
/// read path. The fringe is the set of layers which intersects
/// the current keyspace that the search is descending on.
/// Each layer tracks the keyspace that intersects it.
///
/// The fringe must appear sorted by Lsn. Hence, it uses
/// a two layer indexing scheme: a heap that orders the planned
/// visits, and a map from each visit's id to the data of the read.
#[derive(Debug)]
pub(crate) struct LayerFringe {
    /// Planned visits, ordered by their LSN range.
    planned_visits_by_lsn: BinaryHeap<LayerVisit>,
    /// The layer and accumulated target keyspace of every planned visit.
    visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,
}
332 :
/// The data needed to carry out one planned visit.
#[derive(Debug)]
struct LayerVisitReads {
    /// The layer to read from.
    layer: ReadableLayer,
    /// Accumulates every keyspace requested from this layer for this visit.
    target_keyspace: KeySpaceRandomAccum,
}
338 :
339 : impl LayerFringe {
340 855845 : pub(crate) fn new() -> Self {
341 855845 : LayerFringe {
342 855845 : planned_visits_by_lsn: BinaryHeap::new(),
343 855845 : visit_reads: HashMap::new(),
344 855845 : }
345 855845 : }
346 :
347 1703522 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
348 1703522 : let read_desc = self.planned_visits_by_lsn.pop()?;
349 :
350 847677 : let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);
351 847677 :
352 847677 : match removed {
353 : Some((
354 : _,
355 : LayerVisitReads {
356 847677 : layer,
357 847677 : mut target_keyspace,
358 847677 : },
359 847677 : )) => Some((
360 847677 : layer,
361 847677 : target_keyspace.consume_keyspace(),
362 847677 : read_desc.lsn_range,
363 847677 : )),
364 0 : None => unreachable!("fringe internals are always consistent"),
365 : }
366 1703522 : }
367 :
368 847691 : pub(crate) fn update(
369 847691 : &mut self,
370 847691 : layer: ReadableLayer,
371 847691 : keyspace: KeySpace,
372 847691 : lsn_range: Range<Lsn>,
373 847691 : ) {
374 847691 : let layer_to_visit_id = LayerToVisitId {
375 847691 : layer_id: layer.id(),
376 847691 : lsn_floor: lsn_range.start,
377 847691 : };
378 847691 :
379 847691 : let entry = self.visit_reads.entry(layer_to_visit_id.clone());
380 847691 : match entry {
381 14 : Entry::Occupied(mut entry) => {
382 14 : entry.get_mut().target_keyspace.add_keyspace(keyspace);
383 14 : }
384 847677 : Entry::Vacant(entry) => {
385 847677 : self.planned_visits_by_lsn.push(LayerVisit {
386 847677 : lsn_range,
387 847677 : layer_to_visit_id: layer_to_visit_id.clone(),
388 847677 : });
389 847677 : let mut accum = KeySpaceRandomAccum::new();
390 847677 : accum.add_keyspace(keyspace);
391 847677 : entry.insert(LayerVisitReads {
392 847677 : layer,
393 847677 : target_keyspace: accum,
394 847677 : });
395 847677 : }
396 : }
397 847691 : }
398 : }
399 :
400 : impl Default for LayerFringe {
401 0 : fn default() -> Self {
402 0 : Self::new()
403 0 : }
404 : }
405 :
406 : impl Ord for LayerVisit {
407 30 : fn cmp(&self, other: &Self) -> Ordering {
408 30 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
409 30 : if ord == std::cmp::Ordering::Equal {
410 22 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
411 : } else {
412 8 : ord
413 : }
414 30 : }
415 : }
416 :
417 : impl PartialOrd for LayerVisit {
418 30 : fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
419 30 : Some(self.cmp(other))
420 30 : }
421 : }
422 :
423 : impl PartialEq for LayerVisit {
424 0 : fn eq(&self, other: &Self) -> bool {
425 0 : self.lsn_range == other.lsn_range
426 0 : }
427 : }
428 :
429 : impl Eq for LayerVisit {}
430 :
431 : impl ReadableLayer {
432 847691 : pub(crate) fn id(&self) -> LayerId {
433 847691 : match self {
434 241207 : Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),
435 606484 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
436 : }
437 847691 : }
438 :
439 847677 : pub(crate) async fn get_values_reconstruct_data(
440 847677 : &self,
441 847677 : keyspace: KeySpace,
442 847677 : lsn_range: Range<Lsn>,
443 847677 : reconstruct_state: &mut ValuesReconstructState,
444 847677 : ctx: &RequestContext,
445 847677 : ) -> Result<(), GetVectoredError> {
446 847677 : match self {
447 241193 : ReadableLayer::PersistentLayer(layer) => {
448 241193 : layer
449 241193 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
450 241193 : .await
451 : }
452 606484 : ReadableLayer::InMemoryLayer(layer) => {
453 606484 : layer
454 606484 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
455 606484 : .await
456 : }
457 : }
458 847677 : }
459 : }
460 :
/// Layers contain a hint indicating whether they are likely to be used for reads.
///
/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously
/// when changing the visibility of layers (for example when creating a branch that makes some previously
/// covered layers visible). It should be used for cache management but not for correctness-critical checks.
///
/// `LayerAccessStats` stores the hint as a single bit: set for `Visible`, clear for `Covered`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayerVisibilityHint {
    /// A Visible layer might be read while serving a read, because there is not an image layer between it
    /// and a readable LSN (the tip of the branch or a child's branch point)
    Visible,
    /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
    /// a branch or ephemeral endpoint at an LSN below the layer that covers this.
    Covered,
}
475 :
/// Access statistics of a layer, packed into one atomic `u64`: the last access time, the last
/// residence change time (both at low resolution) and the visibility hint.
pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
477 :
/// Selects whether reading the statistics through `as_api_model` also clears them.
#[derive(Clone, Copy, strum_macros::EnumString)]
pub(crate) enum LayerAccessStatsReset {
    /// Leave the stored statistics untouched.
    NoReset,
    /// Clear both stored timestamps after they have been read.
    AllStats,
}
483 :
484 : impl Default for LayerAccessStats {
485 1794 : fn default() -> Self {
486 1794 : // Default value is to assume resident since creation time, and visible.
487 1794 : let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
488 1794 : value |= 0x1 << Self::VISIBILITY_SHIFT;
489 1794 :
490 1794 : Self(std::sync::atomic::AtomicU64::new(value))
491 1794 : }
492 : }
493 :
494 : // Efficient store of two very-low-resolution timestamps and some bits. Used for storing last access time and
495 : // last residence change time.
496 : impl LayerAccessStats {
497 : // How many high bits to drop from a u32 timestamp?
498 : // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
499 : // after that, this software has been very successful!)
500 : // - Dropping the top bit is implicitly safe because unix timestamps are meant to be
501 : // stored in an i32, so they never used it.
502 : // - Dropping the next two bits is safe because this code is only running on systems in
503 : // years >= 2024, and these bits have been 1 since 2021
504 : //
505 : // Therefore we may store only 28 bits for a timestamp with one second resolution. We do
506 : // this truncation to make space for some flags in the high bits of our u64.
507 : const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
508 : const TS_MASK: u32 = 0x1f_ff_ff_ff;
509 : const TS_ONES: u32 = 0x60_00_00_00;
510 :
511 : const ATIME_SHIFT: u32 = 0;
512 : const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
513 : const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
514 :
515 241289 : fn write_bits(&self, mask: u64, value: u64) -> u64 {
516 241289 : self.0
517 241289 : .fetch_update(
518 241289 : // TODO: decide what orderings are correct
519 241289 : std::sync::atomic::Ordering::Relaxed,
520 241289 : std::sync::atomic::Ordering::Relaxed,
521 241289 : |v| Some((v & !mask) | (value & mask)),
522 241289 : )
523 241289 : .expect("Inner function is infallible")
524 241289 : }
525 :
526 242747 : fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
527 242747 : // Drop the low three bits of the timestamp, for an ~8s accuracy
528 242747 : let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
529 242747 :
530 242747 : ((Self::TS_MASK as u64) << shift, timestamp << shift)
531 242747 : }
532 :
533 62 : fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
534 62 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
535 62 :
536 62 : let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
537 62 : if ts_bits == 0 {
538 24 : None
539 : } else {
540 38 : Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
541 : }
542 62 : }
543 :
544 : /// Record a change in layer residency.
545 : ///
546 : /// Recording the event must happen while holding the layer map lock to
547 : /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
548 : /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
549 : ///
550 : /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
551 : /// the following race could happen:
552 : ///
553 : /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
554 : /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
555 : /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
556 : /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
557 26 : pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
558 26 : let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
559 26 : self.write_bits(mask, value);
560 26 : }
561 :
562 24 : pub(crate) fn record_residence_event(&self) {
563 24 : self.record_residence_event_at(SystemTime::now())
564 24 : }
565 :
566 240927 : fn record_access_at(&self, now: SystemTime) -> bool {
567 240927 : let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
568 240927 :
569 240927 : // A layer which is accessed must be visible.
570 240927 : mask |= 0x1 << Self::VISIBILITY_SHIFT;
571 240927 : value |= 0x1 << Self::VISIBILITY_SHIFT;
572 240927 :
573 240927 : let old_bits = self.write_bits(mask, value);
574 2 : !matches!(
575 240927 : self.decode_visibility(old_bits),
576 : LayerVisibilityHint::Visible
577 : )
578 240927 : }
579 :
580 : /// Returns true if we modified the layer's visibility to set it to Visible implicitly
581 : /// as a result of this access
582 241205 : pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {
583 241205 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
584 284 : return false;
585 240921 : }
586 240921 :
587 240921 : self.record_access_at(SystemTime::now())
588 241205 : }
589 :
590 0 : fn as_api_model(
591 0 : &self,
592 0 : reset: LayerAccessStatsReset,
593 0 : ) -> pageserver_api::models::LayerAccessStats {
594 0 : let ret = pageserver_api::models::LayerAccessStats {
595 0 : access_time: self
596 0 : .read_low_res_timestamp(Self::ATIME_SHIFT)
597 0 : .unwrap_or(UNIX_EPOCH),
598 0 : residence_time: self
599 0 : .read_low_res_timestamp(Self::RTIME_SHIFT)
600 0 : .unwrap_or(UNIX_EPOCH),
601 0 : visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
602 : };
603 0 : match reset {
604 0 : LayerAccessStatsReset::NoReset => {}
605 0 : LayerAccessStatsReset::AllStats => {
606 0 : self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
607 0 : self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
608 0 : }
609 : }
610 0 : ret
611 0 : }
612 :
613 : /// Get the latest access timestamp, falling back to latest residence event. The latest residence event
614 : /// will be this Layer's construction time, if its residence hasn't changed since then.
615 16 : pub(crate) fn latest_activity(&self) -> SystemTime {
616 16 : if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
617 6 : t
618 : } else {
619 10 : self.read_low_res_timestamp(Self::RTIME_SHIFT)
620 10 : .expect("Residence time is set on construction")
621 : }
622 16 : }
623 :
624 : /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).
625 : ///
626 : /// This indicates whether the layer has been used for some purpose that would motivate
627 : /// us to keep it on disk, such as for serving a getpage request.
628 18 : fn accessed(&self) -> bool {
629 18 : // Consider it accessed if the most recent access is more recent than
630 18 : // the most recent change in residence status.
631 18 : match (
632 18 : self.read_low_res_timestamp(Self::ATIME_SHIFT),
633 18 : self.read_low_res_timestamp(Self::RTIME_SHIFT),
634 : ) {
635 14 : (None, _) => false,
636 0 : (Some(_), None) => true,
637 4 : (Some(a), Some(r)) => a >= r,
638 : }
639 18 : }
640 :
641 : /// Helper for extracting the visibility hint from the literal value of our inner u64
642 241879 : fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
643 241879 : match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
644 241857 : 1 => LayerVisibilityHint::Visible,
645 22 : 0 => LayerVisibilityHint::Covered,
646 0 : _ => unreachable!(),
647 : }
648 241879 : }
649 :
650 : /// Returns the old value which has been replaced
651 336 : pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
652 336 : let value = match visibility {
653 284 : LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
654 52 : LayerVisibilityHint::Covered => 0x0,
655 : };
656 :
657 336 : let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
658 336 : self.decode_visibility(old_bits)
659 336 : }
660 :
661 616 : pub(crate) fn visibility(&self) -> LayerVisibilityHint {
662 616 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
663 616 : self.decode_visibility(read)
664 616 : }
665 : }
666 :
/// Get a layer descriptor from a layer.
///
/// Implemented by types that can hand out a borrowed [`PersistentLayerDesc`].
pub(crate) trait AsLayerDesc {
    /// Get the layer descriptor.
    fn layer_desc(&self) -> &PersistentLayerDesc;
}
672 :
673 : pub mod tests {
674 : use pageserver_api::shard::TenantShardId;
675 : use utils::id::TimelineId;
676 :
677 : use super::*;
678 :
679 : impl From<DeltaLayerName> for PersistentLayerDesc {
680 0 : fn from(value: DeltaLayerName) -> Self {
681 0 : PersistentLayerDesc::new_delta(
682 0 : TenantShardId::from([0; 18]),
683 0 : TimelineId::from_array([0; 16]),
684 0 : value.key_range,
685 0 : value.lsn_range,
686 0 : 233,
687 0 : )
688 0 : }
689 : }
690 :
691 : impl From<ImageLayerName> for PersistentLayerDesc {
692 0 : fn from(value: ImageLayerName) -> Self {
693 0 : PersistentLayerDesc::new_img(
694 0 : TenantShardId::from([0; 18]),
695 0 : TimelineId::from_array([0; 16]),
696 0 : value.key_range,
697 0 : value.lsn,
698 0 : 233,
699 0 : )
700 0 : }
701 : }
702 :
703 : impl From<LayerName> for PersistentLayerDesc {
704 0 : fn from(value: LayerName) -> Self {
705 0 : match value {
706 0 : LayerName::Delta(d) => Self::from(d),
707 0 : LayerName::Image(i) => Self::from(i),
708 : }
709 0 : }
710 : }
711 : }
712 :
/// Newtype around a borrowed range whose `Debug` output is built from the
/// `Display` form of the bounds, rendered as `start..end`.
///
/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);

impl<T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'_, T> {
    /// Writes both bounds with `Display`, separated by `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Range { start, end } = self.0;
        f.write_fmt(format_args!("{}..{}", start, end))
    }
}
|