Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod batch_split_writer;
4 : pub mod delta_layer;
5 : pub mod filter_iterator;
6 : pub mod image_layer;
7 : pub mod inmemory_layer;
8 : pub(crate) mod layer;
9 : mod layer_desc;
10 : mod layer_name;
11 : pub mod merge_iterator;
12 :
13 : use crate::context::{AccessStatsBehavior, RequestContext};
14 : use bytes::Bytes;
15 : use pageserver_api::key::{Key, NON_INHERITED_SPARSE_RANGE};
16 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
17 : use pageserver_api::record::NeonWalRecord;
18 : use pageserver_api::value::Value;
19 : use std::cmp::{Ordering, Reverse};
20 : use std::collections::hash_map::Entry;
21 : use std::collections::{BinaryHeap, HashMap};
22 : use std::ops::Range;
23 : use std::sync::Arc;
24 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 :
26 : use utils::lsn::Lsn;
27 :
28 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
29 : pub use image_layer::{ImageLayer, ImageLayerWriter};
30 : pub use inmemory_layer::InMemoryLayer;
31 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
32 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
33 :
34 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
35 :
36 : use self::inmemory_layer::InMemoryLayerFileId;
37 :
38 : use super::timeline::GetVectoredError;
39 : use super::PageReconstructError;
40 :
/// Returns `true` when the half-open ranges `a` and `b` share at least one point.
///
/// The range that starts first must extend past the start of the other one. When
/// neither start compares as smaller (including equal starts), `b` is treated as
/// the earlier range.
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
where
    T: PartialOrd<T>,
{
    let (earlier, later) = if a.start < b.start { (a, b) } else { (b, a) };
    earlier.end > later.start
}
51 :
/// Struct used to communicate across calls to 'get_value_reconstruct_data'.
///
/// Before first call, you can fill in 'page_img' if you have an older cached
/// version of the page available. That can save work in
/// 'get_value_reconstruct_data', as it can stop searching for page versions
/// when all the WAL records going back to the cached image have been collected.
///
/// When get_value_reconstruct_data returns Complete, 'img' is set to an image
/// of the page, or the oldest WAL record in 'records' is a will_init-type
/// record that initializes the page without requiring a previous image.
///
/// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
/// been collected, but there are more records outside the current layer. Pass
/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
/// call, to collect more records.
///
#[derive(Debug, Default)]
pub(crate) struct ValueReconstructState {
    /// WAL records collected so far, each paired with the LSN it was found at.
    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
    /// Page image and the LSN it belongs to, if one has been found or supplied.
    pub(crate) img: Option<(Lsn, Bytes)>,
}
73 :
/// Progress marker for the reconstruct data of a single key.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum ValueReconstructSituation {
    /// Nothing more needs to be collected for the key.
    Complete,
    /// More data is still needed for the key. This is the default state.
    #[default]
    Continue,
}
80 :
/// Reconstruct data accumulated for a single key during a vectored get
#[derive(Debug, Default, Clone)]
pub(crate) struct VectoredValueReconstructState {
    /// WAL records collected so far, in the order they were pushed (not sorted).
    pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
    /// Page image and its LSN, once one has been recorded for the key.
    pub(crate) img: Option<(Lsn, Bytes)>,

    /// Whether collection for this key has finished; starts out as `Continue`.
    situation: ValueReconstructSituation,
}
89 :
90 : impl VectoredValueReconstructState {
91 72575 : fn get_cached_lsn(&self) -> Option<Lsn> {
92 72575 : self.img.as_ref().map(|img| img.0)
93 72575 : }
94 : }
95 :
96 : impl From<VectoredValueReconstructState> for ValueReconstructState {
97 666926 : fn from(mut state: VectoredValueReconstructState) -> Self {
98 666926 : // walredo expects the records to be descending in terms of Lsn
99 666926 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
100 666926 :
101 666926 : ValueReconstructState {
102 666926 : records: state.records,
103 666926 : img: state.img,
104 666926 : }
105 666926 : }
106 : }
107 :
/// Bag of data accumulated during a vectored get.
pub(crate) struct ValuesReconstructState {
    /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
    /// should not expect to get anything from this hashmap.
    pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
    /// The keys which are already retrieved
    keys_done: KeySpaceRandomAccum,

    /// The keys covered by the image layers
    keys_with_image_coverage: Option<Range<Key>>,

    // Statistics that are still accessible as a caller of `get_vectored_impl`.
    /// Number of layers visited so far (incremented by `on_layer_visited`).
    layers_visited: u32,
    /// Number of visited persistent layers whose descriptor reports a delta layer.
    delta_layers_visited: u32,
}
123 :
124 : impl ValuesReconstructState {
125 626830 : pub(crate) fn new() -> Self {
126 626830 : Self {
127 626830 : keys: HashMap::new(),
128 626830 : keys_done: KeySpaceRandomAccum::new(),
129 626830 : keys_with_image_coverage: None,
130 626830 : layers_visited: 0,
131 626830 : delta_layers_visited: 0,
132 626830 : }
133 626830 : }
134 :
135 : /// Associate a key with the error which it encountered and mark it as done
136 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
137 0 : let previous = self.keys.insert(key, Err(err));
138 0 : if let Some(Ok(state)) = previous {
139 0 : if state.situation == ValueReconstructSituation::Continue {
140 0 : self.keys_done.add_key(key);
141 0 : }
142 0 : }
143 0 : }
144 :
145 846270 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
146 846270 : self.layers_visited += 1;
147 846270 : if let ReadableLayer::PersistentLayer(layer) = layer {
148 239920 : if layer.layer_desc().is_delta() {
149 218112 : self.delta_layers_visited += 1;
150 218112 : }
151 606350 : }
152 846270 : }
153 :
/// Number of persistent delta layers counted by `on_layer_visited` so far.
pub(crate) fn get_delta_layers_visited(&self) -> u32 {
    self.delta_layers_visited
}
157 :
/// Total number of layers counted by `on_layer_visited` so far.
pub(crate) fn get_layers_visited(&self) -> u32 {
    self.layers_visited
}
161 :
162 : /// This function is called after reading a keyspace from a layer.
163 : /// It checks if the read path has now moved past the cached Lsn for any keys.
164 : ///
165 : /// Implementation note: We intentionally iterate over the keys for which we've
166 : /// already collected some reconstruct data. This avoids scaling complexity with
167 : /// the size of the search space.
168 824462 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
169 824462 : for (key, value) in self.keys.iter_mut() {
170 704290 : if !keyspace.contains(key) {
171 128 : continue;
172 704162 : }
173 :
174 704162 : if let Ok(state) = value {
175 704162 : if state.situation != ValueReconstructSituation::Complete
176 354 : && state.get_cached_lsn() >= Some(advanced_to)
177 0 : {
178 0 : state.situation = ValueReconstructSituation::Complete;
179 0 : self.keys_done.add_key(*key);
180 704162 : }
181 0 : }
182 : }
183 824462 : }
184 :
185 : /// On hitting image layer, we can mark all keys in this range as done, because
186 : /// if the image layer does not contain a key, it is deleted/never added.
187 21820 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
188 21820 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
189 21820 : assert_eq!(
190 : prev_val, None,
191 0 : "should consume the keyspace before the next iteration"
192 : );
193 21820 : }
194 :
195 : /// Update the state collected for a given key.
196 : /// Returns true if this was the last value needed for the key and false otherwise.
197 : ///
198 : /// If the key is done after the update, mark it as such.
199 : ///
200 : /// If the key is in the sparse keyspace (i.e., aux files), we do not track them in
201 : /// `key_done`.
202 702949 : pub(crate) fn update_key(
203 702949 : &mut self,
204 702949 : key: &Key,
205 702949 : lsn: Lsn,
206 702949 : value: Value,
207 702949 : ) -> ValueReconstructSituation {
208 702949 : let state = self
209 702949 : .keys
210 702949 : .entry(*key)
211 702949 : .or_insert(Ok(VectoredValueReconstructState::default()));
212 702949 : let is_sparse_key = NON_INHERITED_SPARSE_RANGE.contains(key);
213 702949 : if let Ok(state) = state {
214 702949 : let key_done = match state.situation {
215 : ValueReconstructSituation::Complete => {
216 35497 : if is_sparse_key {
217 : // Sparse keyspace might be visited multiple times because
218 : // we don't track unmapped keyspaces.
219 35497 : return ValueReconstructSituation::Complete;
220 : } else {
221 0 : unreachable!()
222 : }
223 : }
224 667452 : ValueReconstructSituation::Continue => match value {
225 666952 : Value::Image(img) => {
226 666952 : state.img = Some((lsn, img));
227 666952 : true
228 : }
229 500 : Value::WalRecord(rec) => {
230 500 : debug_assert!(
231 500 : Some(lsn) > state.get_cached_lsn(),
232 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
233 0 : lsn,
234 0 : state
235 0 : .get_cached_lsn()
236 0 : .expect("Assertion can only fire if a cached lsn is present")
237 : );
238 :
239 500 : let will_init = rec.will_init();
240 500 : state.records.push((lsn, rec));
241 500 : will_init
242 : }
243 : },
244 : };
245 :
246 667452 : if key_done && state.situation == ValueReconstructSituation::Continue {
247 666996 : state.situation = ValueReconstructSituation::Complete;
248 666996 : if !is_sparse_key {
249 603920 : self.keys_done.add_key(*key);
250 603920 : }
251 456 : }
252 :
253 667452 : state.situation
254 : } else {
255 0 : ValueReconstructSituation::Complete
256 : }
257 702949 : }
258 :
259 : /// Returns the Lsn at which this key is cached if one exists.
260 : /// The read path should go no further than this Lsn for the given key.
261 1126396 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
262 1126396 : self.keys
263 1126396 : .get(key)
264 1126396 : .and_then(|k| k.as_ref().ok())
265 1126396 : .and_then(|state| state.get_cached_lsn())
266 1126396 : }
267 :
268 : /// Returns the key space describing the keys that have
269 : /// been marked as completed since the last call to this function.
270 : /// Returns individual keys done, and the image layer coverage.
271 1698539 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
272 1698539 : (
273 1698539 : self.keys_done.consume_keyspace(),
274 1698539 : self.keys_with_image_coverage.take(),
275 1698539 : )
276 1698539 : }
277 : }
278 :
/// The default state is the empty state produced by [`ValuesReconstructState::new`].
impl Default for ValuesReconstructState {
    fn default() -> Self {
        Self::new()
    }
}
284 :
/// A key that uniquely identifies a layer in a timeline
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub(crate) enum LayerId {
    // NOTE(review): "Persitent" looks like a misspelling of "Persistent"; renaming
    // would have to touch every user of this variant, so it is left as is here.
    /// Identifies a persistent layer by its [`PersistentLayerKey`].
    PersitentLayerId(PersistentLayerKey),
    /// Identifies an in-memory layer by its file id.
    InMemoryLayerId(InMemoryLayerFileId),
}
291 :
/// Uniquely identify a layer visit by the layer
/// and LSN floor (or start LSN) of the reads.
/// The layer itself is not enough since we may
/// have different LSN lower bounds for delta layer reads.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
struct LayerToVisitId {
    /// The layer to be read.
    layer_id: LayerId,
    /// Start of the LSN range of the read (set from `lsn_range.start` in `LayerFringe::update`).
    lsn_floor: Lsn,
}
301 :
/// Layer wrapper for the read path. Note that it is valid
/// to use these layers even after external operations have
/// been performed on them (compaction, freeze, etc.).
#[derive(Debug)]
pub(crate) enum ReadableLayer {
    /// A persistent layer handle.
    PersistentLayer(Layer),
    /// A shared reference to an in-memory layer.
    InMemoryLayer(Arc<InMemoryLayer>),
}
310 :
/// A partial description of a read to be done.
///
/// Ordering and equality of visits are defined purely by `lsn_range`; the id
/// does not take part in comparisons.
#[derive(Debug, Clone)]
struct LayerVisit {
    /// An id used to resolve the readable layer within the fringe
    layer_to_visit_id: LayerToVisitId,
    /// Lsn range for the read, used for selecting the next read
    lsn_range: Range<Lsn>,
}
319 :
/// Data structure which maintains a fringe of layers for the
/// read path. The fringe is the set of layers which intersects
/// the current keyspace that the search is descending on.
/// Each layer tracks the keyspace that intersects it.
///
/// The fringe must appear sorted by Lsn. Hence, it uses
/// a two layer indexing scheme.
#[derive(Debug)]
pub(crate) struct LayerFringe {
    /// Max-heap of pending visits; the visit with the greatest end Lsn is popped first.
    planned_visits_by_lsn: BinaryHeap<LayerVisit>,
    /// Layer handle and accumulated target keyspace for every pending visit,
    /// keyed by the same id that is stored in the heap entries.
    visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,
}
332 :
/// What to read for one pending visit in the [`LayerFringe`].
#[derive(Debug)]
struct LayerVisitReads {
    /// The layer to read from.
    layer: ReadableLayer,
    /// Keys to read from `layer`, accumulated across `LayerFringe::update` calls.
    target_keyspace: KeySpaceRandomAccum,
}
338 :
339 : impl LayerFringe {
340 852269 : pub(crate) fn new() -> Self {
341 852269 : LayerFringe {
342 852269 : planned_visits_by_lsn: BinaryHeap::new(),
343 852269 : visit_reads: HashMap::new(),
344 852269 : }
345 852269 : }
346 :
347 1698539 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
348 1698539 : let read_desc = match self.planned_visits_by_lsn.pop() {
349 846270 : Some(desc) => desc,
350 852269 : None => return None,
351 : };
352 :
353 846270 : let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);
354 846270 :
355 846270 : match removed {
356 : Some((
357 : _,
358 : LayerVisitReads {
359 846270 : layer,
360 846270 : mut target_keyspace,
361 846270 : },
362 846270 : )) => Some((
363 846270 : layer,
364 846270 : target_keyspace.consume_keyspace(),
365 846270 : read_desc.lsn_range,
366 846270 : )),
367 0 : None => unreachable!("fringe internals are always consistent"),
368 : }
369 1698539 : }
370 :
371 846284 : pub(crate) fn update(
372 846284 : &mut self,
373 846284 : layer: ReadableLayer,
374 846284 : keyspace: KeySpace,
375 846284 : lsn_range: Range<Lsn>,
376 846284 : ) {
377 846284 : let layer_to_visit_id = LayerToVisitId {
378 846284 : layer_id: layer.id(),
379 846284 : lsn_floor: lsn_range.start,
380 846284 : };
381 846284 :
382 846284 : let entry = self.visit_reads.entry(layer_to_visit_id.clone());
383 846284 : match entry {
384 14 : Entry::Occupied(mut entry) => {
385 14 : entry.get_mut().target_keyspace.add_keyspace(keyspace);
386 14 : }
387 846270 : Entry::Vacant(entry) => {
388 846270 : self.planned_visits_by_lsn.push(LayerVisit {
389 846270 : lsn_range,
390 846270 : layer_to_visit_id: layer_to_visit_id.clone(),
391 846270 : });
392 846270 : let mut accum = KeySpaceRandomAccum::new();
393 846270 : accum.add_keyspace(keyspace);
394 846270 : entry.insert(LayerVisitReads {
395 846270 : layer,
396 846270 : target_keyspace: accum,
397 846270 : });
398 846270 : }
399 : }
400 846284 : }
401 : }
402 :
/// The default fringe is the empty fringe produced by [`LayerFringe::new`].
impl Default for LayerFringe {
    fn default() -> Self {
        Self::new()
    }
}
408 :
409 : impl Ord for LayerVisit {
410 30 : fn cmp(&self, other: &Self) -> Ordering {
411 30 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
412 30 : if ord == std::cmp::Ordering::Equal {
413 22 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
414 : } else {
415 8 : ord
416 : }
417 30 : }
418 : }
419 :
impl PartialOrd for LayerVisit {
    /// Always `Some`: delegates to the total order defined by `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
425 :
426 : impl PartialEq for LayerVisit {
427 0 : fn eq(&self, other: &Self) -> bool {
428 0 : self.lsn_range == other.lsn_range
429 0 : }
430 : }
431 :
432 : impl Eq for LayerVisit {}
433 :
434 : impl ReadableLayer {
435 846284 : pub(crate) fn id(&self) -> LayerId {
436 846284 : match self {
437 239934 : Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),
438 606350 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
439 : }
440 846284 : }
441 :
442 846270 : pub(crate) async fn get_values_reconstruct_data(
443 846270 : &self,
444 846270 : keyspace: KeySpace,
445 846270 : lsn_range: Range<Lsn>,
446 846270 : reconstruct_state: &mut ValuesReconstructState,
447 846270 : ctx: &RequestContext,
448 846270 : ) -> Result<(), GetVectoredError> {
449 846270 : match self {
450 239920 : ReadableLayer::PersistentLayer(layer) => {
451 239920 : layer
452 239920 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
453 95970 : .await
454 : }
455 606350 : ReadableLayer::InMemoryLayer(layer) => {
456 606350 : layer
457 606350 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
458 87081 : .await
459 : }
460 : }
461 846270 : }
462 : }
463 :
/// Layers contain a hint indicating whether they are likely to be used for reads.
///
/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously
/// when changing the visibility of layers (for example when creating a branch that makes some previously
/// covered layers visible). It should be used for cache management but not for correctness-critical checks.
///
/// In [`LayerAccessStats`] the hint is stored as a single bit: 1 for `Visible`, 0 for `Covered`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayerVisibilityHint {
    /// A Visible layer might be read while serving a read, because there is not an image layer between it
    /// and a readable LSN (the tip of the branch or a child's branch point)
    Visible,
    /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates
    /// a branch or ephemeral endpoint at an LSN below the layer that covers this.
    Covered,
}
478 :
/// Per-layer access statistics packed into one atomic `u64`: a low-resolution last-access
/// timestamp, a low-resolution last-residence-change timestamp and the visibility bit.
pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
480 :
/// Selects whether `LayerAccessStats::as_api_model` clears the stored timestamps after reading them.
#[derive(Clone, Copy, strum_macros::EnumString)]
pub(crate) enum LayerAccessStatsReset {
    /// Leave the stored statistics untouched.
    NoReset,
    /// Zero both the access and the residence timestamp fields.
    AllStats,
}
486 :
487 : impl Default for LayerAccessStats {
488 1756 : fn default() -> Self {
489 1756 : // Default value is to assume resident since creation time, and visible.
490 1756 : let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
491 1756 : value |= 0x1 << Self::VISIBILITY_SHIFT;
492 1756 :
493 1756 : Self(std::sync::atomic::AtomicU64::new(value))
494 1756 : }
495 : }
496 :
497 : // Efficient store of two very-low-resolution timestamps and some bits. Used for storing last access time and
498 : // last residence change time.
499 : impl LayerAccessStats {
// How many high bits to drop from a u32 timestamp?
// - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
//   after that, this software has been very successful!)
// - Dropping the top bit is implicitly safe because unix timestamps are meant to be
//   stored in an i32, so they never used it.
// - Dropping the next two bits is safe because this code is only running on systems in
//   years >= 2024, and these bits have been 1 since 2021
//
// Therefore we may store only 29 bits (32 minus the 3 dropped ones) for a timestamp with one
// second resolution. We do this truncation to make space for some flags in the high bits of
// our u64.
const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
// Selects the 29 low bits of a timestamp that are actually stored.
const TS_MASK: u32 = 0x1f_ff_ff_ff;
// Bits 29 and 30: dropped on write and OR-ed back in by `read_low_res_timestamp`.
const TS_ONES: u32 = 0x60_00_00_00;

// Bit offset of the access-time field.
const ATIME_SHIFT: u32 = 0;
// Bit offset of the residence-time field, directly above the access-time field.
const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
// Bit offset of the visibility flag, directly above the two timestamp fields.
const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
517 :
518 240012 : fn write_bits(&self, mask: u64, value: u64) -> u64 {
519 240012 : self.0
520 240012 : .fetch_update(
521 240012 : // TODO: decide what orderings are correct
522 240012 : std::sync::atomic::Ordering::Relaxed,
523 240012 : std::sync::atomic::Ordering::Relaxed,
524 240012 : |v| Some((v & !mask) | (value & mask)),
525 240012 : )
526 240012 : .expect("Inner function is infallible")
527 240012 : }
528 :
529 241436 : fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
530 241436 : // Drop the low three bits of the timestamp, for an ~8s accuracy
531 241436 : let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
532 241436 :
533 241436 : ((Self::TS_MASK as u64) << shift, timestamp << shift)
534 241436 : }
535 :
536 62 : fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
537 62 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
538 62 :
539 62 : let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
540 62 : if ts_bits == 0 {
541 24 : None
542 : } else {
543 38 : Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
544 : }
545 62 : }
546 :
/// Record a change in layer residency.
///
/// Stores `now` (truncated to the low-resolution format) in the residence-time field.
///
/// Recording the event must happen while holding the layer map lock to
/// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
/// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
///
/// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
/// the following race could happen:
///
/// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
/// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
/// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
/// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
    let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
    // Only the residence-time bits are overwritten; the previous value is not needed.
    self.write_bits(mask, value);
}
564 :
/// Record a change in layer residency, timestamped with the current system time.
pub(crate) fn record_residence_event(&self) {
    self.record_residence_event_at(SystemTime::now())
}
568 :
569 239654 : fn record_access_at(&self, now: SystemTime) -> bool {
570 239654 : let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
571 239654 :
572 239654 : // A layer which is accessed must be visible.
573 239654 : mask |= 0x1 << Self::VISIBILITY_SHIFT;
574 239654 : value |= 0x1 << Self::VISIBILITY_SHIFT;
575 239654 :
576 239654 : let old_bits = self.write_bits(mask, value);
577 2 : !matches!(
578 239654 : self.decode_visibility(old_bits),
579 : LayerVisibilityHint::Visible
580 : )
581 239654 : }
582 :
583 : /// Returns true if we modified the layer's visibility to set it to Visible implicitly
584 : /// as a result of this access
585 239932 : pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {
586 239932 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
587 284 : return false;
588 239648 : }
589 239648 :
590 239648 : self.record_access_at(SystemTime::now())
591 239932 : }
592 :
593 0 : fn as_api_model(
594 0 : &self,
595 0 : reset: LayerAccessStatsReset,
596 0 : ) -> pageserver_api::models::LayerAccessStats {
597 0 : let ret = pageserver_api::models::LayerAccessStats {
598 0 : access_time: self
599 0 : .read_low_res_timestamp(Self::ATIME_SHIFT)
600 0 : .unwrap_or(UNIX_EPOCH),
601 0 : residence_time: self
602 0 : .read_low_res_timestamp(Self::RTIME_SHIFT)
603 0 : .unwrap_or(UNIX_EPOCH),
604 0 : visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
605 : };
606 0 : match reset {
607 0 : LayerAccessStatsReset::NoReset => {}
608 0 : LayerAccessStatsReset::AllStats => {
609 0 : self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
610 0 : self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
611 0 : }
612 : }
613 0 : ret
614 0 : }
615 :
616 : /// Get the latest access timestamp, falling back to latest residence event. The latest residence event
617 : /// will be this Layer's construction time, if its residence hasn't changed since then.
618 16 : pub(crate) fn latest_activity(&self) -> SystemTime {
619 16 : if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
620 6 : t
621 : } else {
622 10 : self.read_low_res_timestamp(Self::RTIME_SHIFT)
623 10 : .expect("Residence time is set on construction")
624 : }
625 16 : }
626 :
627 : /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).
628 : ///
629 : /// This indicates whether the layer has been used for some purpose that would motivate
630 : /// us to keep it on disk, such as for serving a getpage request.
631 18 : fn accessed(&self) -> bool {
632 18 : // Consider it accessed if the most recent access is more recent than
633 18 : // the most recent change in residence status.
634 18 : match (
635 18 : self.read_low_res_timestamp(Self::ATIME_SHIFT),
636 18 : self.read_low_res_timestamp(Self::RTIME_SHIFT),
637 : ) {
638 14 : (None, _) => false,
639 0 : (Some(_), None) => true,
640 4 : (Some(a), Some(r)) => a >= r,
641 : }
642 18 : }
643 :
644 : /// Helper for extracting the visibility hint from the literal value of our inner u64
645 240571 : fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
646 240571 : match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
647 240549 : 1 => LayerVisibilityHint::Visible,
648 22 : 0 => LayerVisibilityHint::Covered,
649 0 : _ => unreachable!(),
650 : }
651 240571 : }
652 :
653 : /// Returns the old value which has been replaced
654 332 : pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
655 332 : let value = match visibility {
656 280 : LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
657 52 : LayerVisibilityHint::Covered => 0x0,
658 : };
659 :
660 332 : let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
661 332 : self.decode_visibility(old_bits)
662 332 : }
663 :
/// Returns the visibility hint currently stored in the visibility bit.
pub(crate) fn visibility(&self) -> LayerVisibilityHint {
    let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
    self.decode_visibility(read)
}
668 : }
669 :
/// Get a layer descriptor from a layer.
pub(crate) trait AsLayerDesc {
    /// Get the layer descriptor.
    ///
    /// The descriptor is returned by reference, borrowed from `self`.
    fn layer_desc(&self) -> &PersistentLayerDesc;
}
675 :
676 : pub mod tests {
677 : use pageserver_api::shard::TenantShardId;
678 : use utils::id::TimelineId;
679 :
680 : use super::*;
681 :
682 : impl From<DeltaLayerName> for PersistentLayerDesc {
683 0 : fn from(value: DeltaLayerName) -> Self {
684 0 : PersistentLayerDesc::new_delta(
685 0 : TenantShardId::from([0; 18]),
686 0 : TimelineId::from_array([0; 16]),
687 0 : value.key_range,
688 0 : value.lsn_range,
689 0 : 233,
690 0 : )
691 0 : }
692 : }
693 :
694 : impl From<ImageLayerName> for PersistentLayerDesc {
695 0 : fn from(value: ImageLayerName) -> Self {
696 0 : PersistentLayerDesc::new_img(
697 0 : TenantShardId::from([0; 18]),
698 0 : TimelineId::from_array([0; 16]),
699 0 : value.key_range,
700 0 : value.lsn,
701 0 : 233,
702 0 : )
703 0 : }
704 : }
705 :
706 : impl From<LayerName> for PersistentLayerDesc {
707 0 : fn from(value: LayerName) -> Self {
708 0 : match value {
709 0 : LayerName::Delta(d) => Self::from(d),
710 0 : LayerName::Image(i) => Self::from(i),
711 : }
712 0 : }
713 : }
714 : }
715 :
/// Newtype around a borrowed range whose `Debug` output is built from the `Display`
/// output of its bounds, rendered as `start..end`.
///
/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);

impl<T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'_, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Range { start, end } = self.0;
        f.write_fmt(format_args!("{}..{}", start, end))
    }
}
|