Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod delta_layer;
4 : pub mod filter_iterator;
5 : pub mod image_layer;
6 : pub mod inmemory_layer;
7 : pub(crate) mod layer;
8 : mod layer_desc;
9 : mod layer_name;
10 : pub mod merge_iterator;
11 : pub mod split_writer;
12 :
13 : use crate::context::{AccessStatsBehavior, RequestContext};
14 : use crate::repository::Value;
15 : use crate::walrecord::NeonWalRecord;
16 : use bytes::Bytes;
17 : use pageserver_api::key::Key;
18 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
19 : use std::cmp::{Ordering, Reverse};
20 : use std::collections::hash_map::Entry;
21 : use std::collections::{BinaryHeap, HashMap};
22 : use std::ops::Range;
23 : use std::sync::Arc;
24 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
25 :
26 : use utils::lsn::Lsn;
27 :
28 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
29 : pub use image_layer::{ImageLayer, ImageLayerWriter};
30 : pub use inmemory_layer::InMemoryLayer;
31 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
32 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
33 :
34 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
35 :
36 : use self::inmemory_layer::InMemoryLayerFileId;
37 :
38 : use super::timeline::GetVectoredError;
39 : use super::PageReconstructError;
40 :
/// Reports whether the half-open ranges `a` and `b` share at least one point.
///
/// The range with the smaller start is treated as the "first" one (`b` is used
/// when `a.start < b.start` does not hold, e.g. for equal starts). The ranges
/// overlap when that first range ends strictly after the other one starts.
pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
where
    T: PartialOrd<T>,
{
    let (first, second) = if a.start < b.start { (a, b) } else { (b, a) };
    first.end > second.start
}
51 :
52 : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
53 : ///
54 : /// Before first call, you can fill in 'page_img' if you have an older cached
55 : /// version of the page available. That can save work in
56 : /// 'get_value_reconstruct_data', as it can stop searching for page versions
57 : /// when all the WAL records going back to the cached image have been collected.
58 : ///
59 : /// When get_value_reconstruct_data returns Complete, 'img' is set to an image
60 : /// of the page, or the oldest WAL record in 'records' is a will_init-type
61 : /// record that initializes the page without requiring a previous image.
62 : ///
63 : /// If 'get_page_reconstruct_data' returns Continue, some 'records' may have
64 : /// been collected, but there are more records outside the current layer. Pass
65 : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
66 : /// call, to collect more records.
67 : ///
68 : #[derive(Debug, Default)]
69 : pub(crate) struct ValueReconstructState {
70 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
71 : pub(crate) img: Option<(Lsn, Bytes)>,
72 : }
73 :
/// Tells whether enough data has been gathered for a key.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum ValueReconstructSituation {
    /// Nothing more has to be collected for the key.
    Complete,
    /// More values are still needed for the key. This is the default.
    #[default]
    Continue,
}
80 :
81 : /// Reconstruct data accumulated for a single key during a vectored get
82 : #[derive(Debug, Default, Clone)]
83 : pub(crate) struct VectoredValueReconstructState {
84 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
85 : pub(crate) img: Option<(Lsn, Bytes)>,
86 :
87 : situation: ValueReconstructSituation,
88 : }
89 :
90 : impl VectoredValueReconstructState {
91 121096 : fn get_cached_lsn(&self) -> Option<Lsn> {
92 121096 : self.img.as_ref().map(|img| img.0)
93 121096 : }
94 : }
95 :
96 : impl From<VectoredValueReconstructState> for ValueReconstructState {
97 2000561 : fn from(mut state: VectoredValueReconstructState) -> Self {
98 2000561 : // walredo expects the records to be descending in terms of Lsn
99 2000561 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
100 2000561 :
101 2000561 : ValueReconstructState {
102 2000561 : records: state.records,
103 2000561 : img: state.img,
104 2000561 : }
105 2000561 : }
106 : }
107 :
108 : /// Bag of data accumulated during a vectored get..
109 : pub(crate) struct ValuesReconstructState {
110 : /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
111 : /// should not expect to get anything from this hashmap.
112 : pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
113 : /// The keys which are already retrieved
114 : keys_done: KeySpaceRandomAccum,
115 :
116 : /// The keys covered by the image layers
117 : keys_with_image_coverage: Option<Range<Key>>,
118 :
119 : // Statistics that are still accessible as a caller of `get_vectored_impl`.
120 : layers_visited: u32,
121 : delta_layers_visited: u32,
122 : }
123 :
124 : impl ValuesReconstructState {
125 1881149 : pub(crate) fn new() -> Self {
126 1881149 : Self {
127 1881149 : keys: HashMap::new(),
128 1881149 : keys_done: KeySpaceRandomAccum::new(),
129 1881149 : keys_with_image_coverage: None,
130 1881149 : layers_visited: 0,
131 1881149 : delta_layers_visited: 0,
132 1881149 : }
133 1881149 : }
134 :
135 : /// Associate a key with the error which it encountered and mark it as done
136 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
137 0 : let previous = self.keys.insert(key, Err(err));
138 0 : if let Some(Ok(state)) = previous {
139 0 : if state.situation == ValueReconstructSituation::Continue {
140 0 : self.keys_done.add_key(key);
141 0 : }
142 0 : }
143 0 : }
144 :
145 2455570 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
146 2455570 : self.layers_visited += 1;
147 2455570 : if let ReadableLayer::PersistentLayer(layer) = layer {
148 635999 : if layer.layer_desc().is_delta() {
149 611933 : self.delta_layers_visited += 1;
150 611933 : }
151 1819571 : }
152 2455570 : }
153 :
154 600 : pub(crate) fn get_delta_layers_visited(&self) -> u32 {
155 600 : self.delta_layers_visited
156 600 : }
157 :
158 1880459 : pub(crate) fn get_layers_visited(&self) -> u32 {
159 1880459 : self.layers_visited
160 1880459 : }
161 :
162 : /// This function is called after reading a keyspace from a layer.
163 : /// It checks if the read path has now moved past the cached Lsn for any keys.
164 : ///
165 : /// Implementation note: We intentionally iterate over the keys for which we've
166 : /// already collected some reconstruct data. This avoids scaling complexity with
167 : /// the size of the search space.
168 2431504 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
169 2431504 : for (key, value) in self.keys.iter_mut() {
170 2073206 : if !keyspace.contains(key) {
171 126555 : continue;
172 1946651 : }
173 :
174 1946651 : if let Ok(state) = value {
175 1946651 : if state.situation != ValueReconstructSituation::Complete
176 978 : && state.get_cached_lsn() >= Some(advanced_to)
177 0 : {
178 0 : state.situation = ValueReconstructSituation::Complete;
179 0 : self.keys_done.add_key(*key);
180 1946651 : }
181 0 : }
182 : }
183 2431504 : }
184 :
185 : /// On hitting image layer, we can mark all keys in this range as done, because
186 : /// if the image layer does not contain a key, it is deleted/never added.
187 24102 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
188 24102 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
189 24102 : assert_eq!(
190 : prev_val, None,
191 0 : "should consume the keyspace before the next iteration"
192 : );
193 24102 : }
194 :
195 : /// Update the state collected for a given key.
196 : /// Returns true if this was the last value needed for the key and false otherwise.
197 : ///
198 : /// If the key is done after the update, mark it as such.
199 2002043 : pub(crate) fn update_key(
200 2002043 : &mut self,
201 2002043 : key: &Key,
202 2002043 : lsn: Lsn,
203 2002043 : value: Value,
204 2002043 : ) -> ValueReconstructSituation {
205 2002043 : let state = self
206 2002043 : .keys
207 2002043 : .entry(*key)
208 2002043 : .or_insert(Ok(VectoredValueReconstructState::default()));
209 :
210 2002043 : if let Ok(state) = state {
211 2002043 : let key_done = match state.situation {
212 0 : ValueReconstructSituation::Complete => unreachable!(),
213 2002043 : ValueReconstructSituation::Continue => match value {
214 2000759 : Value::Image(img) => {
215 2000759 : state.img = Some((lsn, img));
216 2000759 : true
217 : }
218 1284 : Value::WalRecord(rec) => {
219 1284 : debug_assert!(
220 1284 : Some(lsn) > state.get_cached_lsn(),
221 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
222 0 : lsn,
223 0 : state
224 0 : .get_cached_lsn()
225 0 : .expect("Assertion can only fire if a cached lsn is present")
226 : );
227 :
228 1284 : let will_init = rec.will_init();
229 1284 : state.records.push((lsn, rec));
230 1284 : will_init
231 : }
232 : },
233 : };
234 :
235 2002043 : if key_done && state.situation == ValueReconstructSituation::Continue {
236 2000771 : state.situation = ValueReconstructSituation::Complete;
237 2000771 : self.keys_done.add_key(*key);
238 2000771 : }
239 :
240 2002043 : state.situation
241 : } else {
242 0 : ValueReconstructSituation::Complete
243 : }
244 2002043 : }
245 :
246 : /// Returns the Lsn at which this key is cached if one exists.
247 : /// The read path should go no further than this Lsn for the given key.
248 3246198 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
249 3246198 : self.keys
250 3246198 : .get(key)
251 3246198 : .and_then(|k| k.as_ref().ok())
252 3246198 : .and_then(|state| state.get_cached_lsn())
253 3246198 : }
254 :
255 : /// Returns the key space describing the keys that have
256 : /// been marked as completed since the last call to this function.
257 : /// Returns individual keys done, and the image layer coverage.
258 5009742 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
259 5009742 : (
260 5009742 : self.keys_done.consume_keyspace(),
261 5009742 : self.keys_with_image_coverage.take(),
262 5009742 : )
263 5009742 : }
264 : }
265 :
266 : impl Default for ValuesReconstructState {
267 774 : fn default() -> Self {
268 774 : Self::new()
269 774 : }
270 : }
271 :
272 : /// A key that uniquely identifies a layer in a timeline
273 : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
274 : pub(crate) enum LayerId {
275 : PersitentLayerId(PersistentLayerKey),
276 : InMemoryLayerId(InMemoryLayerFileId),
277 : }
278 :
279 : /// Uniquely identify a layer visit by the layer
280 : /// and LSN floor (or start LSN) of the reads.
281 : /// The layer itself is not enough since we may
282 : /// have different LSN lower bounds for delta layer reads.
283 : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
284 : struct LayerToVisitId {
285 : layer_id: LayerId,
286 : lsn_floor: Lsn,
287 : }
288 :
289 : /// Layer wrapper for the read path. Note that it is valid
290 : /// to use these layers even after external operations have
291 : /// been performed on them (compaction, freeze, etc.).
292 : #[derive(Debug)]
293 : pub(crate) enum ReadableLayer {
294 : PersistentLayer(Layer),
295 : InMemoryLayer(Arc<InMemoryLayer>),
296 : }
297 :
298 : /// A partial description of a read to be done.
299 : #[derive(Debug, Clone)]
300 : struct LayerVisit {
301 : /// An id used to resolve the readable layer within the fringe
302 : layer_to_visit_id: LayerToVisitId,
303 : /// Lsn range for the read, used for selecting the next read
304 : lsn_range: Range<Lsn>,
305 : }
306 :
307 : /// Data structure which maintains a fringe of layers for the
308 : /// read path. The fringe is the set of layers which intersects
309 : /// the current keyspace that the search is descending on.
310 : /// Each layer tracks the keyspace that intersects it.
311 : ///
312 : /// The fringe must appear sorted by Lsn. Hence, it uses
313 : /// a two layer indexing scheme.
314 : #[derive(Debug)]
315 : pub(crate) struct LayerFringe {
316 : planned_visits_by_lsn: BinaryHeap<LayerVisit>,
317 : visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,
318 : }
319 :
320 : #[derive(Debug)]
321 : struct LayerVisitReads {
322 : layer: ReadableLayer,
323 : target_keyspace: KeySpaceRandomAccum,
324 : }
325 :
326 : impl LayerFringe {
327 2554172 : pub(crate) fn new() -> Self {
328 2554172 : LayerFringe {
329 2554172 : planned_visits_by_lsn: BinaryHeap::new(),
330 2554172 : visit_reads: HashMap::new(),
331 2554172 : }
332 2554172 : }
333 :
334 5009742 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
335 5009742 : let read_desc = match self.planned_visits_by_lsn.pop() {
336 2455570 : Some(desc) => desc,
337 2554172 : None => return None,
338 : };
339 :
340 2455570 : let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);
341 2455570 :
342 2455570 : match removed {
343 : Some((
344 : _,
345 : LayerVisitReads {
346 2455570 : layer,
347 2455570 : mut target_keyspace,
348 2455570 : },
349 2455570 : )) => Some((
350 2455570 : layer,
351 2455570 : target_keyspace.consume_keyspace(),
352 2455570 : read_desc.lsn_range,
353 2455570 : )),
354 0 : None => unreachable!("fringe internals are always consistent"),
355 : }
356 5009742 : }
357 :
358 2646745 : pub(crate) fn update(
359 2646745 : &mut self,
360 2646745 : layer: ReadableLayer,
361 2646745 : keyspace: KeySpace,
362 2646745 : lsn_range: Range<Lsn>,
363 2646745 : ) {
364 2646745 : let layer_to_visit_id = LayerToVisitId {
365 2646745 : layer_id: layer.id(),
366 2646745 : lsn_floor: lsn_range.start,
367 2646745 : };
368 2646745 :
369 2646745 : let entry = self.visit_reads.entry(layer_to_visit_id.clone());
370 2646745 : match entry {
371 191175 : Entry::Occupied(mut entry) => {
372 191175 : entry.get_mut().target_keyspace.add_keyspace(keyspace);
373 191175 : }
374 2455570 : Entry::Vacant(entry) => {
375 2455570 : self.planned_visits_by_lsn.push(LayerVisit {
376 2455570 : lsn_range,
377 2455570 : layer_to_visit_id: layer_to_visit_id.clone(),
378 2455570 : });
379 2455570 : let mut accum = KeySpaceRandomAccum::new();
380 2455570 : accum.add_keyspace(keyspace);
381 2455570 : entry.insert(LayerVisitReads {
382 2455570 : layer,
383 2455570 : target_keyspace: accum,
384 2455570 : });
385 2455570 : }
386 : }
387 2646745 : }
388 : }
389 :
390 : impl Default for LayerFringe {
391 0 : fn default() -> Self {
392 0 : Self::new()
393 0 : }
394 : }
395 :
396 : impl Ord for LayerVisit {
397 66 : fn cmp(&self, other: &Self) -> Ordering {
398 66 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
399 66 : if ord == std::cmp::Ordering::Equal {
400 54 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
401 : } else {
402 12 : ord
403 : }
404 66 : }
405 : }
406 :
407 : impl PartialOrd for LayerVisit {
408 66 : fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
409 66 : Some(self.cmp(other))
410 66 : }
411 : }
412 :
413 : impl PartialEq for LayerVisit {
414 0 : fn eq(&self, other: &Self) -> bool {
415 0 : self.lsn_range == other.lsn_range
416 0 : }
417 : }
418 :
419 : impl Eq for LayerVisit {}
420 :
421 : impl ReadableLayer {
422 2646745 : pub(crate) fn id(&self) -> LayerId {
423 2646745 : match self {
424 827174 : Self::PersistentLayer(layer) => LayerId::PersitentLayerId(layer.layer_desc().key()),
425 1819571 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
426 : }
427 2646745 : }
428 :
429 2455570 : pub(crate) async fn get_values_reconstruct_data(
430 2455570 : &self,
431 2455570 : keyspace: KeySpace,
432 2455570 : lsn_range: Range<Lsn>,
433 2455570 : reconstruct_state: &mut ValuesReconstructState,
434 2455570 : ctx: &RequestContext,
435 2455570 : ) -> Result<(), GetVectoredError> {
436 2455570 : match self {
437 635999 : ReadableLayer::PersistentLayer(layer) => {
438 635999 : layer
439 635999 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
440 286619 : .await
441 : }
442 1819571 : ReadableLayer::InMemoryLayer(layer) => {
443 1819571 : layer
444 1819571 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
445 261439 : .await
446 : }
447 : }
448 2455570 : }
449 : }
450 :
/// Hint stored with a layer, telling whether it is likely to be used for reads.
///
/// It is only a hint and not an authoritative value, so it does not have to be
/// updated synchronously whenever the visibility of layers changes (for example
/// when a new branch makes previously covered layers visible). Use it for cache
/// management, never for correctness-critical checks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LayerVisibilityHint {
    /// The layer might be read while serving a read: no image layer sits between
    /// it and a readable LSN (the tip of the branch or a child's branch point).
    Visible,
    /// The layer probably won't be read right now, but it _can_ be read in future
    /// if someone creates a branch or ephemeral endpoint at an LSN below the layer
    /// that covers this one.
    Covered,
}
465 :
/// Access statistics of a layer, packed into a single atomic `u64`: a
/// low-resolution access time, a low-resolution residence-change time and a
/// visibility bit (see the shift constants in the `impl`).
pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64);
467 :
468 0 : #[derive(Clone, Copy, strum_macros::EnumString)]
469 : pub(crate) enum LayerAccessStatsReset {
470 : NoReset,
471 : AllStats,
472 : }
473 :
474 : impl Default for LayerAccessStats {
475 5214 : fn default() -> Self {
476 5214 : // Default value is to assume resident since creation time, and visible.
477 5214 : let (_mask, mut value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, SystemTime::now());
478 5214 : value |= 0x1 << Self::VISIBILITY_SHIFT;
479 5214 :
480 5214 : Self(std::sync::atomic::AtomicU64::new(value))
481 5214 : }
482 : }
483 :
484 : // Efficient store of two very-low-resolution timestamps and some bits. Used for storing last access time and
485 : // last residence change time.
486 : impl LayerAccessStats {
487 : // How many high bits to drop from a u32 timestamp?
488 : // - Only storing up to a u32 timestamp will work fine until 2038 (if this code is still in use
489 : // after that, this software has been very successful!)
490 : // - Dropping the top bit is implicitly safe because unix timestamps are meant to be
491 : // stored in an i32, so they never used it.
492 : // - Dropping the next two bits is safe because this code is only running on systems in
493 : // years >= 2024, and these bits have been 1 since 2021
494 : //
495 : // Therefore we may store only 28 bits for a timestamp with one second resolution. We do
496 : // this truncation to make space for some flags in the high bits of our u64.
497 : const TS_DROP_HIGH_BITS: u32 = u32::count_ones(Self::TS_ONES) + 1;
498 : const TS_MASK: u32 = 0x1f_ff_ff_ff;
499 : const TS_ONES: u32 = 0x60_00_00_00;
500 :
501 : const ATIME_SHIFT: u32 = 0;
502 : const RTIME_SHIFT: u32 = 32 - Self::TS_DROP_HIGH_BITS;
503 : const VISIBILITY_SHIFT: u32 = 64 - 2 * Self::TS_DROP_HIGH_BITS;
504 :
505 637481 : fn write_bits(&self, mask: u64, value: u64) -> u64 {
506 637481 : self.0
507 637481 : .fetch_update(
508 637481 : // TODO: decide what orderings are correct
509 637481 : std::sync::atomic::Ordering::Relaxed,
510 637481 : std::sync::atomic::Ordering::Relaxed,
511 637481 : |v| Some((v & !mask) | (value & mask)),
512 637481 : )
513 637481 : .expect("Inner function is infallible")
514 637481 : }
515 :
516 641723 : fn to_low_res_timestamp(shift: u32, time: SystemTime) -> (u64, u64) {
517 641723 : // Drop the low three bits of the timestamp, for an ~8s accuracy
518 641723 : let timestamp = time.duration_since(UNIX_EPOCH).unwrap().as_secs() & (Self::TS_MASK as u64);
519 641723 :
520 641723 : ((Self::TS_MASK as u64) << shift, timestamp << shift)
521 641723 : }
522 :
523 186 : fn read_low_res_timestamp(&self, shift: u32) -> Option<SystemTime> {
524 186 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
525 186 :
526 186 : let ts_bits = (read & ((Self::TS_MASK as u64) << shift)) >> shift;
527 186 : if ts_bits == 0 {
528 72 : None
529 : } else {
530 114 : Some(UNIX_EPOCH + Duration::from_secs(ts_bits | (Self::TS_ONES as u64)))
531 : }
532 186 : }
533 :
534 : /// Record a change in layer residency.
535 : ///
536 : /// Recording the event must happen while holding the layer map lock to
537 : /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
538 : /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
539 : ///
540 : /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
541 : /// the following race could happen:
542 : ///
543 : /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
544 : /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
545 : /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
546 : /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
547 78 : pub(crate) fn record_residence_event_at(&self, now: SystemTime) {
548 78 : let (mask, value) = Self::to_low_res_timestamp(Self::RTIME_SHIFT, now);
549 78 : self.write_bits(mask, value);
550 78 : }
551 :
552 72 : pub(crate) fn record_residence_event(&self) {
553 72 : self.record_residence_event_at(SystemTime::now())
554 72 : }
555 :
556 636431 : fn record_access_at(&self, now: SystemTime) -> bool {
557 636431 : let (mut mask, mut value) = Self::to_low_res_timestamp(Self::ATIME_SHIFT, now);
558 636431 :
559 636431 : // A layer which is accessed must be visible.
560 636431 : mask |= 0x1 << Self::VISIBILITY_SHIFT;
561 636431 : value |= 0x1 << Self::VISIBILITY_SHIFT;
562 636431 :
563 636431 : let old_bits = self.write_bits(mask, value);
564 6 : !matches!(
565 636431 : self.decode_visibility(old_bits),
566 : LayerVisibilityHint::Visible
567 : )
568 636431 : }
569 :
570 : /// Returns true if we modified the layer's visibility to set it to Visible implicitly
571 : /// as a result of this access
572 637241 : pub(crate) fn record_access(&self, ctx: &RequestContext) -> bool {
573 637241 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
574 828 : return false;
575 636413 : }
576 636413 :
577 636413 : self.record_access_at(SystemTime::now())
578 637241 : }
579 :
580 0 : fn as_api_model(
581 0 : &self,
582 0 : reset: LayerAccessStatsReset,
583 0 : ) -> pageserver_api::models::LayerAccessStats {
584 0 : let ret = pageserver_api::models::LayerAccessStats {
585 0 : access_time: self
586 0 : .read_low_res_timestamp(Self::ATIME_SHIFT)
587 0 : .unwrap_or(UNIX_EPOCH),
588 0 : residence_time: self
589 0 : .read_low_res_timestamp(Self::RTIME_SHIFT)
590 0 : .unwrap_or(UNIX_EPOCH),
591 0 : visible: matches!(self.visibility(), LayerVisibilityHint::Visible),
592 : };
593 0 : match reset {
594 0 : LayerAccessStatsReset::NoReset => {}
595 0 : LayerAccessStatsReset::AllStats => {
596 0 : self.write_bits((Self::TS_MASK as u64) << Self::ATIME_SHIFT, 0x0);
597 0 : self.write_bits((Self::TS_MASK as u64) << Self::RTIME_SHIFT, 0x0);
598 0 : }
599 : }
600 0 : ret
601 0 : }
602 :
603 : /// Get the latest access timestamp, falling back to latest residence event. The latest residence event
604 : /// will be this Layer's construction time, if its residence hasn't changed since then.
605 48 : pub(crate) fn latest_activity(&self) -> SystemTime {
606 48 : if let Some(t) = self.read_low_res_timestamp(Self::ATIME_SHIFT) {
607 18 : t
608 : } else {
609 30 : self.read_low_res_timestamp(Self::RTIME_SHIFT)
610 30 : .expect("Residence time is set on construction")
611 : }
612 48 : }
613 :
614 : /// Whether this layer has been accessed (excluding in [`AccessStatsBehavior::Skip`]).
615 : ///
616 : /// This indicates whether the layer has been used for some purpose that would motivate
617 : /// us to keep it on disk, such as for serving a getpage request.
618 54 : fn accessed(&self) -> bool {
619 54 : // Consider it accessed if the most recent access is more recent than
620 54 : // the most recent change in residence status.
621 54 : match (
622 54 : self.read_low_res_timestamp(Self::ATIME_SHIFT),
623 54 : self.read_low_res_timestamp(Self::RTIME_SHIFT),
624 : ) {
625 42 : (None, _) => false,
626 0 : (Some(_), None) => true,
627 12 : (Some(a), Some(r)) => a >= r,
628 : }
629 54 : }
630 :
631 : /// Helper for extracting the visibility hint from the literal value of our inner u64
632 639151 : fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint {
633 639151 : match (bits >> Self::VISIBILITY_SHIFT) & 0x1 {
634 639091 : 1 => LayerVisibilityHint::Visible,
635 60 : 0 => LayerVisibilityHint::Covered,
636 0 : _ => unreachable!(),
637 : }
638 639151 : }
639 :
640 : /// Returns the old value which has been replaced
641 972 : pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint {
642 972 : let value = match visibility {
643 840 : LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT,
644 132 : LayerVisibilityHint::Covered => 0x0,
645 : };
646 :
647 972 : let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value);
648 972 : self.decode_visibility(old_bits)
649 972 : }
650 :
651 1748 : pub(crate) fn visibility(&self) -> LayerVisibilityHint {
652 1748 : let read = self.0.load(std::sync::atomic::Ordering::Relaxed);
653 1748 : self.decode_visibility(read)
654 1748 : }
655 : }
656 :
657 : /// Get a layer descriptor from a layer.
658 : pub(crate) trait AsLayerDesc {
659 : /// Get the layer descriptor.
660 : fn layer_desc(&self) -> &PersistentLayerDesc;
661 : }
662 :
663 : pub mod tests {
664 : use pageserver_api::shard::TenantShardId;
665 : use utils::id::TimelineId;
666 :
667 : use super::*;
668 :
669 : impl From<DeltaLayerName> for PersistentLayerDesc {
670 0 : fn from(value: DeltaLayerName) -> Self {
671 0 : PersistentLayerDesc::new_delta(
672 0 : TenantShardId::from([0; 18]),
673 0 : TimelineId::from_array([0; 16]),
674 0 : value.key_range,
675 0 : value.lsn_range,
676 0 : 233,
677 0 : )
678 0 : }
679 : }
680 :
681 : impl From<ImageLayerName> for PersistentLayerDesc {
682 0 : fn from(value: ImageLayerName) -> Self {
683 0 : PersistentLayerDesc::new_img(
684 0 : TenantShardId::from([0; 18]),
685 0 : TimelineId::from_array([0; 16]),
686 0 : value.key_range,
687 0 : value.lsn,
688 0 : 233,
689 0 : )
690 0 : }
691 : }
692 :
693 : impl From<LayerName> for PersistentLayerDesc {
694 0 : fn from(value: LayerName) -> Self {
695 0 : match value {
696 0 : LayerName::Delta(d) => Self::from(d),
697 0 : LayerName::Image(i) => Self::from(i),
698 : }
699 0 : }
700 : }
701 : }
702 :
/// Newtype around a borrowed range whose `Debug` output is rendered with the
/// `Display` impl of the bounds.
///
/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);

impl<T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'_, T> {
    /// Writes the range as `start..end`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Range { start, end } = self.0;
        write!(f, "{}..{}", start, end)
    }
}
|