Line data Source code
1 : //! Common traits and structs for layers
2 :
3 : pub mod delta_layer;
4 : pub mod image_layer;
5 : pub(crate) mod inmemory_layer;
6 : pub(crate) mod layer;
7 : mod layer_desc;
8 : mod layer_name;
9 :
10 : use crate::context::{AccessStatsBehavior, RequestContext};
11 : use crate::repository::Value;
12 : use crate::task_mgr::TaskKind;
13 : use crate::walrecord::NeonWalRecord;
14 : use bytes::Bytes;
15 : use enum_map::EnumMap;
16 : use enumset::EnumSet;
17 : use once_cell::sync::Lazy;
18 : use pageserver_api::key::Key;
19 : use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum};
20 : use pageserver_api::models::{
21 : LayerAccessKind, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
22 : };
23 : use std::borrow::Cow;
24 : use std::cmp::{Ordering, Reverse};
25 : use std::collections::hash_map::Entry;
26 : use std::collections::{BinaryHeap, HashMap};
27 : use std::ops::Range;
28 : use std::sync::{Arc, Mutex};
29 : use std::time::{Duration, SystemTime, UNIX_EPOCH};
30 : use tracing::warn;
31 : use utils::history_buffer::HistoryBufferWithDropCounter;
32 : use utils::rate_limit::RateLimit;
33 :
34 : use utils::{id::TimelineId, lsn::Lsn};
35 :
36 : pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef};
37 : pub use image_layer::{ImageLayer, ImageLayerWriter};
38 : pub use inmemory_layer::InMemoryLayer;
39 : pub use layer_desc::{PersistentLayerDesc, PersistentLayerKey};
40 : pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName};
41 :
42 : pub(crate) use layer::{EvictionError, Layer, ResidentLayer};
43 :
44 : use self::inmemory_layer::InMemoryLayerFileId;
45 :
46 : use super::timeline::GetVectoredError;
47 : use super::PageReconstructError;
48 :
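/// Returns whether two half-open ranges overlap.
///
/// A small illustrative sketch (hypothetical values, marked `ignore`):
/// ```ignore
/// assert!(range_overlaps(&(1..5), &(4..8)));   // 4 lies in both ranges
/// assert!(!range_overlaps(&(1..4), &(4..8)));  // half-open: 4 is only in the second range
/// assert!(range_overlaps(&(4..8), &(1..5)));   // the check is symmetric
/// ```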
49 0 : pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
50 0 : where
51 0 : T: PartialOrd<T>,
52 0 : {
53 0 : if a.start < b.start {
54 0 : a.end > b.start
55 : } else {
56 0 : b.end > a.start
57 : }
58 0 : }
59 :
60 : /// Struct used to communicate across calls to 'get_value_reconstruct_data'.
61 : ///
62 : /// Before the first call, you can fill in 'img' if you have an older cached
63 : /// version of the page available. That can save work in
64 : /// 'get_value_reconstruct_data', as it can stop searching for page versions
65 : /// when all the WAL records going back to the cached image have been collected.
66 : ///
67 : /// When 'get_value_reconstruct_data' returns Complete, either 'img' is set to an
68 : /// image of the page, or the oldest WAL record in 'records' is a will_init-type
69 : /// record that initializes the page without requiring a previous image.
70 : ///
71 : /// If 'get_value_reconstruct_data' returns Continue, some 'records' may have
72 : /// been collected, but there are more records outside the current layer. Pass
73 : /// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
74 : /// call, to collect more records.
75 : ///
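/// A minimal sketch of the intended call pattern. The `layers`, `key`,
/// `lsn_range`, `ctx` and `cached_image` names below are illustrative
/// assumptions, not part of this module, so the example is marked `ignore`:
/// ```ignore
/// let mut state = ValueReconstructState::default();
/// // Optionally seed the search with a cached page image; the search can then
/// // stop once all WAL records newer than the cached image have been collected.
/// state.img = cached_image; // Option<(Lsn, Bytes)>
/// for layer in layers {
///     // layers are visited newest to oldest
///     match layer.get_value_reconstruct_data(key, lsn_range.clone(), &mut state, ctx).await? {
///         ValueReconstructResult::Complete => break,    // enough data collected
///         ValueReconstructResult::Continue => continue, // descend to an older layer
///         ValueReconstructResult::Missing => anyhow::bail!("key not found"),
///     }
/// }
/// ```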
76 : #[derive(Debug, Default)]
77 : pub struct ValueReconstructState {
78 : pub records: Vec<(Lsn, NeonWalRecord)>,
79 : pub img: Option<(Lsn, Bytes)>,
80 : }
81 :
82 : #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
83 : pub(crate) enum ValueReconstructSituation {
84 : Complete,
85 : #[default]
86 : Continue,
87 : }
88 :
89 : /// Reconstruct data accumulated for a single key during a vectored get
90 : #[derive(Debug, Default, Clone)]
91 : pub(crate) struct VectoredValueReconstructState {
92 : pub(crate) records: Vec<(Lsn, NeonWalRecord)>,
93 : pub(crate) img: Option<(Lsn, Bytes)>,
94 :
95 : situation: ValueReconstructSituation,
96 : }
97 :
98 : impl VectoredValueReconstructState {
99 39705 : fn get_cached_lsn(&self) -> Option<Lsn> {
100 39705 : self.img.as_ref().map(|img| img.0)
101 39705 : }
102 : }
103 :
104 : impl From<VectoredValueReconstructState> for ValueReconstructState {
105 78418 : fn from(mut state: VectoredValueReconstructState) -> Self {
106 78418 : // walredo expects the records to be descending in terms of Lsn
107 78418 : state.records.sort_by_key(|(lsn, _)| Reverse(*lsn));
108 78418 :
109 78418 : ValueReconstructState {
110 78418 : records: state.records,
111 78418 : img: state.img,
112 78418 : }
113 78418 : }
114 : }
115 :
116 : /// Bag of data accumulated during a vectored get.
117 : pub(crate) struct ValuesReconstructState {
118 : /// The keys will be removed after `get_vectored` completes. The caller outside `Timeline`
119 : /// should not expect to get anything from this hashmap.
120 : pub(crate) keys: HashMap<Key, Result<VectoredValueReconstructState, PageReconstructError>>,
121 : /// The keys that have already been retrieved.
122 : keys_done: KeySpaceRandomAccum,
123 :
124 : /// The keys covered by the image layers
125 : keys_with_image_coverage: Option<Range<Key>>,
126 :
127 : // Statistics that remain accessible to the caller of `get_vectored_impl`.
128 : layers_visited: u32,
129 : delta_layers_visited: u32,
130 : }
131 :
132 : impl ValuesReconstructState {
133 672 : pub(crate) fn new() -> Self {
134 672 : Self {
135 672 : keys: HashMap::new(),
136 672 : keys_done: KeySpaceRandomAccum::new(),
137 672 : keys_with_image_coverage: None,
138 672 : layers_visited: 0,
139 672 : delta_layers_visited: 0,
140 672 : }
141 672 : }
142 :
143 : /// Associate a key with the error it encountered and mark the key as done.
144 0 : pub(crate) fn on_key_error(&mut self, key: Key, err: PageReconstructError) {
145 0 : let previous = self.keys.insert(key, Err(err));
146 0 : if let Some(Ok(state)) = previous {
147 0 : if state.situation == ValueReconstructSituation::Continue {
148 0 : self.keys_done.add_key(key);
149 0 : }
150 0 : }
151 0 : }
152 :
153 364 : pub(crate) fn on_layer_visited(&mut self, layer: &ReadableLayer) {
154 364 : self.layers_visited += 1;
155 364 : if let ReadableLayer::PersistentLayer(layer) = layer {
156 354 : if layer.layer_desc().is_delta() {
157 252 : self.delta_layers_visited += 1;
158 252 : }
159 10 : }
160 364 : }
161 :
162 372 : pub(crate) fn get_delta_layers_visited(&self) -> u32 {
163 372 : self.delta_layers_visited
164 372 : }
165 :
166 460 : pub(crate) fn get_layers_visited(&self) -> u32 {
167 460 : self.layers_visited
168 460 : }
169 :
170 : /// This function is called after reading a keyspace from a layer.
171 : /// It checks if the read path has now moved past the cached Lsn for any keys.
172 : ///
173 : /// Implementation note: We intentionally iterate over the keys for which we've
174 : /// already collected some reconstruct data. This avoids scaling complexity with
175 : /// the size of the search space.
176 262 : pub(crate) fn on_lsn_advanced(&mut self, keyspace: &KeySpace, advanced_to: Lsn) {
177 72328 : for (key, value) in self.keys.iter_mut() {
178 72328 : if !keyspace.contains(key) {
179 42171 : continue;
180 30157 : }
181 :
182 30157 : if let Ok(state) = value {
183 30157 : if state.situation != ValueReconstructSituation::Complete
184 0 : && state.get_cached_lsn() >= Some(advanced_to)
185 0 : {
186 0 : state.situation = ValueReconstructSituation::Complete;
187 0 : self.keys_done.add_key(*key);
188 30157 : }
189 0 : }
190 : }
191 262 : }
192 :
193 : /// On hitting an image layer, we can mark all keys in this range as done, because
194 : /// if the image layer does not contain a key, the key was deleted or never added.
195 102 : pub(crate) fn on_image_layer_visited(&mut self, key_range: &Range<Key>) {
196 102 : let prev_val = self.keys_with_image_coverage.replace(key_range.clone());
197 102 : assert_eq!(
198 : prev_val, None,
199 0 : "should consume the keyspace before the next iteration"
200 : );
201 102 : }
202 :
203 : /// Update the state collected for a given key.
204 : /// Returns true if this was the last value needed for the key and false otherwise.
205 : ///
206 : /// If the key is done after the update, mark it as such.
207 78418 : pub(crate) fn update_key(
208 78418 : &mut self,
209 78418 : key: &Key,
210 78418 : lsn: Lsn,
211 78418 : value: Value,
212 78418 : ) -> ValueReconstructSituation {
213 78418 : let state = self
214 78418 : .keys
215 78418 : .entry(*key)
216 78418 : .or_insert(Ok(VectoredValueReconstructState::default()));
217 :
218 78418 : if let Ok(state) = state {
219 78418 : let key_done = match state.situation {
220 0 : ValueReconstructSituation::Complete => unreachable!(),
221 78418 : ValueReconstructSituation::Continue => match value {
222 78418 : Value::Image(img) => {
223 78418 : state.img = Some((lsn, img));
224 78418 : true
225 : }
226 0 : Value::WalRecord(rec) => {
227 0 : debug_assert!(
228 0 : Some(lsn) > state.get_cached_lsn(),
229 0 : "Attempt to collect a record below cached LSN for walredo: {} < {}",
230 0 : lsn,
231 0 : state
232 0 : .get_cached_lsn()
233 0 : .expect("Assertion can only fire if a cached lsn is present")
234 : );
235 :
236 0 : let will_init = rec.will_init();
237 0 : state.records.push((lsn, rec));
238 0 : will_init
239 : }
240 : },
241 : };
242 :
243 78418 : if key_done && state.situation == ValueReconstructSituation::Continue {
244 78418 : state.situation = ValueReconstructSituation::Complete;
245 78418 : self.keys_done.add_key(*key);
246 78418 : }
247 :
248 78418 : state.situation
249 : } else {
250 0 : ValueReconstructSituation::Complete
251 : }
252 78418 : }
253 :
254 : /// Returns the Lsn at which this key is cached if one exists.
255 : /// The read path should go no further than this Lsn for the given key.
256 191551 : pub(crate) fn get_cached_lsn(&self, key: &Key) -> Option<Lsn> {
257 191551 : self.keys
258 191551 : .get(key)
259 191551 : .and_then(|k| k.as_ref().ok())
260 191551 : .and_then(|state| state.get_cached_lsn())
261 191551 : }
262 :
263 : /// Returns the key space describing the keys that have
264 : /// been marked as completed since the last call to this function.
265 : /// Returns the individual keys marked done, together with the image layer coverage.
266 868 : pub(crate) fn consume_done_keys(&mut self) -> (KeySpace, Option<Range<Key>>) {
267 868 : (
268 868 : self.keys_done.consume_keyspace(),
269 868 : self.keys_with_image_coverage.take(),
270 868 : )
271 868 : }
272 : }
273 :
274 : impl Default for ValuesReconstructState {
275 402 : fn default() -> Self {
276 402 : Self::new()
277 402 : }
278 : }
279 :
280 : /// A key that uniquely identifies a layer in a timeline
281 : #[derive(Debug, PartialEq, Eq, Clone, Hash)]
282 : pub(crate) enum LayerId {
283 : PersitentLayerId(PersistentLayerKey),
284 : InMemoryLayerId(InMemoryLayerFileId),
285 : }
286 :
287 : /// Layer wrapper for the read path. Note that it is valid
288 : /// to use these layers even after external operations have
289 : /// been performed on them (compaction, freeze, etc.).
290 : #[derive(Debug)]
291 : pub(crate) enum ReadableLayer {
292 : PersistentLayer(Layer),
293 : InMemoryLayer(Arc<InMemoryLayer>),
294 : }
295 :
296 : /// A partial description of a read to be done.
297 : #[derive(Debug, Clone)]
298 : struct ReadDesc {
299 : /// An id used to resolve the readable layer within the fringe
300 : layer_id: LayerId,
301 : /// Lsn range for the read, used for selecting the next read
302 : lsn_range: Range<Lsn>,
303 : }
304 :
305 : /// Data structure which maintains a fringe of layers for the
306 : /// read path. The fringe is the set of layers which intersects
307 : /// the current keyspace that the search is descending on.
308 : /// Each layer tracks the keyspace that intersects it.
309 : ///
310 : /// The fringe must appear sorted by Lsn. Hence, it uses
311 : /// a two layer indexing scheme.
312 : #[derive(Debug)]
313 : pub(crate) struct LayerFringe {
314 : planned_reads_by_lsn: BinaryHeap<ReadDesc>,
315 : layers: HashMap<LayerId, LayerKeyspace>,
316 : }
317 :
318 : #[derive(Debug)]
319 : struct LayerKeyspace {
320 : layer: ReadableLayer,
321 : target_keyspace: KeySpace,
322 : }
323 :
324 : impl LayerFringe {
325 504 : pub(crate) fn new() -> Self {
326 504 : LayerFringe {
327 504 : planned_reads_by_lsn: BinaryHeap::new(),
328 504 : layers: HashMap::new(),
329 504 : }
330 504 : }
331 :
332 868 : pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
333 868 : let read_desc = match self.planned_reads_by_lsn.pop() {
334 364 : Some(desc) => desc,
335 504 : None => return None,
336 : };
337 :
338 364 : let removed = self.layers.remove_entry(&read_desc.layer_id);
339 364 : match removed {
340 : Some((
341 : _,
342 : LayerKeyspace {
343 364 : layer,
344 364 : target_keyspace,
345 364 : },
346 364 : )) => Some((layer, target_keyspace, read_desc.lsn_range)),
347 0 : None => unreachable!("fringe internals are always consistent"),
348 : }
349 868 : }
350 :
351 64140 : pub(crate) fn update(
352 64140 : &mut self,
353 64140 : layer: ReadableLayer,
354 64140 : keyspace: KeySpace,
355 64140 : lsn_range: Range<Lsn>,
356 64140 : ) {
357 64140 : let layer_id = layer.id();
358 64140 : let entry = self.layers.entry(layer_id.clone());
359 64140 : match entry {
360 63776 : Entry::Occupied(mut entry) => {
361 63776 : entry.get_mut().target_keyspace.merge(&keyspace);
362 63776 : }
363 364 : Entry::Vacant(entry) => {
364 364 : self.planned_reads_by_lsn.push(ReadDesc {
365 364 : lsn_range,
366 364 : layer_id: layer_id.clone(),
367 364 : });
368 364 : entry.insert(LayerKeyspace {
369 364 : layer,
370 364 : target_keyspace: keyspace,
371 364 : });
372 364 : }
373 : }
374 64140 : }
375 : }
376 :
377 : impl Default for LayerFringe {
378 0 : fn default() -> Self {
379 0 : Self::new()
380 0 : }
381 : }
382 :
383 : impl Ord for ReadDesc {
384 12 : fn cmp(&self, other: &Self) -> Ordering {
385 12 : let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
386 12 : if ord == std::cmp::Ordering::Equal {
387 12 : self.lsn_range.start.cmp(&other.lsn_range.start).reverse()
388 : } else {
389 0 : ord
390 : }
391 12 : }
392 : }
393 :
394 : impl PartialOrd for ReadDesc {
395 12 : fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
396 12 : Some(self.cmp(other))
397 12 : }
398 : }
399 :
400 : impl PartialEq for ReadDesc {
401 0 : fn eq(&self, other: &Self) -> bool {
402 0 : self.lsn_range == other.lsn_range
403 0 : }
404 : }
405 :
406 : impl Eq for ReadDesc {}
407 :
408 : impl ReadableLayer {
409 64140 : pub(crate) fn id(&self) -> LayerId {
410 64140 : match self {
411 64130 : Self::PersistentLayer(layer) => LayerId::PersistentLayerId(layer.layer_desc().key()),
412 10 : Self::InMemoryLayer(layer) => LayerId::InMemoryLayerId(layer.file_id()),
413 : }
414 64140 : }
415 :
416 364 : pub(crate) async fn get_values_reconstruct_data(
417 364 : &self,
418 364 : keyspace: KeySpace,
419 364 : lsn_range: Range<Lsn>,
420 364 : reconstruct_state: &mut ValuesReconstructState,
421 364 : ctx: &RequestContext,
422 364 : ) -> Result<(), GetVectoredError> {
423 364 : match self {
424 354 : ReadableLayer::PersistentLayer(layer) => {
425 354 : layer
426 354 : .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx)
427 11490 : .await
428 : }
429 10 : ReadableLayer::InMemoryLayer(layer) => {
430 10 : layer
431 10 : .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx)
432 26 : .await
433 : }
434 : }
435 364 : }
436 : }
437 :
438 : /// Return value from [`Layer::get_value_reconstruct_data`]
439 : #[derive(Clone, Copy, Debug)]
440 : pub enum ValueReconstructResult {
441 : /// Got all the data needed to reconstruct the requested page
442 : Complete,
443 : /// This layer didn't contain all the required data; the caller should look up
444 : /// the predecessor layer at the returned LSN and collect more data from there.
445 : Continue,
446 :
447 : /// This layer didn't contain data needed to reconstruct the page version at
448 : /// the returned LSN. This is usually considered an error, but might be OK
449 : /// in some circumstances.
450 : Missing,
451 : }
452 :
453 : #[derive(Debug)]
454 : pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
455 :
456 : /// This struct holds two instances of [`LayerAccessStatsInner`].
457 : /// Accesses are recorded to both instances.
458 : /// The `for_scraping_api` instance can be reset from the management API via [`LayerAccessStatsReset`].
459 : /// The `for_eviction_policy` is never reset.
460 : #[derive(Debug, Default, Clone)]
461 : struct LayerAccessStatsLocked {
462 : for_scraping_api: LayerAccessStatsInner,
463 : for_eviction_policy: LayerAccessStatsInner,
464 : }
465 :
466 : impl LayerAccessStatsLocked {
467 211881 : fn iter_mut(&mut self) -> impl Iterator<Item = &mut LayerAccessStatsInner> {
468 211881 : [&mut self.for_scraping_api, &mut self.for_eviction_policy].into_iter()
469 211881 : }
470 : }
471 :
472 : #[derive(Debug, Default, Clone)]
473 : struct LayerAccessStatsInner {
474 : first_access: Option<LayerAccessStatFullDetails>,
475 : count_by_access_kind: EnumMap<LayerAccessKind, u64>,
476 : task_kind_flag: EnumSet<TaskKind>,
477 : last_accesses: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
478 : last_residence_changes: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
479 : }
480 :
481 : #[derive(Debug, Clone, Copy)]
482 : pub(crate) struct LayerAccessStatFullDetails {
483 : pub(crate) when: SystemTime,
484 : pub(crate) task_kind: TaskKind,
485 : pub(crate) access_kind: LayerAccessKind,
486 : }
487 :
488 0 : #[derive(Clone, Copy, strum_macros::EnumString)]
489 : pub enum LayerAccessStatsReset {
490 : NoReset,
491 : JustTaskKindFlags,
492 : AllStats,
493 : }
494 :
495 0 : fn system_time_to_millis_since_epoch(ts: &SystemTime) -> u64 {
496 0 : ts.duration_since(UNIX_EPOCH)
497 0 : .expect("better to die in this unlikely case than report false stats")
498 0 : .as_millis()
499 0 : .try_into()
500 0 : .expect("64 bits is enough for few more years")
501 0 : }
502 :
503 : impl LayerAccessStatFullDetails {
504 0 : fn as_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
505 0 : let Self {
506 0 : when,
507 0 : task_kind,
508 0 : access_kind,
509 0 : } = self;
510 0 : pageserver_api::models::LayerAccessStatFullDetails {
511 0 : when_millis_since_epoch: system_time_to_millis_since_epoch(when),
512 0 : task_kind: Cow::Borrowed(task_kind.into()), // into static str, powered by strum_macros
513 0 : access_kind: *access_kind,
514 0 : }
515 0 : }
516 : }
517 :
518 : impl LayerAccessStats {
519 : /// Create an empty stats object.
520 : ///
521 : /// The caller is responsible for recording a residence event
522 : /// using [`record_residence_event`] before calling `latest_activity`.
523 : /// If they don't, [`latest_activity`] will return `None`.
524 : ///
525 : /// [`record_residence_event`]: Self::record_residence_event
526 : /// [`latest_activity`]: Self::latest_activity
527 1294 : pub(crate) fn empty_will_record_residence_event_later() -> Self {
528 1294 : LayerAccessStats(Mutex::default())
529 1294 : }
530 :
531 : /// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
532 : ///
533 : /// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
534 : ///
535 : /// [`LayerLoad`]: LayerResidenceEventReason::LayerLoad
536 : /// [`record_residence_event`]: Self::record_residence_event
537 24 : pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
538 24 : let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
539 24 : new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
540 24 : new
541 24 : }
542 :
543 : /// Record a change in layer residency.
544 : ///
545 : /// Recording the event must happen while holding the layer map lock to
546 : /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
547 : /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
548 : ///
549 : /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
550 : /// the following race could happen:
551 : ///
552 : /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
553 : /// - Eviction: "imitate access" for the logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
554 : /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
555 : /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
556 : ///
557 1342 : pub(crate) fn record_residence_event(
558 1342 : &self,
559 1342 : status: LayerResidenceStatus,
560 1342 : reason: LayerResidenceEventReason,
561 1342 : ) {
562 1342 : let mut locked = self.0.lock().unwrap();
563 2684 : locked.iter_mut().for_each(|inner| {
564 2684 : inner
565 2684 : .last_residence_changes
566 2684 : .write(LayerResidenceEvent::new(status, reason))
567 2684 : });
568 1342 : }
569 :
570 211057 : fn record_access(&self, access_kind: LayerAccessKind, ctx: &RequestContext) {
571 211057 : if ctx.access_stats_behavior() == AccessStatsBehavior::Skip {
572 518 : return;
573 210539 : }
574 210539 :
575 210539 : let this_access = LayerAccessStatFullDetails {
576 210539 : when: SystemTime::now(),
577 210539 : task_kind: ctx.task_kind(),
578 210539 : access_kind,
579 210539 : };
580 210539 :
581 210539 : let mut locked = self.0.lock().unwrap();
582 421078 : locked.iter_mut().for_each(|inner| {
583 421078 : inner.first_access.get_or_insert(this_access);
584 421078 : inner.count_by_access_kind[access_kind] += 1;
585 421078 : inner.task_kind_flag |= ctx.task_kind();
586 421078 : inner.last_accesses.write(this_access);
587 421078 : })
588 211057 : }
589 :
590 0 : fn as_api_model(
591 0 : &self,
592 0 : reset: LayerAccessStatsReset,
593 0 : ) -> pageserver_api::models::LayerAccessStats {
594 0 : let mut locked = self.0.lock().unwrap();
595 0 : let inner = &mut locked.for_scraping_api;
596 0 : let LayerAccessStatsInner {
597 0 : first_access,
598 0 : count_by_access_kind,
599 0 : task_kind_flag,
600 0 : last_accesses,
601 0 : last_residence_changes,
602 0 : } = inner;
603 0 : let ret = pageserver_api::models::LayerAccessStats {
604 0 : access_count_by_access_kind: count_by_access_kind
605 0 : .iter()
606 0 : .map(|(kind, count)| (kind, *count))
607 0 : .collect(),
608 0 : task_kind_access_flag: task_kind_flag
609 0 : .iter()
610 0 : .map(|task_kind| Cow::Borrowed(task_kind.into())) // into static str, powered by strum_macros
611 0 : .collect(),
612 0 : first: first_access.as_ref().map(|a| a.as_api_model()),
613 0 : accesses_history: last_accesses.map(|m| m.as_api_model()),
614 0 : residence_events_history: last_residence_changes.clone(),
615 0 : };
616 0 : match reset {
617 0 : LayerAccessStatsReset::NoReset => (),
618 0 : LayerAccessStatsReset::JustTaskKindFlags => {
619 0 : inner.task_kind_flag.clear();
620 0 : }
621 0 : LayerAccessStatsReset::AllStats => {
622 0 : *inner = LayerAccessStatsInner::default();
623 0 : }
624 : }
625 0 : ret
626 0 : }
627 :
628 : /// Get the latest access timestamp, falling back to latest residence event, further falling
629 : /// back to `SystemTime::now` for a usable timestamp for eviction.
630 0 : pub(crate) fn latest_activity_or_now(&self) -> SystemTime {
631 0 : self.latest_activity().unwrap_or_else(SystemTime::now)
632 0 : }
633 :
634 : /// Get the latest access timestamp, falling back to latest residence event.
635 : ///
636 : /// This function can only return `None` if there has not yet been a call to the
637 : /// [`record_residence_event`] method. That would generally be considered an
638 : /// implementation error. This function logs a rate-limited warning in that case.
639 : ///
640 : /// TODO: use type system to avoid the need for `fallback`.
641 : /// The approach in <https://github.com/neondatabase/neon/pull/3775>
642 : /// could be used to enforce that a residence event is recorded
643 : /// before a layer is added to the layer map. We could also have
644 : /// a layer wrapper type that holds the LayerAccessStats, and ensure
645 : /// that that type can only be produced by inserting into the layer map.
646 : ///
647 : /// [`record_residence_event`]: Self::record_residence_event
648 0 : fn latest_activity(&self) -> Option<SystemTime> {
649 0 : let locked = self.0.lock().unwrap();
650 0 : let inner = &locked.for_eviction_policy;
651 0 : match inner.last_accesses.recent() {
652 0 : Some(a) => Some(a.when),
653 0 : None => match inner.last_residence_changes.recent() {
654 0 : Some(e) => Some(e.timestamp),
655 : None => {
656 : static WARN_RATE_LIMIT: Lazy<Mutex<(usize, RateLimit)>> =
657 0 : Lazy::new(|| Mutex::new((0, RateLimit::new(Duration::from_secs(10)))));
658 0 : let mut guard = WARN_RATE_LIMIT.lock().unwrap();
659 0 : guard.0 += 1;
660 0 : let occurrences = guard.0;
661 0 : guard.1.call(move || {
662 0 : warn!(parent: None, occurrences, "latest_activity not available, this is an implementation bug, using fallback value");
663 0 : });
664 0 : None
665 : }
666 : },
667 : }
668 0 : }
669 : }
670 :
671 : /// Get a layer descriptor from a layer.
672 : pub trait AsLayerDesc {
673 : /// Get the layer descriptor.
674 : fn layer_desc(&self) -> &PersistentLayerDesc;
675 : }
676 :
677 : pub mod tests {
678 : use pageserver_api::shard::TenantShardId;
679 :
680 : use super::*;
681 :
682 : impl From<DeltaLayerName> for PersistentLayerDesc {
683 0 : fn from(value: DeltaLayerName) -> Self {
684 0 : PersistentLayerDesc::new_delta(
685 0 : TenantShardId::from([0; 18]),
686 0 : TimelineId::from_array([0; 16]),
687 0 : value.key_range,
688 0 : value.lsn_range,
689 0 : 233,
690 0 : )
691 0 : }
692 : }
693 :
694 : impl From<ImageLayerName> for PersistentLayerDesc {
695 0 : fn from(value: ImageLayerName) -> Self {
696 0 : PersistentLayerDesc::new_img(
697 0 : TenantShardId::from([0; 18]),
698 0 : TimelineId::from_array([0; 16]),
699 0 : value.key_range,
700 0 : value.lsn,
701 0 : 233,
702 0 : )
703 0 : }
704 : }
705 :
706 : impl From<LayerName> for PersistentLayerDesc {
707 0 : fn from(value: LayerName) -> Self {
708 0 : match value {
709 0 : LayerName::Delta(d) => Self::from(d),
710 0 : LayerName::Image(i) => Self::from(i),
711 : }
712 0 : }
713 : }
714 : }
715 :
716 : /// Range-wrapping newtype which uses Display to render Debug.
717 : ///
718 : /// Useful with `Key`, whose `{:?}` output is too verbose for printing multiple layers.
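///
/// A hedged usage sketch (the `desc` binding is an assumption for illustration,
/// hence `ignore`):
/// ```ignore
/// // Renders as "start..end" via Display instead of the verbose derived Debug of `Key`.
/// tracing::debug!("layer covers {:?}", RangeDisplayDebug(&desc.key_range));
/// ```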
719 : struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
720 :
721 : impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
722 0 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
723 0 : write!(f, "{}..{}", self.0.start, self.0.end)
724 0 : }
725 : }