LCOV - b837401fb09d2d9818b70e630fdb67e9799b7b0d.info - pageserver/compaction/src/helpers.rs

LCOV - code coverage report

Current view:	top level - pageserver/compaction/src - helpers.rs (source / functions)		Coverage	Total	Hit
Test:	b837401fb09d2d9818b70e630fdb67e9799b7b0d.info	Lines:	0.0 %	147	0
Test Date:	2024-04-18 15:32:49	Functions:	0.0 %	37	0

            Line data    Source code

       1              : //! This file contains generic utility functions over the interface types,
       2              : //! which could be handy for any compaction implementation.
       3              : use crate::interface::*;
       4              : 
       5              : use futures::future::BoxFuture;
       6              : use futures::{Stream, StreamExt};
       7              : use itertools::Itertools;
       8              : use pin_project_lite::pin_project;
       9              : use std::collections::BinaryHeap;
      10              : use std::collections::VecDeque;
      11              : use std::future::Future;
      12              : use std::ops::{DerefMut, Range};
      13              : use std::pin::Pin;
      14              : use std::task::{ready, Poll};
      15              : 
      16            0 : pub fn keyspace_total_size<K>(keyspace: &CompactionKeySpace<K>) -> u64
      17            0 : where
      18            0 :     K: CompactionKey,
      19            0 : {
      20            0 :     keyspace.iter().map(|r| K::key_range_size(r) as u64).sum()
      21            0 : }
      22              : 
      23            0 : pub fn overlaps_with<T: Ord>(a: &Range<T>, b: &Range<T>) -> bool {
      24            0 :     !(a.end <= b.start || b.end <= a.start)
      25            0 : }
      26              : 
      27            0 : pub fn union_to_keyspace<K: Ord>(a: &mut CompactionKeySpace<K>, b: CompactionKeySpace<K>) {
      28            0 :     let x = std::mem::take(a);
      29            0 :     let mut all_ranges_iter = [x.into_iter(), b.into_iter()]
      30            0 :         .into_iter()
      31            0 :         .kmerge_by(|a, b| a.start < b.start);
      32            0 :     let mut ranges = Vec::new();
      33            0 :     if let Some(first) = all_ranges_iter.next() {
      34            0 :         let (mut start, mut end) = (first.start, first.end);
      35              : 
      36            0 :         for r in all_ranges_iter {
      37            0 :             assert!(r.start >= start);
      38            0 :             if r.start > end {
      39            0 :                 ranges.push(start..end);
      40            0 :                 start = r.start;
      41            0 :                 end = r.end;
      42            0 :             } else if r.end > end {
      43            0 :                 end = r.end;
      44            0 :             }
      45              :         }
      46            0 :         ranges.push(start..end);
      47            0 :     }
      48            0 :     *a = ranges
      49            0 : }
      50              : 
      51            0 : pub fn intersect_keyspace<K: Ord + Clone + Copy>(
      52            0 :     a: &CompactionKeySpace<K>,
      53            0 :     r: &Range<K>,
      54            0 : ) -> CompactionKeySpace<K> {
      55            0 :     let mut ranges: Vec<Range<K>> = Vec::new();
      56              : 
      57            0 :     for x in a.iter() {
      58            0 :         if x.end <= r.start {
      59            0 :             continue;
      60            0 :         }
      61            0 :         if x.start >= r.end {
      62            0 :             break;
      63            0 :         }
      64            0 :         ranges.push(x.clone())
      65              :     }
      66              : 
      67              :     // trim the ends
      68            0 :     if let Some(first) = ranges.first_mut() {
      69            0 :         first.start = std::cmp::max(first.start, r.start);
      70            0 :     }
      71            0 :     if let Some(last) = ranges.last_mut() {
      72            0 :         last.end = std::cmp::min(last.end, r.end);
      73            0 :     }
      74            0 :     ranges
      75            0 : }
      76              : 
      77              : /// Create a stream that iterates through all DeltaEntrys among all input
      78              : /// layers, in key-lsn order.
      79              : ///
      80              : /// This is public because the create_delta() implementation likely wants to use this too
      81              : /// TODO: move to a more shared place
      82            0 : pub fn merge_delta_keys<'a, E: CompactionJobExecutor>(
      83            0 :     layers: &'a [E::DeltaLayer],
      84            0 :     ctx: &'a E::RequestContext,
      85            0 : ) -> MergeDeltaKeys<'a, E> {
      86            0 :     // Use a binary heap to merge the layers. Each input layer is initially
      87            0 :     // represented by a LazyLoadLayer::Unloaded element, which uses the start of
      88            0 :     // the layer's key range as the key. The first time a layer reaches the top
      89            0 :     // of the heap, all the keys of the layer are loaded into a sorted vector.
      90            0 :     //
      91            0 :     // This helps to keep the memory usage reasonable: we only need to hold in
      92            0 :     // memory the DeltaEntrys of the layers that overlap with the "current" key.
      93            0 :     let mut heap: BinaryHeap<LazyLoadLayer<'a, E>> = BinaryHeap::new();
      94            0 :     for l in layers {
      95            0 :         heap.push(LazyLoadLayer::Unloaded(l));
      96            0 :     }
      97            0 :     MergeDeltaKeys {
      98            0 :         heap,
      99            0 :         ctx,
     100            0 :         load_future: None,
     101            0 :     }
     102            0 : }
     103              : 
     104              : enum LazyLoadLayer<'a, E: CompactionJobExecutor> {
     105              :     Loaded(VecDeque<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>),
     106              :     Unloaded(&'a E::DeltaLayer),
     107              : }
     108              : impl<'a, E: CompactionJobExecutor> LazyLoadLayer<'a, E> {
     109            0 :     fn key(&self) -> E::Key {
     110            0 :         match self {
     111            0 :             Self::Loaded(entries) => entries.front().unwrap().key(),
     112            0 :             Self::Unloaded(dl) => dl.key_range().start,
     113              :         }
     114            0 :     }
     115              : }
     116              : impl<'a, E: CompactionJobExecutor> PartialOrd for LazyLoadLayer<'a, E> {
     117            0 :     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
     118            0 :         Some(self.cmp(other))
     119            0 :     }
     120              : }
     121              : impl<'a, E: CompactionJobExecutor> Ord for LazyLoadLayer<'a, E> {
     122            0 :     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
     123            0 :         // reverse order so that we get a min-heap
     124            0 :         other.key().cmp(&self.key())
     125            0 :     }
     126              : }
     127              : impl<'a, E: CompactionJobExecutor> PartialEq for LazyLoadLayer<'a, E> {
     128            0 :     fn eq(&self, other: &Self) -> bool {
     129            0 :         self.key().eq(&other.key())
     130            0 :     }
     131              : }
     132              : impl<'a, E: CompactionJobExecutor> Eq for LazyLoadLayer<'a, E> {}
     133              : 
     134              : type LoadFuture<'a, E> = BoxFuture<'a, anyhow::Result<Vec<E>>>;
     135              : 
     136              : // Stream returned by `merge_delta_keys`
     137              : pin_project! {
     138              : #[allow(clippy::type_complexity)]
     139              : pub struct MergeDeltaKeys<'a, E: CompactionJobExecutor> {
     140              :     heap: BinaryHeap<LazyLoadLayer<'a, E>>,
     141              : 
     142              :     #[pin]
     143              :     load_future: Option<LoadFuture<'a, <E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>>,
     144              : 
     145              :     ctx: &'a E::RequestContext,
     146              : }
     147              : }
     148              : 
     149              : impl<'a, E> Stream for MergeDeltaKeys<'a, E>
     150              : where
     151              :     E: CompactionJobExecutor + 'a,
     152              : {
     153              :     type Item = anyhow::Result<<E::DeltaLayer as CompactionDeltaLayer<E>>::DeltaEntry<'a>>;
     154              : 
     155            0 :     fn poll_next(
     156            0 :         self: Pin<&mut Self>,
     157            0 :         cx: &mut std::task::Context<'_>,
     158            0 :     ) -> Poll<std::option::Option<<Self as futures::Stream>::Item>> {
     159            0 :         let mut this = self.project();
     160              :         loop {
     161            0 :             if let Some(mut load_future) = this.load_future.as_mut().as_pin_mut() {
     162              :                 // We are waiting for loading the keys to finish
     163            0 :                 match ready!(load_future.as_mut().poll(cx)) {
     164            0 :                     Ok(entries) => {
     165            0 :                         this.load_future.set(None);
     166            0 :                         *this.heap.peek_mut().unwrap() =
     167            0 :                             LazyLoadLayer::Loaded(VecDeque::from(entries));
     168            0 :                     }
     169            0 :                     Err(e) => {
     170            0 :                         return Poll::Ready(Some(Err(e)));
     171              :                     }
     172              :                 }
     173            0 :             }
     174              : 
     175              :             // If the topmost layer in the heap hasn't been loaded yet, start
     176              :             // loading it. Otherwise return the next entry from it and update
     177              :             // the layer's position in the heap (this decreaseKey operation is
     178              :             // performed implicitly when `top` is dropped).
     179            0 :             if let Some(mut top) = this.heap.peek_mut() {
     180            0 :                 match top.deref_mut() {
     181            0 :                     LazyLoadLayer::Unloaded(ref mut l) => {
     182            0 :                         let fut = l.load_keys(this.ctx);
     183            0 :                         this.load_future.set(Some(Box::pin(fut)));
     184            0 :                         continue;
     185              :                     }
     186            0 :                     LazyLoadLayer::Loaded(ref mut entries) => {
     187            0 :                         let result = entries.pop_front().unwrap();
     188            0 :                         if entries.is_empty() {
     189            0 :                             std::collections::binary_heap::PeekMut::pop(top);
     190            0 :                         }
     191            0 :                         return Poll::Ready(Some(Ok(result)));
     192              :                     }
     193              :                 }
     194              :             } else {
     195            0 :                 return Poll::Ready(None);
     196              :             }
     197              :         }
     198            0 :     }
     199              : }
     200              : 
     201              : // Accumulate values at key boundaries
     202              : pub struct KeySize<K> {
     203              :     pub key: K,
     204              :     pub num_values: u64,
     205              :     pub size: u64,
     206              : }
     207              : 
     208            0 : pub fn accum_key_values<'a, I, K, D, E>(input: I) -> impl Stream<Item = Result<KeySize<K>, E>>
     209            0 : where
     210            0 :     K: Eq,
     211            0 :     I: Stream<Item = Result<D, E>>,
     212            0 :     D: CompactionDeltaEntry<'a, K>,
     213            0 : {
     214            0 :     async_stream::try_stream! {
     215            0 :         // Initialize the state from the first value
     216            0 :         let mut input = std::pin::pin!(input);
     217              : 
     218            0 :         if let Some(first) = input.next().await {
     219            0 :             let first = first?;
     220            0 :             let mut accum: KeySize<K> = KeySize {
     221            0 :                 key: first.key(),
     222            0 :                 num_values: 1,
     223            0 :                 size: first.size(),
     224            0 :             };
     225            0 :             while let Some(this) = input.next().await {
     226            0 :                 let this = this?;
     227            0 :                 if this.key() == accum.key {
     228            0 :                     accum.size += this.size();
     229            0 :                     accum.num_values += 1;
     230            0 :                 } else {
     231            0 :                     yield accum;
     232            0 :                     accum = KeySize {
     233            0 :                         key: this.key(),
     234            0 :                         num_values: 1,
     235            0 :                         size: this.size(),
     236            0 :                     };
     237              :                 }
     238              :             }
     239            0 :             yield accum;
     240            0 :         }
     241              :     }
     242            0 : }

Generated by: LCOV version 2.1-beta