Line data Source code
1 : use std::ops::Range;
2 :
3 : // NOTE the `im` crate has 20x more downloads and also has
4 : // persistent/immutable BTree. But it's bugged so rpds is a
5 : // better choice <https://github.com/neondatabase/neon/issues/3395>
6 : use rpds::RedBlackTreeMapSync;
7 :
8 : /// Data structure that can efficiently:
9 : /// - find the latest layer by lsn.end at a given key
10 : /// - iterate the latest layers in a key range
11 : /// - insert layers in non-decreasing lsn.start order
12 : ///
13 : /// For a detailed explanation and justification of this approach, see:
14 : /// <https://neon.tech/blog/persistent-structures-in-neons-wal-indexing>
15 : ///
16 : /// NOTE The struct is parameterized over Value for easier
17 : /// testing, but in practice it's some sort of layer.
18 : pub struct LayerCoverage<Value> {
19 : /// For every change in coverage (as we sweep the key space)
20 : /// we store (lsn.end, value).
21 : ///
22 : /// NOTE We use an immutable/persistent tree so that we can keep historic
23 : /// versions of this coverage without cloning the whole thing and
24 : /// incurring quadratic memory cost. See HistoricLayerCoverage.
25 : ///
26 : /// NOTE We use the Sync version of the map because we want Self to
27 : /// be Sync. Using nonsync might be faster, if we can work with
28 : /// that.
29 : nodes: RedBlackTreeMapSync<i128, Option<(u64, Value)>>,
30 : }
31 :
32 : impl<T: Clone> Default for LayerCoverage<T> {
33 4884 : fn default() -> Self {
34 4884 : Self::new()
35 4884 : }
36 : }
37 :
38 : impl<Value: Clone> LayerCoverage<Value> {
39 4884 : pub fn new() -> Self {
40 4884 : Self {
41 4884 : nodes: RedBlackTreeMapSync::default(),
42 4884 : }
43 4884 : }
44 :
45 : /// Helper function to subdivide the key range without changing any values
46 : ///
47 : /// This operation has no semantic effect by itself. It only helps us pin in
48 : /// place the part of the coverage we don't want to change when inserting.
49 : ///
50 : /// As an analogy, think of a polygon. If you add a vertex along one of the
51 : /// segments, the polygon is still the same, but it behaves differently when
52 : /// we move or delete one of the other points.
53 : ///
54 : /// Complexity: O(log N)
55 29988 : fn add_node(&mut self, key: i128) {
56 29988 : let value = match self.nodes.range(..=key).last() {
57 20448 : Some((_, Some(v))) => Some(v.clone()),
58 7842 : Some((_, None)) => None,
59 1698 : None => None,
60 : };
61 29988 : self.nodes.insert_mut(key, value);
62 29988 : }
63 :
64 : /// Insert a layer.
65 : ///
66 : /// Complexity: worst case O(N), in practice O(log N). See NOTE in implementation.
67 14994 : pub fn insert(&mut self, key: Range<i128>, lsn: Range<u64>, value: Value) {
68 14994 : // Add nodes at endpoints
69 14994 : //
70 14994 : // NOTE The order of lines is important. We add nodes at the start
71 14994 : // and end of the key range **before updating any nodes** in order
72 14994 : // to pin down the current coverage outside of the relevant key range.
73 14994 : // Only the coverage inside the layer's key range should change.
74 14994 : self.add_node(key.start);
75 14994 : self.add_node(key.end);
76 14994 :
77 14994 : // Raise the height where necessary
78 14994 : //
79 14994 : // NOTE This loop is worst case O(N), but amortized O(log N) in the special
80 14994 : // case when rectangles have no height. In practice I don't think we'll see
81 14994 : // the kind of layer intersections needed to trigger O(N) behavior. The worst
82 14994 : // case is N/2 horizontal layers overlapped with N/2 vertical layers in a
83 14994 : // grid pattern.
84 14994 : let mut to_update = Vec::new();
85 14994 : let mut to_remove = Vec::new();
86 14994 : let mut prev_covered = false;
87 20280 : for (k, node) in self.nodes.range(key) {
88 20280 : let needs_cover = match node {
89 3174 : None => true,
90 17106 : Some((h, _)) => h < &lsn.end,
91 : };
92 20280 : if needs_cover {
93 20238 : match prev_covered {
94 5226 : true => to_remove.push(*k),
95 15012 : false => to_update.push(*k),
96 : }
97 42 : }
98 20280 : prev_covered = needs_cover;
99 : }
100 : // TODO check if the nodes inserted at key.start and key.end are safe
101 : // to remove. It's fine to keep them but they could be redundant.
102 30006 : for k in to_update {
103 15012 : self.nodes.insert_mut(k, Some((lsn.end, value.clone())));
104 15012 : }
105 20220 : for k in to_remove {
106 5226 : self.nodes.remove_mut(&k);
107 5226 : }
108 14994 : }
109 :
110 : /// Get the latest (by lsn.end) layer at a given key
111 : ///
112 : /// Complexity: O(log N)
113 2192924 : pub fn query(&self, key: i128) -> Option<Value> {
114 2192924 : self.nodes
115 2192924 : .range(..=key)
116 2192924 : .next_back()?
117 : .1
118 1895022 : .as_ref()
119 1895022 : .map(|(_, v)| v.clone())
120 2192924 : }
121 :
122 : /// Iterate the changes in layer coverage in a given range. You will likely
123 : /// want to start with self.query(key.start), and then follow up with self.range
124 : ///
125 : /// Complexity: O(log N + result_size)
126 1725872 : pub fn range(&self, key: Range<i128>) -> impl '_ + Iterator<Item = (i128, Option<Value>)> {
127 1725872 : self.nodes
128 1725872 : .range(key)
129 1725872 : .map(|(k, v)| (*k, v.as_ref().map(|x| x.1.clone())))
130 1725872 : }
131 :
132 : /// Returns an iterator which includes all coverage changes for layers that intersect
133 : /// with the provided range.
134 1725674 : pub fn range_overlaps(
135 1725674 : &self,
136 1725674 : key_range: &Range<i128>,
137 1725674 : ) -> impl Iterator<Item = (i128, Option<Value>)> + '_
138 1725674 : where
139 1725674 : Value: Eq,
140 1725674 : {
141 1725674 : let first_change = self.query(key_range.start);
142 1725674 : match first_change {
143 1411345 : Some(change) => {
144 1411345 : // If the start of the range is covered, we have to deal with two cases:
145 1411345 : // 1. Start of the range is aligned with the start of a layer.
146 1411345 : // In this case the return of `self.range` will contain the layer which aligns with the start of the key range.
147 1411345 : // We advance said iterator to avoid duplicating the first change.
148 1411345 : // 2. Start of the range is not aligned with the start of a layer.
149 1411345 : let range = key_range.start..key_range.end;
150 1411345 : let mut range_coverage = self.range(range).peekable();
151 1411345 : if range_coverage
152 1411345 : .peek()
153 1411345 : .is_some_and(|c| c.1.as_ref() == Some(&change))
154 11298 : {
155 11298 : range_coverage.next();
156 1400047 : }
157 1411345 : itertools::Either::Left(
158 1411345 : std::iter::once((key_range.start, Some(change))).chain(range_coverage),
159 1411345 : )
160 : }
161 : None => {
162 314329 : let range = key_range.start..key_range.end;
163 314329 : let coverage = self.range(range);
164 314329 : itertools::Either::Right(coverage)
165 : }
166 : }
167 1725674 : }
168 : /// O(1) clone
169 35748 : pub fn clone(&self) -> Self {
170 35748 : Self {
171 35748 : nodes: self.nodes.clone(),
172 35748 : }
173 35748 : }
174 : }
175 :
176 : /// Image and delta coverage at a specific LSN.
177 : pub struct LayerCoverageTuple<Value> {
178 : pub image_coverage: LayerCoverage<Value>,
179 : pub delta_coverage: LayerCoverage<Value>,
180 : }
181 :
182 : impl<T: Clone> Default for LayerCoverageTuple<T> {
183 2442 : fn default() -> Self {
184 2442 : Self {
185 2442 : image_coverage: LayerCoverage::default(),
186 2442 : delta_coverage: LayerCoverage::default(),
187 2442 : }
188 2442 : }
189 : }
190 :
191 : impl<Value: Clone> LayerCoverageTuple<Value> {
192 17874 : pub fn clone(&self) -> Self {
193 17874 : Self {
194 17874 : image_coverage: self.image_coverage.clone(),
195 17874 : delta_coverage: self.delta_coverage.clone(),
196 17874 : }
197 17874 : }
198 : }
|