TLA Line data Source code
1 : use postgres_ffi::BLCKSZ;
2 : use std::ops::Range;
3 :
4 : use crate::key::Key;
5 :
6 : ///
7 : /// Represents a set of Keys, in a compact form.
8 : ///
9 CBC 35778 : #[derive(Clone, Debug, Default, PartialEq, Eq)]
10 : pub struct KeySpace {
11 : /// Contiguous ranges of keys that belong to the key space. In key order,
12 : /// and with no overlap.
13 : pub ranges: Vec<Range<Key>>,
14 : }
15 :
16 : impl KeySpace {
17 : ///
18 : /// Partition a key space into roughly chunks of roughly 'target_size' bytes
19 : /// in each partition.
20 : ///
21 715 : pub fn partition(&self, target_size: u64) -> KeyPartitioning {
22 715 : // Assume that each value is 8k in size.
23 715 : let target_nblocks = (target_size / BLCKSZ as u64) as usize;
24 715 :
25 715 : let mut parts = Vec::new();
26 715 : let mut current_part = Vec::new();
27 715 : let mut current_part_size: usize = 0;
28 1062075 : for range in &self.ranges {
29 : // If appending the next contiguous range in the keyspace to the current
30 : // partition would cause it to be too large, start a new partition.
31 1061360 : let this_size = key_range_size(range) as usize;
32 1061360 : if current_part_size + this_size > target_nblocks && !current_part.is_empty() {
33 15715 : parts.push(KeySpace {
34 15715 : ranges: current_part,
35 15715 : });
36 15715 : current_part = Vec::new();
37 15715 : current_part_size = 0;
38 1045645 : }
39 :
40 : // If the next range is larger than 'target_size', split it into
41 : // 'target_size' chunks.
42 1061360 : let mut remain_size = this_size;
43 1061360 : let mut start = range.start;
44 1067171 : while remain_size > target_nblocks {
45 5811 : let next = start.add(target_nblocks as u32);
46 5811 : parts.push(KeySpace {
47 5811 : ranges: vec![start..next],
48 5811 : });
49 5811 : start = next;
50 5811 : remain_size -= target_nblocks
51 : }
52 1061360 : current_part.push(start..range.end);
53 1061360 : current_part_size += remain_size;
54 : }
55 :
56 : // add last partition that wasn't full yet.
57 715 : if !current_part.is_empty() {
58 715 : parts.push(KeySpace {
59 715 : ranges: current_part,
60 715 : });
61 715 : }
62 :
63 715 : KeyPartitioning { parts }
64 715 : }
65 :
66 : ///
67 : /// Check if key space contains overlapping range
68 : ///
69 4983 : pub fn overlaps(&self, range: &Range<Key>) -> bool {
70 4983 : match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
71 1 : Ok(0) => false,
72 4972 : Err(0) => false,
73 2 : Ok(index) => self.ranges[index - 1].end > range.start,
74 8 : Err(index) => self.ranges[index - 1].end > range.start,
75 : }
76 4983 : }
77 : }
78 :
79 : ///
80 : /// Represents a partitioning of the key space.
81 : ///
82 : /// The only kind of partitioning we do is to partition the key space into
83 : /// partitions that are roughly equal in physical size (see KeySpace::partition).
84 : /// But this data structure could represent any partitioning.
85 : ///
86 1416 : #[derive(Clone, Debug, Default)]
87 : pub struct KeyPartitioning {
88 : pub parts: Vec<KeySpace>,
89 : }
90 :
91 : impl KeyPartitioning {
92 1290 : pub fn new() -> Self {
93 1290 : KeyPartitioning { parts: Vec::new() }
94 1290 : }
95 : }
96 :
97 : ///
98 : /// A helper object, to collect a set of keys and key ranges into a KeySpace
99 : /// object. This takes care of merging adjacent keys and key ranges into
100 : /// contiguous ranges.
101 : ///
102 UBC 0 : #[derive(Clone, Debug, Default)]
103 : pub struct KeySpaceAccum {
104 : accum: Option<Range<Key>>,
105 :
106 : ranges: Vec<Range<Key>>,
107 : }
108 :
109 : impl KeySpaceAccum {
110 CBC 729 : pub fn new() -> Self {
111 729 : Self {
112 729 : accum: None,
113 729 : ranges: Vec::new(),
114 729 : }
115 729 : }
116 :
117 1126670 : pub fn add_key(&mut self, key: Key) {
118 1126670 : self.add_range(singleton_range(key))
119 1126670 : }
120 :
121 1740603 : pub fn add_range(&mut self, range: Range<Key>) {
122 1740603 : match self.accum.as_mut() {
123 1739874 : Some(accum) => {
124 1739874 : if range.start == accum.end {
125 663506 : accum.end = range.end;
126 1076368 : } else {
127 1076368 : // TODO: to efficiently support small sharding stripe sizes, we should avoid starting
128 1076368 : // a new range here if the skipped region was all keys that don't belong on this shard.
129 1076368 : // (https://github.com/neondatabase/neon/issues/6247)
130 1076368 : assert!(range.start > accum.end);
131 1076368 : self.ranges.push(accum.clone());
132 1076368 : *accum = range;
133 : }
134 : }
135 729 : None => self.accum = Some(range),
136 : }
137 1740603 : }
138 :
139 723 : pub fn to_keyspace(mut self) -> KeySpace {
140 723 : if let Some(accum) = self.accum.take() {
141 723 : self.ranges.push(accum);
142 723 : }
143 723 : KeySpace {
144 723 : ranges: self.ranges,
145 723 : }
146 723 : }
147 : }
148 :
149 : ///
150 : /// A helper object, to collect a set of keys and key ranges into a KeySpace
151 : /// object. Key ranges may be inserted in any order and can overlap.
152 : ///
153 551 : #[derive(Clone, Debug, Default)]
154 : pub struct KeySpaceRandomAccum {
155 : ranges: Vec<Range<Key>>,
156 : }
157 :
158 : impl KeySpaceRandomAccum {
159 UBC 0 : pub fn new() -> Self {
160 0 : Self { ranges: Vec::new() }
161 0 : }
162 :
163 0 : pub fn add_key(&mut self, key: Key) {
164 0 : self.add_range(singleton_range(key))
165 0 : }
166 :
167 CBC 21 : pub fn add_range(&mut self, range: Range<Key>) {
168 21 : self.ranges.push(range);
169 21 : }
170 :
171 551 : pub fn to_keyspace(mut self) -> KeySpace {
172 551 : let mut ranges = Vec::new();
173 551 : if !self.ranges.is_empty() {
174 30 : self.ranges.sort_by_key(|r| r.start);
175 9 : let mut start = self.ranges.first().unwrap().start;
176 9 : let mut end = self.ranges.first().unwrap().end;
177 30 : for r in self.ranges {
178 21 : assert!(r.start >= start);
179 21 : if r.start > end {
180 2 : ranges.push(start..end);
181 2 : start = r.start;
182 2 : end = r.end;
183 19 : } else if r.end > end {
184 6 : end = r.end;
185 13 : }
186 : }
187 9 : ranges.push(start..end);
188 542 : }
189 551 : KeySpace { ranges }
190 551 : }
191 : }
192 :
193 1061360 : pub fn key_range_size(key_range: &Range<Key>) -> u32 {
194 1061360 : let start = key_range.start;
195 1061360 : let end = key_range.end;
196 1061360 :
197 1061360 : if end.field1 != start.field1
198 1061360 : || end.field2 != start.field2
199 1061360 : || end.field3 != start.field3
200 1061360 : || end.field4 != start.field4
201 : {
202 UBC 0 : return u32::MAX;
203 CBC 1061360 : }
204 1061360 :
205 1061360 : let start = (start.field5 as u64) << 32 | start.field6 as u64;
206 1061360 : let end = (end.field5 as u64) << 32 | end.field6 as u64;
207 1061360 :
208 1061360 : let diff = end - start;
209 1061360 : if diff > u32::MAX as u64 {
210 UBC 0 : u32::MAX
211 : } else {
212 CBC 1061360 : diff as u32
213 : }
214 1061360 : }
215 :
216 1126670 : pub fn singleton_range(key: Key) -> Range<Key> {
217 1126670 : key..key.next()
218 1126670 : }
219 :
220 : #[cfg(test)]
221 : mod tests {
222 : use super::*;
223 : use std::fmt::Write;
224 :
225 : // Helper function to create a key range.
226 : //
227 : // Make the tests below less verbose.
228 43 : fn kr(irange: Range<i128>) -> Range<Key> {
229 43 : Key::from_i128(irange.start)..Key::from_i128(irange.end)
230 43 : }
231 :
232 : #[allow(dead_code)]
233 UBC 0 : fn dump_keyspace(ks: &KeySpace) {
234 0 : for r in ks.ranges.iter() {
235 0 : println!(" {}..{}", r.start.to_i128(), r.end.to_i128());
236 0 : }
237 0 : }
238 :
239 CBC 8 : fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {
240 8 : if actual.ranges != expected {
241 UBC 0 : let mut msg = String::new();
242 0 :
243 0 : writeln!(msg, "expected:").unwrap();
244 0 : for r in &expected {
245 0 : writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
246 0 : }
247 0 : writeln!(msg, "got:").unwrap();
248 0 : for r in &actual.ranges {
249 0 : writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
250 0 : }
251 0 : panic!("{}", msg);
252 CBC 8 : }
253 8 : }
254 :
255 1 : #[test]
256 1 : fn keyspace_add_range() {
257 1 : // two separate ranges
258 1 : //
259 1 : // #####
260 1 : // #####
261 1 : let mut ks = KeySpaceRandomAccum::default();
262 1 : ks.add_range(kr(0..10));
263 1 : ks.add_range(kr(20..30));
264 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
265 1 :
266 1 : // two separate ranges, added in reverse order
267 1 : //
268 1 : // #####
269 1 : // #####
270 1 : let mut ks = KeySpaceRandomAccum::default();
271 1 : ks.add_range(kr(20..30));
272 1 : ks.add_range(kr(0..10));
273 1 :
274 1 : // add range that is adjacent to the end of an existing range
275 1 : //
276 1 : // #####
277 1 : // #####
278 1 : ks.add_range(kr(0..10));
279 1 : ks.add_range(kr(10..30));
280 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
281 1 :
282 1 : // add range that is adjacent to the start of an existing range
283 1 : //
284 1 : // #####
285 1 : // #####
286 1 : let mut ks = KeySpaceRandomAccum::default();
287 1 : ks.add_range(kr(10..30));
288 1 : ks.add_range(kr(0..10));
289 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
290 1 :
291 1 : // add range that overlaps with the end of an existing range
292 1 : //
293 1 : // #####
294 1 : // #####
295 1 : let mut ks = KeySpaceRandomAccum::default();
296 1 : ks.add_range(kr(0..10));
297 1 : ks.add_range(kr(5..30));
298 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
299 1 :
300 1 : // add range that overlaps with the start of an existing range
301 1 : //
302 1 : // #####
303 1 : // #####
304 1 : let mut ks = KeySpaceRandomAccum::default();
305 1 : ks.add_range(kr(5..30));
306 1 : ks.add_range(kr(0..10));
307 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
308 1 :
309 1 : // add range that is fully covered by an existing range
310 1 : //
311 1 : // #########
312 1 : // #####
313 1 : let mut ks = KeySpaceRandomAccum::default();
314 1 : ks.add_range(kr(0..30));
315 1 : ks.add_range(kr(10..20));
316 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
317 1 :
318 1 : // add range that extends an existing range from both ends
319 1 : //
320 1 : // #####
321 1 : // #########
322 1 : let mut ks = KeySpaceRandomAccum::default();
323 1 : ks.add_range(kr(10..20));
324 1 : ks.add_range(kr(0..30));
325 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
326 1 :
327 1 : // add a range that overlaps with two existing ranges, joining them
328 1 : //
329 1 : // ##### #####
330 1 : // #######
331 1 : let mut ks = KeySpaceRandomAccum::default();
332 1 : ks.add_range(kr(0..10));
333 1 : ks.add_range(kr(20..30));
334 1 : ks.add_range(kr(5..25));
335 1 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
336 1 : }
337 :
338 1 : #[test]
339 1 : fn keyspace_overlaps() {
340 1 : let mut ks = KeySpaceRandomAccum::default();
341 1 : ks.add_range(kr(10..20));
342 1 : ks.add_range(kr(30..40));
343 1 : let ks = ks.to_keyspace();
344 :
345 : // ##### #####
346 : // xxxx
347 1 : assert!(!ks.overlaps(&kr(0..5)));
348 :
349 : // ##### #####
350 : // xxxx
351 1 : assert!(!ks.overlaps(&kr(5..9)));
352 :
353 : // ##### #####
354 : // xxxx
355 1 : assert!(!ks.overlaps(&kr(5..10)));
356 :
357 : // ##### #####
358 : // xxxx
359 1 : assert!(ks.overlaps(&kr(5..11)));
360 :
361 : // ##### #####
362 : // xxxx
363 1 : assert!(ks.overlaps(&kr(10..15)));
364 :
365 : // ##### #####
366 : // xxxx
367 1 : assert!(ks.overlaps(&kr(15..20)));
368 :
369 : // ##### #####
370 : // xxxx
371 1 : assert!(ks.overlaps(&kr(15..25)));
372 :
373 : // ##### #####
374 : // xxxx
375 1 : assert!(!ks.overlaps(&kr(22..28)));
376 :
377 : // ##### #####
378 : // xxxx
379 1 : assert!(!ks.overlaps(&kr(25..30)));
380 :
381 : // ##### #####
382 : // xxxx
383 1 : assert!(ks.overlaps(&kr(35..35)));
384 :
385 : // ##### #####
386 : // xxxx
387 1 : assert!(!ks.overlaps(&kr(40..45)));
388 :
389 : // ##### #####
390 : // xxxx
391 1 : assert!(!ks.overlaps(&kr(45..50)));
392 :
393 : // ##### #####
394 : // xxxxxxxxxxx
395 1 : assert!(ks.overlaps(&kr(0..30))); // XXXXX This fails currently!
396 1 : }
397 : }
|