Line data Source code
1 : use postgres_ffi::BLCKSZ;
2 : use std::ops::Range;
3 :
4 : use crate::key::Key;
5 :
6 : ///
7 : /// Represents a set of Keys, in a compact form.
8 : ///
9 37312 : #[derive(Clone, Debug, Default, PartialEq, Eq)]
10 : pub struct KeySpace {
11 : /// Contiguous ranges of keys that belong to the key space. In key order,
12 : /// and with no overlap.
13 : pub ranges: Vec<Range<Key>>,
14 : }
15 :
16 : impl KeySpace {
17 : ///
18 : /// Partition a key space into roughly chunks of roughly 'target_size' bytes
19 : /// in each partition.
20 : ///
21 1485 : pub fn partition(&self, target_size: u64) -> KeyPartitioning {
22 1485 : // Assume that each value is 8k in size.
23 1485 : let target_nblocks = (target_size / BLCKSZ as u64) as usize;
24 1485 :
25 1485 : let mut parts = Vec::new();
26 1485 : let mut current_part = Vec::new();
27 1485 : let mut current_part_size: usize = 0;
28 1246047 : for range in &self.ranges {
29 : // If appending the next contiguous range in the keyspace to the current
30 : // partition would cause it to be too large, start a new partition.
31 1244562 : let this_size = key_range_size(range) as usize;
32 1244562 : if current_part_size + this_size > target_nblocks && !current_part.is_empty() {
33 16207 : parts.push(KeySpace {
34 16207 : ranges: current_part,
35 16207 : });
36 16207 : current_part = Vec::new();
37 16207 : current_part_size = 0;
38 1228355 : }
39 :
40 : // If the next range is larger than 'target_size', split it into
41 : // 'target_size' chunks.
42 1244562 : let mut remain_size = this_size;
43 1244562 : let mut start = range.start;
44 1251469 : while remain_size > target_nblocks {
45 6907 : let next = start.add(target_nblocks as u32);
46 6907 : parts.push(KeySpace {
47 6907 : ranges: vec![start..next],
48 6907 : });
49 6907 : start = next;
50 6907 : remain_size -= target_nblocks
51 : }
52 1244562 : current_part.push(start..range.end);
53 1244562 : current_part_size += remain_size;
54 : }
55 :
56 : // add last partition that wasn't full yet.
57 1485 : if !current_part.is_empty() {
58 1485 : parts.push(KeySpace {
59 1485 : ranges: current_part,
60 1485 : });
61 1485 : }
62 :
63 1485 : KeyPartitioning { parts }
64 1485 : }
65 :
66 : /// Update the keyspace such that it doesn't contain any range
67 : /// that is overlapping with `other`. This can involve splitting or
68 : /// removing of existing ranges.
69 8 : pub fn remove_overlapping_with(&mut self, other: &KeySpace) {
70 8 : let (self_start, self_end) = match (self.start(), self.end()) {
71 8 : (Some(start), Some(end)) => (start, end),
72 : _ => {
73 : // self is empty
74 0 : return;
75 : }
76 : };
77 :
78 : // Key spaces are sorted by definition, so skip ahead to the first
79 : // potentially intersecting range. Similarly, ignore ranges that start
80 : // after the current keyspace ends.
81 8 : let other_ranges = other
82 8 : .ranges
83 8 : .iter()
84 8 : .skip_while(|range| self_start >= range.end)
85 20 : .take_while(|range| self_end > range.start);
86 :
87 26 : for range in other_ranges {
88 36 : while let Some(overlap_at) = self.overlaps_at(range) {
89 18 : let overlapped = self.ranges[overlap_at].clone();
90 18 :
91 18 : if overlapped.start < range.start && overlapped.end <= range.end {
92 10 : // Higher part of the range is completely overlapped.
93 10 : self.ranges[overlap_at].end = range.start;
94 10 : }
95 18 : if overlapped.start >= range.start && overlapped.end > range.end {
96 2 : // Lower part of the range is completely overlapped.
97 2 : self.ranges[overlap_at].start = range.end;
98 16 : }
99 18 : if overlapped.start < range.start && overlapped.end > range.end {
100 4 : // Middle part of the range is overlapped.
101 4 : self.ranges[overlap_at].end = range.start;
102 4 : self.ranges
103 4 : .insert(overlap_at + 1, range.end..overlapped.end);
104 14 : }
105 18 : if overlapped.start >= range.start && overlapped.end <= range.end {
106 2 : // Whole range is overlapped
107 2 : self.ranges.remove(overlap_at);
108 16 : }
109 : }
110 : }
111 8 : }
112 :
113 8 : pub fn start(&self) -> Option<Key> {
114 8 : self.ranges.first().map(|range| range.start)
115 8 : }
116 :
117 8 : pub fn end(&self) -> Option<Key> {
118 8 : self.ranges.last().map(|range| range.end)
119 8 : }
120 :
121 : #[allow(unused)]
122 0 : pub fn total_size(&self) -> usize {
123 0 : self.ranges
124 0 : .iter()
125 0 : .map(|range| key_range_size(range) as usize)
126 0 : .sum()
127 0 : }
128 :
129 6282 : fn overlaps_at(&self, range: &Range<Key>) -> Option<usize> {
130 6282 : match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
131 2 : Ok(0) => None,
132 6224 : Err(0) => None,
133 18 : Ok(index) if self.ranges[index - 1].end > range.start => Some(index - 1),
134 38 : Err(index) if self.ranges[index - 1].end > range.start => Some(index - 1),
135 26 : _ => None,
136 : }
137 6282 : }
138 :
139 : ///
140 : /// Check if key space contains overlapping range
141 : ///
142 6246 : pub fn overlaps(&self, range: &Range<Key>) -> bool {
143 6246 : self.overlaps_at(range).is_some()
144 6246 : }
145 : }
146 :
147 : ///
148 : /// Represents a partitioning of the key space.
149 : ///
150 : /// The only kind of partitioning we do is to partition the key space into
151 : /// partitions that are roughly equal in physical size (see KeySpace::partition).
152 : /// But this data structure could represent any partitioning.
153 : ///
154 1599 : #[derive(Clone, Debug, Default)]
155 : pub struct KeyPartitioning {
156 : pub parts: Vec<KeySpace>,
157 : }
158 :
159 : impl KeyPartitioning {
160 1568 : pub fn new() -> Self {
161 1568 : KeyPartitioning { parts: Vec::new() }
162 1568 : }
163 : }
164 :
165 : ///
166 : /// A helper object, to collect a set of keys and key ranges into a KeySpace
167 : /// object. This takes care of merging adjacent keys and key ranges into
168 : /// contiguous ranges.
169 : ///
170 16460 : #[derive(Clone, Debug, Default)]
171 : pub struct KeySpaceAccum {
172 : accum: Option<Range<Key>>,
173 :
174 : ranges: Vec<Range<Key>>,
175 : size: u64,
176 : }
177 :
178 : impl KeySpaceAccum {
179 86364 : pub fn new() -> Self {
180 86364 : Self {
181 86364 : accum: None,
182 86364 : ranges: Vec::new(),
183 86364 : size: 0,
184 86364 : }
185 86364 : }
186 :
187 : #[inline(always)]
188 2033084 : pub fn add_key(&mut self, key: Key) {
189 2033084 : self.add_range(singleton_range(key))
190 2033084 : }
191 :
192 : #[inline(always)]
193 2761555 : pub fn add_range(&mut self, range: Range<Key>) {
194 2761555 : self.size += key_range_size(&range) as u64;
195 2761555 :
196 2761555 : match self.accum.as_mut() {
197 2668048 : Some(accum) => {
198 2668048 : if range.start == accum.end {
199 1408349 : accum.end = range.end;
200 1408349 : } else {
201 1259699 : // TODO: to efficiently support small sharding stripe sizes, we should avoid starting
202 1259699 : // a new range here if the skipped region was all keys that don't belong on this shard.
203 1259699 : // (https://github.com/neondatabase/neon/issues/6247)
204 1259699 : assert!(range.start > accum.end);
205 1259699 : self.ranges.push(accum.clone());
206 1259699 : *accum = range;
207 : }
208 : }
209 93507 : None => self.accum = Some(range),
210 : }
211 2761555 : }
212 :
213 25034 : pub fn to_keyspace(mut self) -> KeySpace {
214 25034 : if let Some(accum) = self.accum.take() {
215 21754 : self.ranges.push(accum);
216 21754 : }
217 25034 : KeySpace {
218 25034 : ranges: self.ranges,
219 25034 : }
220 25034 : }
221 :
222 71745 : pub fn consume_keyspace(&mut self) -> KeySpace {
223 71745 : if let Some(accum) = self.accum.take() {
224 71743 : self.ranges.push(accum);
225 71743 : }
226 :
227 71745 : let mut prev_accum = KeySpaceAccum::new();
228 71745 : std::mem::swap(self, &mut prev_accum);
229 71745 :
230 71745 : KeySpace {
231 71745 : ranges: prev_accum.ranges,
232 71745 : }
233 71745 : }
234 :
235 229362 : pub fn size(&self) -> u64 {
236 229362 : self.size
237 229362 : }
238 : }
239 :
240 : ///
241 : /// A helper object, to collect a set of keys and key ranges into a KeySpace
242 : /// object. Key ranges may be inserted in any order and can overlap.
243 : ///
244 639 : #[derive(Clone, Debug, Default)]
245 : pub struct KeySpaceRandomAccum {
246 : ranges: Vec<Range<Key>>,
247 : }
248 :
249 : impl KeySpaceRandomAccum {
250 0 : pub fn new() -> Self {
251 0 : Self { ranges: Vec::new() }
252 0 : }
253 :
254 0 : pub fn add_key(&mut self, key: Key) {
255 0 : self.add_range(singleton_range(key))
256 0 : }
257 :
258 42 : pub fn add_range(&mut self, range: Range<Key>) {
259 42 : self.ranges.push(range);
260 42 : }
261 :
262 639 : pub fn to_keyspace(mut self) -> KeySpace {
263 639 : let mut ranges = Vec::new();
264 639 : if !self.ranges.is_empty() {
265 60 : self.ranges.sort_by_key(|r| r.start);
266 18 : let mut start = self.ranges.first().unwrap().start;
267 18 : let mut end = self.ranges.first().unwrap().end;
268 60 : for r in self.ranges {
269 42 : assert!(r.start >= start);
270 42 : if r.start > end {
271 4 : ranges.push(start..end);
272 4 : start = r.start;
273 4 : end = r.end;
274 38 : } else if r.end > end {
275 12 : end = r.end;
276 26 : }
277 : }
278 18 : ranges.push(start..end);
279 621 : }
280 639 : KeySpace { ranges }
281 639 : }
282 : }
283 :
284 4079739 : pub fn key_range_size(key_range: &Range<Key>) -> u32 {
285 4079739 : let start = key_range.start;
286 4079739 : let end = key_range.end;
287 4079739 :
288 4079739 : if end.field1 != start.field1
289 4079739 : || end.field2 != start.field2
290 4079739 : || end.field3 != start.field3
291 4079739 : || end.field4 != start.field4
292 : {
293 0 : return u32::MAX;
294 4079739 : }
295 4079739 :
296 4079739 : let start = (start.field5 as u64) << 32 | start.field6 as u64;
297 4079739 : let end = (end.field5 as u64) << 32 | end.field6 as u64;
298 4079739 :
299 4079739 : let diff = end - start;
300 4079739 : if diff > u32::MAX as u64 {
301 0 : u32::MAX
302 : } else {
303 4079739 : diff as u32
304 : }
305 4079739 : }
306 :
307 2033099 : pub fn singleton_range(key: Key) -> Range<Key> {
308 2033099 : key..key.next()
309 2033099 : }
310 :
311 : #[cfg(test)]
312 : mod tests {
313 : use super::*;
314 : use std::fmt::Write;
315 :
316 : // Helper function to create a key range.
317 : //
318 : // Make the tests below less verbose.
319 92 : fn kr(irange: Range<i128>) -> Range<Key> {
320 92 : Key::from_i128(irange.start)..Key::from_i128(irange.end)
321 92 : }
322 :
323 : #[allow(dead_code)]
324 0 : fn dump_keyspace(ks: &KeySpace) {
325 0 : for r in ks.ranges.iter() {
326 0 : println!(" {}..{}", r.start.to_i128(), r.end.to_i128());
327 0 : }
328 0 : }
329 :
330 22 : fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {
331 22 : if actual.ranges != expected {
332 0 : let mut msg = String::new();
333 0 :
334 0 : writeln!(msg, "expected:").unwrap();
335 0 : for r in &expected {
336 0 : writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
337 0 : }
338 0 : writeln!(msg, "got:").unwrap();
339 0 : for r in &actual.ranges {
340 0 : writeln!(msg, " {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
341 0 : }
342 0 : panic!("{}", msg);
343 22 : }
344 22 : }
345 :
346 2 : #[test]
347 2 : fn keyspace_consume() {
348 2 : let ranges = vec![kr(0..10), kr(20..35), kr(40..45)];
349 2 :
350 2 : let mut accum = KeySpaceAccum::new();
351 8 : for range in &ranges {
352 6 : accum.add_range(range.clone());
353 6 : }
354 :
355 6 : let expected_size: u64 = ranges.iter().map(|r| key_range_size(r) as u64).sum();
356 2 : assert_eq!(accum.size(), expected_size);
357 :
358 2 : assert_ks_eq(&accum.consume_keyspace(), ranges.clone());
359 2 : assert_eq!(accum.size(), 0);
360 :
361 2 : assert_ks_eq(&accum.consume_keyspace(), vec![]);
362 2 : assert_eq!(accum.size(), 0);
363 :
364 8 : for range in &ranges {
365 6 : accum.add_range(range.clone());
366 6 : }
367 2 : assert_ks_eq(&accum.to_keyspace(), ranges);
368 2 : }
369 :
370 2 : #[test]
371 2 : fn keyspace_add_range() {
372 2 : // two separate ranges
373 2 : //
374 2 : // #####
375 2 : // #####
376 2 : let mut ks = KeySpaceRandomAccum::default();
377 2 : ks.add_range(kr(0..10));
378 2 : ks.add_range(kr(20..30));
379 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
380 2 :
381 2 : // two separate ranges, added in reverse order
382 2 : //
383 2 : // #####
384 2 : // #####
385 2 : let mut ks = KeySpaceRandomAccum::default();
386 2 : ks.add_range(kr(20..30));
387 2 : ks.add_range(kr(0..10));
388 2 :
389 2 : // add range that is adjacent to the end of an existing range
390 2 : //
391 2 : // #####
392 2 : // #####
393 2 : ks.add_range(kr(0..10));
394 2 : ks.add_range(kr(10..30));
395 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
396 2 :
397 2 : // add range that is adjacent to the start of an existing range
398 2 : //
399 2 : // #####
400 2 : // #####
401 2 : let mut ks = KeySpaceRandomAccum::default();
402 2 : ks.add_range(kr(10..30));
403 2 : ks.add_range(kr(0..10));
404 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
405 2 :
406 2 : // add range that overlaps with the end of an existing range
407 2 : //
408 2 : // #####
409 2 : // #####
410 2 : let mut ks = KeySpaceRandomAccum::default();
411 2 : ks.add_range(kr(0..10));
412 2 : ks.add_range(kr(5..30));
413 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
414 2 :
415 2 : // add range that overlaps with the start of an existing range
416 2 : //
417 2 : // #####
418 2 : // #####
419 2 : let mut ks = KeySpaceRandomAccum::default();
420 2 : ks.add_range(kr(5..30));
421 2 : ks.add_range(kr(0..10));
422 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
423 2 :
424 2 : // add range that is fully covered by an existing range
425 2 : //
426 2 : // #########
427 2 : // #####
428 2 : let mut ks = KeySpaceRandomAccum::default();
429 2 : ks.add_range(kr(0..30));
430 2 : ks.add_range(kr(10..20));
431 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
432 2 :
433 2 : // add range that extends an existing range from both ends
434 2 : //
435 2 : // #####
436 2 : // #########
437 2 : let mut ks = KeySpaceRandomAccum::default();
438 2 : ks.add_range(kr(10..20));
439 2 : ks.add_range(kr(0..30));
440 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
441 2 :
442 2 : // add a range that overlaps with two existing ranges, joining them
443 2 : //
444 2 : // ##### #####
445 2 : // #######
446 2 : let mut ks = KeySpaceRandomAccum::default();
447 2 : ks.add_range(kr(0..10));
448 2 : ks.add_range(kr(20..30));
449 2 : ks.add_range(kr(5..25));
450 2 : assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
451 2 : }
452 :
453 2 : #[test]
454 2 : fn keyspace_overlaps() {
455 2 : let mut ks = KeySpaceRandomAccum::default();
456 2 : ks.add_range(kr(10..20));
457 2 : ks.add_range(kr(30..40));
458 2 : let ks = ks.to_keyspace();
459 :
460 : // ##### #####
461 : // xxxx
462 2 : assert!(!ks.overlaps(&kr(0..5)));
463 :
464 : // ##### #####
465 : // xxxx
466 2 : assert!(!ks.overlaps(&kr(5..9)));
467 :
468 : // ##### #####
469 : // xxxx
470 2 : assert!(!ks.overlaps(&kr(5..10)));
471 :
472 : // ##### #####
473 : // xxxx
474 2 : assert!(ks.overlaps(&kr(5..11)));
475 :
476 : // ##### #####
477 : // xxxx
478 2 : assert!(ks.overlaps(&kr(10..15)));
479 :
480 : // ##### #####
481 : // xxxx
482 2 : assert!(ks.overlaps(&kr(15..20)));
483 :
484 : // ##### #####
485 : // xxxx
486 2 : assert!(ks.overlaps(&kr(15..25)));
487 :
488 : // ##### #####
489 : // xxxx
490 2 : assert!(!ks.overlaps(&kr(22..28)));
491 :
492 : // ##### #####
493 : // xxxx
494 2 : assert!(!ks.overlaps(&kr(25..30)));
495 :
496 : // ##### #####
497 : // xxxx
498 2 : assert!(ks.overlaps(&kr(35..35)));
499 :
500 : // ##### #####
501 : // xxxx
502 2 : assert!(!ks.overlaps(&kr(40..45)));
503 :
504 : // ##### #####
505 : // xxxx
506 2 : assert!(!ks.overlaps(&kr(45..50)));
507 :
508 : // ##### #####
509 : // xxxxxxxxxxx
510 2 : assert!(ks.overlaps(&kr(0..30))); // XXXXX This fails currently!
511 2 : }
512 :
513 2 : #[test]
514 2 : fn test_remove_full_overlapps() {
515 2 : let mut key_space1 = KeySpace {
516 2 : ranges: vec![
517 2 : Key::from_i128(1)..Key::from_i128(4),
518 2 : Key::from_i128(5)..Key::from_i128(8),
519 2 : Key::from_i128(10)..Key::from_i128(12),
520 2 : ],
521 2 : };
522 2 : let key_space2 = KeySpace {
523 2 : ranges: vec![
524 2 : Key::from_i128(2)..Key::from_i128(3),
525 2 : Key::from_i128(6)..Key::from_i128(7),
526 2 : Key::from_i128(11)..Key::from_i128(13),
527 2 : ],
528 2 : };
529 2 : key_space1.remove_overlapping_with(&key_space2);
530 2 : assert_eq!(
531 2 : key_space1.ranges,
532 2 : vec![
533 2 : Key::from_i128(1)..Key::from_i128(2),
534 2 : Key::from_i128(3)..Key::from_i128(4),
535 2 : Key::from_i128(5)..Key::from_i128(6),
536 2 : Key::from_i128(7)..Key::from_i128(8),
537 2 : Key::from_i128(10)..Key::from_i128(11)
538 2 : ]
539 2 : );
540 2 : }
541 :
542 2 : #[test]
543 2 : fn test_remove_partial_overlaps() {
544 2 : // Test partial ovelaps
545 2 : let mut key_space1 = KeySpace {
546 2 : ranges: vec![
547 2 : Key::from_i128(1)..Key::from_i128(5),
548 2 : Key::from_i128(7)..Key::from_i128(10),
549 2 : Key::from_i128(12)..Key::from_i128(15),
550 2 : ],
551 2 : };
552 2 : let key_space2 = KeySpace {
553 2 : ranges: vec![
554 2 : Key::from_i128(3)..Key::from_i128(6),
555 2 : Key::from_i128(8)..Key::from_i128(11),
556 2 : Key::from_i128(14)..Key::from_i128(17),
557 2 : ],
558 2 : };
559 2 : key_space1.remove_overlapping_with(&key_space2);
560 2 : assert_eq!(
561 2 : key_space1.ranges,
562 2 : vec![
563 2 : Key::from_i128(1)..Key::from_i128(3),
564 2 : Key::from_i128(7)..Key::from_i128(8),
565 2 : Key::from_i128(12)..Key::from_i128(14),
566 2 : ]
567 2 : );
568 2 : }
569 :
570 2 : #[test]
571 2 : fn test_remove_no_overlaps() {
572 2 : let mut key_space1 = KeySpace {
573 2 : ranges: vec![
574 2 : Key::from_i128(1)..Key::from_i128(5),
575 2 : Key::from_i128(7)..Key::from_i128(10),
576 2 : Key::from_i128(12)..Key::from_i128(15),
577 2 : ],
578 2 : };
579 2 : let key_space2 = KeySpace {
580 2 : ranges: vec![
581 2 : Key::from_i128(6)..Key::from_i128(7),
582 2 : Key::from_i128(11)..Key::from_i128(12),
583 2 : Key::from_i128(15)..Key::from_i128(17),
584 2 : ],
585 2 : };
586 2 : key_space1.remove_overlapping_with(&key_space2);
587 2 : assert_eq!(
588 2 : key_space1.ranges,
589 2 : vec![
590 2 : Key::from_i128(1)..Key::from_i128(5),
591 2 : Key::from_i128(7)..Key::from_i128(10),
592 2 : Key::from_i128(12)..Key::from_i128(15),
593 2 : ]
594 2 : );
595 2 : }
596 :
597 2 : #[test]
598 2 : fn test_remove_one_range_overlaps_multiple() {
599 2 : let mut key_space1 = KeySpace {
600 2 : ranges: vec![
601 2 : Key::from_i128(1)..Key::from_i128(3),
602 2 : Key::from_i128(3)..Key::from_i128(6),
603 2 : Key::from_i128(6)..Key::from_i128(10),
604 2 : Key::from_i128(12)..Key::from_i128(15),
605 2 : Key::from_i128(17)..Key::from_i128(20),
606 2 : Key::from_i128(20)..Key::from_i128(30),
607 2 : Key::from_i128(30)..Key::from_i128(40),
608 2 : ],
609 2 : };
610 2 : let key_space2 = KeySpace {
611 2 : ranges: vec![Key::from_i128(9)..Key::from_i128(19)],
612 2 : };
613 2 : key_space1.remove_overlapping_with(&key_space2);
614 2 : assert_eq!(
615 2 : key_space1.ranges,
616 2 : vec![
617 2 : Key::from_i128(1)..Key::from_i128(3),
618 2 : Key::from_i128(3)..Key::from_i128(6),
619 2 : Key::from_i128(6)..Key::from_i128(9),
620 2 : Key::from_i128(19)..Key::from_i128(20),
621 2 : Key::from_i128(20)..Key::from_i128(30),
622 2 : Key::from_i128(30)..Key::from_i128(40),
623 2 : ]
624 2 : );
625 2 : }
626 : }
|