LCOV - code coverage report
Current view: top level - libs/tenant_size_model/tests - tests.rs (source / functions) Coverage Total Hit
Test: 7eb96e224e685167ad85f58f858387d8cf253f63.info Lines: 98.0 % 202 198
Test Date: 2024-09-23 21:23:07 Functions: 92.3 % 13 12

            Line data    Source code
       1              : //! Tenant size model tests.
       2              : 
       3              : use tenant_size_model::{Segment, SizeResult, StorageModel};
       4              : 
       5              : use std::collections::HashMap;
       6              : 
       7              : struct ScenarioBuilder {
       8              :     segments: Vec<Segment>,
       9              : 
      10              :     /// Mapping from the branch name to the index of a segment describing its latest state.
      11              :     branches: HashMap<String, usize>,
      12              : }
      13              : 
      14              : impl ScenarioBuilder {
      15              :     /// Creates a new storage with the given default branch name.
      16            6 :     pub fn new(initial_branch: &str) -> ScenarioBuilder {
      17            6 :         let init_segment = Segment {
      18            6 :             parent: None,
      19            6 :             lsn: 0,
      20            6 :             size: Some(0),
      21            6 :             needed: false, // determined later
      22            6 :         };
      23            6 : 
      24            6 :         ScenarioBuilder {
      25            6 :             segments: vec![init_segment],
      26            6 :             branches: HashMap::from([(initial_branch.into(), 0)]),
      27            6 :         }
      28            6 :     }
      29              : 
      30              :     /// Appends a segment to `branch`, advancing its LSN by `lsn_bytes` and changing its logical size by `size_bytes` (may be negative).
      31           51 :     pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
      32           51 :         let lastseg_id = *self.branches.get(branch).unwrap();
      33           51 :         let newseg_id = self.segments.len();
      34           51 :         let lastseg = &mut self.segments[lastseg_id];
      35           51 : 
      36           51 :         let newseg = Segment {
      37           51 :             parent: Some(lastseg_id),
      38           51 :             lsn: lastseg.lsn + lsn_bytes,
      39           51 :             size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
      40           51 :             needed: false,
      41           51 :         };
      42           51 : 
      43           51 :         self.segments.push(newseg);
      44           51 :         *self.branches.get_mut(branch).expect("read already") = newseg_id;
      45           51 :     }
      46              : 
      47            7 :     pub fn insert(&mut self, branch: &str, bytes: u64) {
      48            7 :         self.modify_branch(branch, bytes, bytes as i64);
      49            7 :     }
      50              : 
      51           39 :     pub fn update(&mut self, branch: &str, bytes: u64) {
      52           39 :         self.modify_branch(branch, bytes, 0i64);
      53           39 :     }
      54              : 
      55            0 :     pub fn _delete(&mut self, branch: &str, bytes: u64) {
      56            0 :         self.modify_branch(branch, bytes, -(bytes as i64));
      57            0 :     }
      58              : 
      59              :     /// Creates branch `name` at the latest segment of `parent`. Panics if the parent branch cannot be found.
      60            8 :     pub fn branch(&mut self, parent: &str, name: &str) {
      61            8 :         // Find the right segment
      62            8 :         let branchseg_id = *self
      63            8 :             .branches
      64            8 :             .get(parent)
      65            8 :             .expect("should had found the parent by key");
      66            8 :         let _branchseg = &mut self.segments[branchseg_id];
      67            8 : 
      68            8 :         // Create branch name for it
      69            8 :         self.branches.insert(name.to_string(), branchseg_id);
      70            8 :     }
      71              : 
      72            6 :     pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
      73              :         // Phase 1: Mark all the segments that need to be retained
      74           14 :         for (_branch, &last_seg_id) in self.branches.iter() {
      75           14 :             let last_seg = &self.segments[last_seg_id];
      76           14 :             let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
      77           14 :             let mut seg_id = last_seg_id;
      78              :             loop {
      79           27 :                 let seg = &mut self.segments[seg_id];
      80           27 :                 if seg.lsn <= cutoff_lsn {
      81           14 :                     break;
      82           13 :                 }
      83           13 :                 seg.needed = true;
      84           13 :                 if let Some(prev_seg_id) = seg.parent {
      85           13 :                     seg_id = prev_seg_id;
      86           13 :                 } else {
      87            0 :                     break;
      88              :                 }
      89              :             }
      90              :         }
      91              : 
      92              :         // Phase 2: Perform the calculation
      93            6 :         let storage_model = StorageModel {
      94            6 :             segments: self.segments.clone(),
      95            6 :         };
      96            6 :         let size_result = storage_model.calculate();
      97            6 :         (storage_model, size_result)
      98            6 :     }
      99              : }
     100              : 
     101              : // Main branch only. Some updates on it.
     102              : #[test]
     103            1 : fn scenario_1() {
     104            1 :     // Create main branch
     105            1 :     let mut scenario = ScenarioBuilder::new("main");
     106            1 : 
     107            1 :     // Bulk load 5 GB of data to it
     108            1 :     scenario.insert("main", 5_000);
     109              : 
     110              :     // Stream of updates
     111            6 :     for _ in 0..5 {
     112            5 :         scenario.update("main", 1_000);
     113            5 :     }
     114              : 
     115              :     // Calculate the synthetic size with retention horizon 1000
     116            1 :     let (_model, result) = scenario.calculate(1000);
     117            1 : 
     118            1 :     // The end of the branch is at LSN 10000. Need to retain
     119            1 :     // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
     120            1 :     // The logical snapshot has size 5000.
     121            1 :     assert_eq!(result.total_size, 5000 + 1000);
     122            1 : }
     123              : 
     124              : // Main branch with some updates, plus a child branch forked from it.
     125              : #[test]
     126            1 : fn scenario_2() {
     127            1 :     // Create main branch
     128            1 :     let mut scenario = ScenarioBuilder::new("main");
     129            1 : 
     130            1 :     // Bulk load 5 GB of data to it
     131            1 :     scenario.insert("main", 5_000);
     132              : 
     133              :     // Stream of updates
     134            6 :     for _ in 0..5 {
     135            5 :         scenario.update("main", 1_000);
     136            5 :     }
     137              : 
     138              :     // Branch
     139            1 :     scenario.branch("main", "child");
     140            1 :     scenario.update("child", 1_000);
     141            1 : 
     142            1 :     // More updates on parent
     143            1 :     scenario.update("main", 1_000);
     144            1 : 
     145            1 :     //
     146            1 :     // The history looks like this now:
     147            1 :     //
     148            1 :     //         10000          11000
     149            1 :     // *----*----*--------------*    main
     150            1 :     //           |
     151            1 :     //           |            11000
     152            1 :     //           +--------------     child
     153            1 :     //
     154            1 :     //
     155            1 :     // With retention horizon 1000, we need to retain a logical snapshot
     156            1 :     // at the branch point, size 5000, and the WAL from 10000-11000 on
     157            1 :     // both branches.
     158            1 :     let (_model, result) = scenario.calculate(1000);
     159            1 : 
     160            1 :     assert_eq!(result.total_size, 5000 + 1000 + 1000);
     161            1 : }
     162              : 
     163              : // Like 2, but more updates on main
     164              : #[test]
     165            1 : fn scenario_3() {
     166            1 :     // Create main branch
     167            1 :     let mut scenario = ScenarioBuilder::new("main");
     168            1 : 
     169            1 :     // Bulk load 5 GB of data to it
     170            1 :     scenario.insert("main", 5_000);
     171              : 
     172              :     // Stream of updates
     173            6 :     for _ in 0..5 {
     174            5 :         scenario.update("main", 1_000);
     175            5 :     }
     176              : 
     177              :     // Branch
     178            1 :     scenario.branch("main", "child");
     179            1 :     scenario.update("child", 1_000);
     180              : 
     181              :     // More updates on parent
     182            6 :     for _ in 0..5 {
     183            5 :         scenario.update("main", 1_000);
     184            5 :     }
     185              : 
     186              :     //
     187              :     // The history looks like this now:
     188              :     //
     189              :     //         10000                                 15000
     190              :     // *----*----*------------------------------------*    main
     191              :     //           |
     192              :     //           |            11000
     193              :     //           +--------------     child
     194              :     //
     195              :     //
     196              :     // With retention horizon 1000, it's still cheapest to retain
     197              :     // - snapshot at branch point (size 5000)
     198              :     // - WAL on child between 10000-11000
     199              :     // - WAL on main between 10000-15000
     200              :     //
     201              :     // This is in total 5000 + 1000 + 5000
     202              :     //
     203            1 :     let (_model, result) = scenario.calculate(1000);
     204            1 : 
     205            1 :     assert_eq!(result.total_size, 5000 + 1000 + 5000);
     206            1 : }
     207              : 
     208              : // Diverged branches
     209              : #[test]
     210            1 : fn scenario_4() {
     211            1 :     // Create main branch
     212            1 :     let mut scenario = ScenarioBuilder::new("main");
     213            1 : 
     214            1 :     // Bulk load 5 GB of data to it
     215            1 :     scenario.insert("main", 5_000);
     216              : 
     217              :     // Stream of updates
     218            6 :     for _ in 0..5 {
     219            5 :         scenario.update("main", 1_000);
     220            5 :     }
     221              : 
     222              :     // Branch
     223            1 :     scenario.branch("main", "child");
     224            1 :     scenario.update("child", 1_000);
     225              : 
     226              :     // More updates on parent
     227            9 :     for _ in 0..8 {
     228            8 :         scenario.update("main", 1_000);
     229            8 :     }
     230              : 
     231              :     //
     232              :     // The history looks like this now:
     233              :     //
     234              :     //         10000                                 18000
     235              :     // *----*----*------------------------------------*    main
     236              :     //           |
     237              :     //           |            11000
     238              :     //           +--------------     child
     239              :     //
     240              :     //
     241              :     // With retention horizon 1000, it's now cheapest to retain
     242              :     // separate snapshots on both branches:
     243              :     // - snapshot on main branch at LSN 17000 (size 5000)
     244              :     // - WAL on main between 17000-18000
     245              :     // - snapshot on child branch at LSN 10000 (size 5000)
     246              :     // - WAL on child between 10000-11000
     247              :     //
     248              :     // This is in total 5000 + 1000 + 5000 + 1000 = 12000
     249              :     //
     250              :     // (If we used the method from the previous scenario, and
     251              :     // kept only snapshot at the branch point, we'd need to keep
     252              :     // all the WAL between 10000-18000 on the main branch, so
     253              :     // the total size would be 5000 + 1000 + 8000 = 14000. The
     254              :     // calculation always picks the cheapest alternative)
     255              : 
     256            1 :     let (_model, result) = scenario.calculate(1000);
     257            1 : 
     258            1 :     assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
     259            1 : }
     260              : 
     261              : #[test]
     262            1 : fn scenario_5() {
     263            1 :     let mut scenario = ScenarioBuilder::new("a");
     264            1 :     scenario.insert("a", 5000);
     265            1 :     scenario.branch("a", "b");
     266            1 :     scenario.update("b", 4000);
     267            1 :     scenario.update("a", 2000);
     268            1 :     scenario.branch("a", "c");
     269            1 :     scenario.insert("c", 4000);
     270            1 :     scenario.insert("a", 2000);
     271            1 : 
     272            1 :     let (_model, result) = scenario.calculate(1000);
     273            1 : 
     274            1 :     assert_eq!(result.total_size, 17000);
     275            1 : }
     276              : 
     277              : #[test]
     278            1 : fn scenario_6() {
     279            1 :     let branches = [
     280            1 :         "7ff1edab8182025f15ae33482edb590a",
     281            1 :         "b1719e044db05401a05a2ed588a3ad3f",
     282            1 :         "0xb68d6691c895ad0a70809470020929ef",
     283            1 :     ];
     284            1 : 
     285            1 :     // compared to other scenarios, this one uses bytes instead of kB
     286            1 : 
     287            1 :     let mut scenario = ScenarioBuilder::new("");
     288            1 : 
     289            1 :     scenario.branch("", branches[0]); // at 0
     290            1 :     scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
     291            1 :     scenario.branch(branches[0], branches[1]); // at 108951064
     292            1 :     scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
     293            1 :     scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
     294            1 :     scenario.branch(branches[0], branches[2]); // at 283415424
     295            1 :     scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
     296            1 :     scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
     297            1 : 
     298            1 :     let (model, result) = scenario.calculate(100_000);
     299            1 : 
     300            1 :     // FIXME: We previously calculated 333_792_000. But with this PR, we get
     301            1 :     // a much lower number. At a quick look at the model output and the
     302            1 :     // calculations here, the new result seems correct to me.
     303            1 :     eprintln!(
     304            1 :         " MODEL: {}",
     305            1 :         serde_json::to_string(&model.segments).unwrap()
     306            1 :     );
     307            1 :     eprintln!(
     308            1 :         "RESULT: {}",
     309            1 :         serde_json::to_string(&result.segments).unwrap()
     310            1 :     );
     311            1 : 
     312            1 :     assert_eq!(result.total_size, 136_236_928);
     313            1 : }
        

Generated by: LCOV version 2.1-beta