LCOV - code coverage report
Current view: top level - libs/tenant_size_model/tests - tests.rs (source / functions) Coverage Total Hit
Test: 2aa98e37cd3250b9a68c97ef6050b16fe702ab33.info Lines: 98.0 % 203 199
Test Date: 2024-08-29 11:33:10 Functions: 92.3 % 13 12

            Line data    Source code
       1              : //! Tenant size model tests.
       2              : 
       3              : use tenant_size_model::{Segment, SizeResult, StorageModel};
       4              : 
       5              : use std::collections::HashMap;
       6              : 
       7              : struct ScenarioBuilder {
       8              :     segments: Vec<Segment>,
       9              : 
      10              :     /// Mapping from the branch name to the index of a segment describing its latest state.
      11              :     branches: HashMap<String, usize>,
      12              : }
      13              : 
      14              : impl ScenarioBuilder {
      15              :     /// Creates a new storage with the given default branch name.
      16           36 :     pub fn new(initial_branch: &str) -> ScenarioBuilder {
      17           36 :         let init_segment = Segment {
      18           36 :             parent: None,
      19           36 :             lsn: 0,
      20           36 :             size: Some(0),
      21           36 :             needed: false, // determined later
      22           36 :         };
      23           36 : 
      24           36 :         ScenarioBuilder {
      25           36 :             segments: vec![init_segment],
      26           36 :             branches: HashMap::from([(initial_branch.into(), 0)]),
      27           36 :         }
      28           36 :     }
      29              : 
      30              :     /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
      31          306 :     pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
      32          306 :         let lastseg_id = *self.branches.get(branch).unwrap();
      33          306 :         let newseg_id = self.segments.len();
      34          306 :         let lastseg = &mut self.segments[lastseg_id];
      35          306 : 
      36          306 :         let newseg = Segment {
      37          306 :             parent: Some(lastseg_id),
      38          306 :             lsn: lastseg.lsn + lsn_bytes,
      39          306 :             size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
      40          306 :             needed: false,
      41          306 :         };
      42          306 : 
      43          306 :         self.segments.push(newseg);
      44          306 :         *self.branches.get_mut(branch).expect("read already") = newseg_id;
      45          306 :     }
      46              : 
      47           42 :     pub fn insert(&mut self, branch: &str, bytes: u64) {
      48           42 :         self.modify_branch(branch, bytes, bytes as i64);
      49           42 :     }
      50              : 
      51          234 :     pub fn update(&mut self, branch: &str, bytes: u64) {
      52          234 :         self.modify_branch(branch, bytes, 0i64);
      53          234 :     }
      54              : 
      55            0 :     pub fn _delete(&mut self, branch: &str, bytes: u64) {
      56            0 :         self.modify_branch(branch, bytes, -(bytes as i64));
      57            0 :     }
      58              : 
      59              :     /// Panics if the parent branch cannot be found.
      60           48 :     pub fn branch(&mut self, parent: &str, name: &str) {
      61           48 :         // Find the right segment
      62           48 :         let branchseg_id = *self
      63           48 :             .branches
      64           48 :             .get(parent)
      65           48 :             .expect("should had found the parent by key");
      66           48 :         let _branchseg = &mut self.segments[branchseg_id];
      67           48 : 
      68           48 :         // Create branch name for it
      69           48 :         self.branches.insert(name.to_string(), branchseg_id);
      70           48 :     }
      71              : 
      72           36 :     pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
      73              :         // Phase 1: Mark all the segments that need to be retained
      74           84 :         for (_branch, &last_seg_id) in self.branches.iter() {
      75           84 :             let last_seg = &self.segments[last_seg_id];
      76           84 :             let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
      77           84 :             let mut seg_id = last_seg_id;
      78          162 :             loop {
      79          162 :                 let seg = &mut self.segments[seg_id];
      80          162 :                 if seg.lsn <= cutoff_lsn {
      81           84 :                     break;
      82           78 :                 }
      83           78 :                 seg.needed = true;
      84           78 :                 if let Some(prev_seg_id) = seg.parent {
      85           78 :                     seg_id = prev_seg_id;
      86           78 :                 } else {
      87            0 :                     break;
      88              :                 }
      89              :             }
      90              :         }
      91              : 
      92              :         // Perform the calculation
      93           36 :         let storage_model = StorageModel {
      94           36 :             segments: self.segments.clone(),
      95           36 :         };
      96           36 :         let size_result = storage_model.calculate();
      97           36 :         (storage_model, size_result)
      98           36 :     }
      99              : }
     100              : 
     101              : // Main branch only. Some updates on it.
     102              : #[test]
     103            6 : fn scenario_1() {
     104            6 :     // Create main branch
     105            6 :     let mut scenario = ScenarioBuilder::new("main");
     106            6 : 
     107            6 :     // Bulk load 5 GB of data to it
     108            6 :     scenario.insert("main", 5_000);
     109              : 
     110              :     // Stream of updates
     111           36 :     for _ in 0..5 {
     112           30 :         scenario.update("main", 1_000);
     113           30 :     }
     114              : 
     115              :     // Calculate the synthetic size with retention horizon 1000
     116            6 :     let (_model, result) = scenario.calculate(1000);
     117            6 : 
     118            6 :     // The end of the branch is at LSN 10000. Need to retain
     119            6 :     // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
     120            6 :     // The logical snapshot has size 5000.
     121            6 :     assert_eq!(result.total_size, 5000 + 1000);
     122            6 : }
     123              : 
     124              : // Main branch only. Some updates on it.
     125              : #[test]
     126            6 : fn scenario_2() {
     127            6 :     // Create main branch
     128            6 :     let mut scenario = ScenarioBuilder::new("main");
     129            6 : 
     130            6 :     // Bulk load 5 GB of data to it
     131            6 :     scenario.insert("main", 5_000);
     132              : 
     133              :     // Stream of updates
     134           36 :     for _ in 0..5 {
     135           30 :         scenario.update("main", 1_000);
     136           30 :     }
     137              : 
     138              :     // Branch
     139            6 :     scenario.branch("main", "child");
     140            6 :     scenario.update("child", 1_000);
     141            6 : 
     142            6 :     // More updates on parent
     143            6 :     scenario.update("main", 1_000);
     144            6 : 
     145            6 :     //
     146            6 :     // The history looks like this now:
     147            6 :     //
     148            6 :     //         10000          11000
     149            6 :     // *----*----*--------------*    main
     150            6 :     //           |
     151            6 :     //           |            11000
     152            6 :     //           +--------------     child
     153            6 :     //
     154            6 :     //
     155            6 :     // With retention horizon 1000, we need to retain logical snapshot
     156            6 :     // at the branch point, size 5000, and the WAL from 10000-11000 on
     157            6 :     // both branches.
     158            6 :     let (_model, result) = scenario.calculate(1000);
     159            6 : 
     160            6 :     assert_eq!(result.total_size, 5000 + 1000 + 1000);
     161            6 : }
     162              : 
     163              : // Like 2, but more updates on main
     164              : #[test]
     165            6 : fn scenario_3() {
     166            6 :     // Create main branch
     167            6 :     let mut scenario = ScenarioBuilder::new("main");
     168            6 : 
     169            6 :     // Bulk load 5 GB of data to it
     170            6 :     scenario.insert("main", 5_000);
     171              : 
     172              :     // Stream of updates
     173           36 :     for _ in 0..5 {
     174           30 :         scenario.update("main", 1_000);
     175           30 :     }
     176              : 
     177              :     // Branch
     178            6 :     scenario.branch("main", "child");
     179            6 :     scenario.update("child", 1_000);
     180              : 
     181              :     // More updates on parent
     182           36 :     for _ in 0..5 {
     183           30 :         scenario.update("main", 1_000);
     184           30 :     }
     185              : 
     186              :     //
     187              :     // The history looks like this now:
     188              :     //
     189              :     //         10000                                 15000
     190              :     // *----*----*------------------------------------*    main
     191              :     //           |
     192              :     //           |            11000
     193              :     //           +--------------     child
     194              :     //
     195              :     //
     196              :     // With retention horizon 1000, it's still cheapest to retain
     197              :     // - snapshot at branch point (size 5000)
     198              :     // - WAL on child between 10000-11000
     199              :     // - WAL on main between 10000-15000
     200              :     //
     201              :     // This is in total 5000 + 1000 + 5000
     202              :     //
     203            6 :     let (_model, result) = scenario.calculate(1000);
     204            6 : 
     205            6 :     assert_eq!(result.total_size, 5000 + 1000 + 5000);
     206            6 : }
     207              : 
     208              : // Diverged branches
     209              : #[test]
     210            6 : fn scenario_4() {
     211            6 :     // Create main branch
     212            6 :     let mut scenario = ScenarioBuilder::new("main");
     213            6 : 
     214            6 :     // Bulk load 5 GB of data to it
     215            6 :     scenario.insert("main", 5_000);
     216              : 
     217              :     // Stream of updates
     218           36 :     for _ in 0..5 {
     219           30 :         scenario.update("main", 1_000);
     220           30 :     }
     221              : 
     222              :     // Branch
     223            6 :     scenario.branch("main", "child");
     224            6 :     scenario.update("child", 1_000);
     225              : 
     226              :     // More updates on parent
     227           54 :     for _ in 0..8 {
     228           48 :         scenario.update("main", 1_000);
     229           48 :     }
     230              : 
     231              :     //
     232              :     // The history looks like this now:
     233              :     //
     234              :     //         10000                                 18000
     235              :     // *----*----*------------------------------------*    main
     236              :     //           |
     237              :     //           |            11000
     238              :     //           +--------------     child
     239              :     //
     240              :     //
     241              :     // With retention horizon 1000, it's now cheapest to retain
     242              :     // separate snapshots on both branches:
     243              :     // - snapshot on main branch at LSN 17000 (size 5000)
     244              :     // - WAL on main between 17000-18000
     245              :     // - snapshot on child branch at LSN 10000 (size 5000)
     246              :     // - WAL on child between 10000-11000
     247              :     //
     248              :     // This is in total 5000 + 1000 + 5000 + 1000 = 12000
     249              :     //
     250              :     // (If we used the method from the previous scenario, and
     251              :     // kept only snapshot at the branch point, we'd need to keep
     252              :     // all the WAL between 10000-18000 on the main branch, so
     253              :     // the total size would be 5000 + 1000 + 8000 = 14000. The
     254              :     // calculation always picks the cheapest alternative)
     255              : 
     256            6 :     let (_model, result) = scenario.calculate(1000);
     257            6 : 
     258            6 :     assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
     259            6 : }
     260              : 
     261              : #[test]
     262            6 : fn scenario_5() {
     263            6 :     let mut scenario = ScenarioBuilder::new("a");
     264            6 :     scenario.insert("a", 5000);
     265            6 :     scenario.branch("a", "b");
     266            6 :     scenario.update("b", 4000);
     267            6 :     scenario.update("a", 2000);
     268            6 :     scenario.branch("a", "c");
     269            6 :     scenario.insert("c", 4000);
     270            6 :     scenario.insert("a", 2000);
     271            6 : 
     272            6 :     let (_model, result) = scenario.calculate(1000);
     273            6 : 
     274            6 :     assert_eq!(result.total_size, 17000);
     275            6 : }
     276              : 
     277              : #[test]
     278            6 : fn scenario_6() {
     279            6 :     let branches = [
     280            6 :         "7ff1edab8182025f15ae33482edb590a",
     281            6 :         "b1719e044db05401a05a2ed588a3ad3f",
     282            6 :         "0xb68d6691c895ad0a70809470020929ef",
     283            6 :     ];
     284            6 : 
     285            6 :     // compared to other scenarios, this one uses bytes instead of kB
     286            6 : 
     287            6 :     let mut scenario = ScenarioBuilder::new("");
     288            6 : 
     289            6 :     scenario.branch("", branches[0]); // at 0
     290            6 :     scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
     291            6 :     scenario.branch(branches[0], branches[1]); // at 108951064
     292            6 :     scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
     293            6 :     scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
     294            6 :     scenario.branch(branches[0], branches[2]); // at 283415424
     295            6 :     scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
     296            6 :     scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
     297            6 : 
     298            6 :     let (model, result) = scenario.calculate(100_000);
     299            6 : 
     300            6 :     // FIXME: We previously calculated 333_792_000. But with this PR, we get
     301            6 :     // a much lower number. At a quick look at the model output and the
     302            6 :     // calculations here, the new result seems correct to me.
     303            6 :     eprintln!(
     304            6 :         " MODEL: {}",
     305            6 :         serde_json::to_string(&model.segments).unwrap()
     306            6 :     );
     307            6 :     eprintln!(
     308            6 :         "RESULT: {}",
     309            6 :         serde_json::to_string(&result.segments).unwrap()
     310            6 :     );
     311            6 : 
     312            6 :     assert_eq!(result.total_size, 136_236_928);
     313            6 : }
        

Generated by: LCOV version 2.1-beta