LCOV - code coverage report
Current view: top level - libs/tenant_size_model/tests - tests.rs (source / functions) Coverage Total Hit
Test: d0feeceb9d5ee9c8e73bee7d4ffcced539793178.info Lines: 98.0 % 203 199
Test Date: 2024-06-26 15:19:01 Functions: 92.3 % 13 12

            Line data    Source code
       1              : //! Tenant size model tests.
       2              : 
       3              : use tenant_size_model::{Segment, SizeResult, StorageModel};
       4              : 
       5              : use std::collections::HashMap;
       6              : 
       7              : struct ScenarioBuilder {
       8              :     segments: Vec<Segment>,
       9              : 
      10              :     /// Mapping from the branch name to the index of a segment describing its latest state.
      11              :     branches: HashMap<String, usize>,
      12              : }
      13              : 
      14              : impl ScenarioBuilder {
      15              :     /// Creates a new storage with the given default branch name.
      16           12 :     pub fn new(initial_branch: &str) -> ScenarioBuilder {
      17           12 :         let init_segment = Segment {
      18           12 :             parent: None,
      19           12 :             lsn: 0,
      20           12 :             size: Some(0),
      21           12 :             needed: false, // determined later
      22           12 :         };
      23           12 : 
      24           12 :         ScenarioBuilder {
      25           12 :             segments: vec![init_segment],
      26           12 :             branches: HashMap::from([(initial_branch.into(), 0)]),
      27           12 :         }
      28           12 :     }
      29              : 
      30              :     /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
      31          102 :     pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
      32          102 :         let lastseg_id = *self.branches.get(branch).unwrap();
      33          102 :         let newseg_id = self.segments.len();
      34          102 :         let lastseg = &mut self.segments[lastseg_id];
      35          102 : 
      36          102 :         let newseg = Segment {
      37          102 :             parent: Some(lastseg_id),
      38          102 :             lsn: lastseg.lsn + lsn_bytes,
      39          102 :             size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
      40          102 :             needed: false,
      41          102 :         };
      42          102 : 
      43          102 :         self.segments.push(newseg);
      44          102 :         *self.branches.get_mut(branch).expect("read already") = newseg_id;
      45          102 :     }
      46              : 
      47           14 :     pub fn insert(&mut self, branch: &str, bytes: u64) {
      48           14 :         self.modify_branch(branch, bytes, bytes as i64);
      49           14 :     }
      50              : 
      51           78 :     pub fn update(&mut self, branch: &str, bytes: u64) {
      52           78 :         self.modify_branch(branch, bytes, 0i64);
      53           78 :     }
      54              : 
      55            0 :     pub fn _delete(&mut self, branch: &str, bytes: u64) {
      56            0 :         self.modify_branch(branch, bytes, -(bytes as i64));
      57            0 :     }
      58              : 
      59              :     /// Panics if the parent branch cannot be found.
      60           16 :     pub fn branch(&mut self, parent: &str, name: &str) {
      61           16 :         // Find the right segment
      62           16 :         let branchseg_id = *self
      63           16 :             .branches
      64           16 :             .get(parent)
      65           16 :             .expect("should had found the parent by key");
      66           16 :         let _branchseg = &mut self.segments[branchseg_id];
      67           16 : 
      68           16 :         // Create branch name for it
      69           16 :         self.branches.insert(name.to_string(), branchseg_id);
      70           16 :     }
      71              : 
      72           12 :     pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
      73              :         // Phase 1: Mark all the segments that need to be retained
      74           28 :         for (_branch, &last_seg_id) in self.branches.iter() {
      75           28 :             let last_seg = &self.segments[last_seg_id];
      76           28 :             let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
      77           28 :             let mut seg_id = last_seg_id;
      78           54 :             loop {
      79           54 :                 let seg = &mut self.segments[seg_id];
      80           54 :                 if seg.lsn <= cutoff_lsn {
      81           28 :                     break;
      82           26 :                 }
      83           26 :                 seg.needed = true;
      84           26 :                 if let Some(prev_seg_id) = seg.parent {
      85           26 :                     seg_id = prev_seg_id;
      86           26 :                 } else {
      87            0 :                     break;
      88              :                 }
      89              :             }
      90              :         }
      91              : 
      92              :         // Perform the calculation
      93           12 :         let storage_model = StorageModel {
      94           12 :             segments: self.segments.clone(),
      95           12 :         };
      96           12 :         let size_result = storage_model.calculate();
      97           12 :         (storage_model, size_result)
      98           12 :     }
      99              : }
     100              : 
     101              : // Main branch only. Some updates on it.
     102              : #[test]
     103            2 : fn scenario_1() {
     104            2 :     // Create main branch
     105            2 :     let mut scenario = ScenarioBuilder::new("main");
     106            2 : 
     107            2 :     // Bulk load 5 GB of data to it
     108            2 :     scenario.insert("main", 5_000);
     109              : 
     110              :     // Stream of updates
     111           12 :     for _ in 0..5 {
     112           10 :         scenario.update("main", 1_000);
     113           10 :     }
     114              : 
     115              :     // Calculate the synthetic size with retention horizon 1000
     116            2 :     let (_model, result) = scenario.calculate(1000);
     117            2 : 
     118            2 :     // The end of the branch is at LSN 10000. Need to retain
     119            2 :     // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
     120            2 :     // The logical snapshot has size 5000.
     121            2 :     assert_eq!(result.total_size, 5000 + 1000);
     122            2 : }
     123              : 
     124              : // Main branch only. Some updates on it.
     125              : #[test]
     126            2 : fn scenario_2() {
     127            2 :     // Create main branch
     128            2 :     let mut scenario = ScenarioBuilder::new("main");
     129            2 : 
     130            2 :     // Bulk load 5 GB of data to it
     131            2 :     scenario.insert("main", 5_000);
     132              : 
     133              :     // Stream of updates
     134           12 :     for _ in 0..5 {
     135           10 :         scenario.update("main", 1_000);
     136           10 :     }
     137              : 
     138              :     // Branch
     139            2 :     scenario.branch("main", "child");
     140            2 :     scenario.update("child", 1_000);
     141            2 : 
     142            2 :     // More updates on parent
     143            2 :     scenario.update("main", 1_000);
     144            2 : 
     145            2 :     //
     146            2 :     // The history looks like this now:
     147            2 :     //
     148            2 :     //         10000          11000
     149            2 :     // *----*----*--------------*    main
     150            2 :     //           |
     151            2 :     //           |            11000
     152            2 :     //           +--------------     child
     153            2 :     //
     154            2 :     //
     155            2 :     // With retention horizon 1000, we need to retain logical snapshot
     156            2 :     // at the branch point, size 5000, and the WAL from 10000-11000 on
     157            2 :     // both branches.
     158            2 :     let (_model, result) = scenario.calculate(1000);
     159            2 : 
     160            2 :     assert_eq!(result.total_size, 5000 + 1000 + 1000);
     161            2 : }
     162              : 
     163              : // Like 2, but more updates on main
     164              : #[test]
     165            2 : fn scenario_3() {
     166            2 :     // Create main branch
     167            2 :     let mut scenario = ScenarioBuilder::new("main");
     168            2 : 
     169            2 :     // Bulk load 5 GB of data to it
     170            2 :     scenario.insert("main", 5_000);
     171              : 
     172              :     // Stream of updates
     173           12 :     for _ in 0..5 {
     174           10 :         scenario.update("main", 1_000);
     175           10 :     }
     176              : 
     177              :     // Branch
     178            2 :     scenario.branch("main", "child");
     179            2 :     scenario.update("child", 1_000);
     180              : 
     181              :     // More updates on parent
     182           12 :     for _ in 0..5 {
     183           10 :         scenario.update("main", 1_000);
     184           10 :     }
     185              : 
     186              :     //
     187              :     // The history looks like this now:
     188              :     //
     189              :     //         10000                                 15000
     190              :     // *----*----*------------------------------------*    main
     191              :     //           |
     192              :     //           |            11000
     193              :     //           +--------------     child
     194              :     //
     195              :     //
     196              :     // With retention horizon 1000, it's still cheapest to retain
     197              :     // - snapshot at branch point (size 5000)
     198              :     // - WAL on child between 10000-11000
     199              :     // - WAL on main between 10000-15000
     200              :     //
     201              :     // This is in total 5000 + 1000 + 5000
     202              :     //
     203            2 :     let (_model, result) = scenario.calculate(1000);
     204            2 : 
     205            2 :     assert_eq!(result.total_size, 5000 + 1000 + 5000);
     206            2 : }
     207              : 
     208              : // Diverged branches
     209              : #[test]
     210            2 : fn scenario_4() {
     211            2 :     // Create main branch
     212            2 :     let mut scenario = ScenarioBuilder::new("main");
     213            2 : 
     214            2 :     // Bulk load 5 GB of data to it
     215            2 :     scenario.insert("main", 5_000);
     216              : 
     217              :     // Stream of updates
     218           12 :     for _ in 0..5 {
     219           10 :         scenario.update("main", 1_000);
     220           10 :     }
     221              : 
     222              :     // Branch
     223            2 :     scenario.branch("main", "child");
     224            2 :     scenario.update("child", 1_000);
     225              : 
     226              :     // More updates on parent
     227           18 :     for _ in 0..8 {
     228           16 :         scenario.update("main", 1_000);
     229           16 :     }
     230              : 
     231              :     //
     232              :     // The history looks like this now:
     233              :     //
     234              :     //         10000                                 18000
     235              :     // *----*----*------------------------------------*    main
     236              :     //           |
     237              :     //           |            11000
     238              :     //           +--------------     child
     239              :     //
     240              :     //
     241              :     // With retention horizon 1000, it's now cheapest to retain
     242              :     // separate snapshots on both branches:
     243              :     // - snapshot on main branch at LSN 17000 (size 5000)
     244              :     // - WAL on main between 17000-18000
     245              :     // - snapshot on child branch at LSN 10000 (size 5000)
     246              :     // - WAL on child between 10000-11000
     247              :     //
     248              :     // This is in total 5000 + 1000 + 5000 + 1000 = 12000
     249              :     //
     250              :     // (If we used the method from the previous scenario, and
     251              :     // kept only snapshot at the branch point, we'd need to keep
     252              :     // all the WAL between 10000-18000 on the main branch, so
     253              :     // the total size would be 5000 + 1000 + 8000 = 14000. The
     254              :     // calculation always picks the cheapest alternative)
     255              : 
     256            2 :     let (_model, result) = scenario.calculate(1000);
     257            2 : 
     258            2 :     assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
     259            2 : }
     260              : 
     261              : #[test]
     262            2 : fn scenario_5() {
     263            2 :     let mut scenario = ScenarioBuilder::new("a");
     264            2 :     scenario.insert("a", 5000);
     265            2 :     scenario.branch("a", "b");
     266            2 :     scenario.update("b", 4000);
     267            2 :     scenario.update("a", 2000);
     268            2 :     scenario.branch("a", "c");
     269            2 :     scenario.insert("c", 4000);
     270            2 :     scenario.insert("a", 2000);
     271            2 : 
     272            2 :     let (_model, result) = scenario.calculate(1000);
     273            2 : 
     274            2 :     assert_eq!(result.total_size, 17000);
     275            2 : }
     276              : 
     277              : #[test]
     278            2 : fn scenario_6() {
     279            2 :     let branches = [
     280            2 :         "7ff1edab8182025f15ae33482edb590a",
     281            2 :         "b1719e044db05401a05a2ed588a3ad3f",
     282            2 :         "0xb68d6691c895ad0a70809470020929ef",
     283            2 :     ];
     284            2 : 
     285            2 :     // compared to other scenarios, this one uses bytes instead of kB
     286            2 : 
     287            2 :     let mut scenario = ScenarioBuilder::new("");
     288            2 : 
     289            2 :     scenario.branch("", branches[0]); // at 0
     290            2 :     scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
     291            2 :     scenario.branch(branches[0], branches[1]); // at 108951064
     292            2 :     scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
     293            2 :     scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
     294            2 :     scenario.branch(branches[0], branches[2]); // at 283415424
     295            2 :     scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
     296            2 :     scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
     297            2 : 
     298            2 :     let (model, result) = scenario.calculate(100_000);
     299            2 : 
     300            2 :     // FIXME: We previously calculated 333_792_000. But with this PR, we get
     301            2 :     // a much lower number. At a quick look at the model output and the
     302            2 :     // calculations here, the new result seems correct to me.
     303            2 :     eprintln!(
     304            2 :         " MODEL: {}",
     305            2 :         serde_json::to_string(&model.segments).unwrap()
     306            2 :     );
     307            2 :     eprintln!(
     308            2 :         "RESULT: {}",
     309            2 :         serde_json::to_string(&result.segments).unwrap()
     310            2 :     );
     311            2 : 
     312            2 :     assert_eq!(result.total_size, 136_236_928);
     313            2 : }
        

Generated by: LCOV version 2.1-beta