LCOV - differential code coverage report
Current view: top level - libs/tenant_size_model/tests - tests.rs (source / functions) Coverage Total Hit UBC CBC
Current: f6946e90941b557c917ac98cd5a7e9506d180f3e.info Lines: 98.1 % 209 205 4 205
Current Date: 2023-10-19 02:04:12 Functions: 94.7 % 19 18 1 18
Baseline: c8637f37369098875162f194f92736355783b050.info
Baseline Date: 2023-10-18 20:25:20

           TLA  Line data    Source code
       1                 : //! Tenant size model tests.
       2                 : 
       3                 : use tenant_size_model::{Segment, SizeResult, StorageModel};
       4                 : 
       5                 : use std::collections::HashMap;
       6                 : 
       7                 : struct ScenarioBuilder {
       8                 :     segments: Vec<Segment>,
       9                 : 
      10                 :     /// Mapping from the branch name to the index of a segment describing its latest state.
      11                 :     branches: HashMap<String, usize>,
      12                 : }
      13                 : 
      14                 : impl ScenarioBuilder {
      15                 :     /// Creates a new storage with the given default branch name.
      16 CBC           6 :     pub fn new(initial_branch: &str) -> ScenarioBuilder {
      17               6 :         let init_segment = Segment {
      18               6 :             parent: None,
      19               6 :             lsn: 0,
      20               6 :             size: Some(0),
      21               6 :             needed: false, // determined later
      22               6 :         };
      23               6 : 
      24               6 :         ScenarioBuilder {
      25               6 :             segments: vec![init_segment],
      26               6 :             branches: HashMap::from([(initial_branch.into(), 0)]),
      27               6 :         }
      28               6 :     }
      29                 : 
      30                 :     /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
      31              51 :     pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
      32              51 :         let lastseg_id = *self.branches.get(branch).unwrap();
      33              51 :         let newseg_id = self.segments.len();
      34              51 :         let lastseg = &mut self.segments[lastseg_id];
      35              51 : 
      36              51 :         let newseg = Segment {
      37              51 :             parent: Some(lastseg_id),
      38              51 :             lsn: lastseg.lsn + lsn_bytes,
      39              51 :             size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
      40              51 :             needed: false,
      41              51 :         };
      42              51 : 
      43              51 :         self.segments.push(newseg);
      44              51 :         *self.branches.get_mut(branch).expect("read already") = newseg_id;
      45              51 :     }
      46                 : 
      47               7 :     pub fn insert(&mut self, branch: &str, bytes: u64) {
      48               7 :         self.modify_branch(branch, bytes, bytes as i64);
      49               7 :     }
      50                 : 
      51              39 :     pub fn update(&mut self, branch: &str, bytes: u64) {
      52              39 :         self.modify_branch(branch, bytes, 0i64);
      53              39 :     }
      54                 : 
      55 UBC           0 :     pub fn _delete(&mut self, branch: &str, bytes: u64) {
      56               0 :         self.modify_branch(branch, bytes, -(bytes as i64));
      57               0 :     }
      58                 : 
      59                 :     /// Panics if the parent branch cannot be found.
      60 CBC           8 :     pub fn branch(&mut self, parent: &str, name: &str) {
      61               8 :         // Find the right segment
      62               8 :         let branchseg_id = *self
      63               8 :             .branches
      64               8 :             .get(parent)
      65               8 :             .expect("should had found the parent by key");
      66               8 :         let _branchseg = &mut self.segments[branchseg_id];
      67               8 : 
      68               8 :         // Create branch name for it
      69               8 :         self.branches.insert(name.to_string(), branchseg_id);
      70               8 :     }
      71                 : 
      72               6 :     pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
      73                 :         // Phase 1: Mark all the segments that need to be retained
      74              14 :         for (_branch, &last_seg_id) in self.branches.iter() {
      75              14 :             let last_seg = &self.segments[last_seg_id];
      76              14 :             let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
      77              14 :             let mut seg_id = last_seg_id;
      78              27 :             loop {
      79              27 :                 let seg = &mut self.segments[seg_id];
      80              27 :                 if seg.lsn <= cutoff_lsn {
      81              14 :                     break;
      82              13 :                 }
      83              13 :                 seg.needed = true;
      84              13 :                 if let Some(prev_seg_id) = seg.parent {
      85              13 :                     seg_id = prev_seg_id;
      86              13 :                 } else {
      87 UBC           0 :                     break;
      88                 :                 }
      89                 :             }
      90                 :         }
      91                 : 
      92                 :         // Perform the calculation
      93 CBC           6 :         let storage_model = StorageModel {
      94               6 :             segments: self.segments.clone(),
      95               6 :         };
      96               6 :         let size_result = storage_model.calculate();
      97               6 :         (storage_model, size_result)
      98               6 :     }
      99                 : }
     100                 : 
     101                 : // Main branch only. Some updates on it.
     102               1 : #[test]
     103               1 : fn scenario_1() {
     104               1 :     // Create main branch
     105               1 :     let mut scenario = ScenarioBuilder::new("main");
     106               1 : 
     107               1 :     // Bulk load 5 GB of data to it
     108               1 :     scenario.insert("main", 5_000);
     109                 : 
     110                 :     // Stream of updates
     111               6 :     for _ in 0..5 {
     112               5 :         scenario.update("main", 1_000);
     113               5 :     }
     114                 : 
     115                 :     // Calculate the synthetic size with retention horizon 1000
     116               1 :     let (_model, result) = scenario.calculate(1000);
     117               1 : 
     118               1 :     // The end of the branch is at LSN 10000. Need to retain
     119               1 :     // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
     120               1 :     // The logical snapshot has size 5000.
     121               1 :     assert_eq!(result.total_size, 5000 + 1000);
     122               1 : }
     123                 : 
      124                 : // Main branch plus one child branch. Some updates on both.
     125               1 : #[test]
     126               1 : fn scenario_2() {
     127               1 :     // Create main branch
     128               1 :     let mut scenario = ScenarioBuilder::new("main");
     129               1 : 
     130               1 :     // Bulk load 5 GB of data to it
     131               1 :     scenario.insert("main", 5_000);
     132                 : 
     133                 :     // Stream of updates
     134               6 :     for _ in 0..5 {
     135               5 :         scenario.update("main", 1_000);
     136               5 :     }
     137                 : 
     138                 :     // Branch
     139               1 :     scenario.branch("main", "child");
     140               1 :     scenario.update("child", 1_000);
     141               1 : 
     142               1 :     // More updates on parent
     143               1 :     scenario.update("main", 1_000);
     144               1 : 
     145               1 :     //
     146               1 :     // The history looks like this now:
     147               1 :     //
     148               1 :     //         10000          11000
     149               1 :     // *----*----*--------------*    main
     150               1 :     //           |
     151               1 :     //           |            11000
     152               1 :     //           +--------------     child
     153               1 :     //
     154               1 :     //
     155               1 :     // With retention horizon 1000, we need to retain logical snapshot
     156               1 :     // at the branch point, size 5000, and the WAL from 10000-11000 on
     157               1 :     // both branches.
     158               1 :     let (_model, result) = scenario.calculate(1000);
     159               1 : 
     160               1 :     assert_eq!(result.total_size, 5000 + 1000 + 1000);
     161               1 : }
     162                 : 
     163                 : // Like 2, but more updates on main
     164               1 : #[test]
     165               1 : fn scenario_3() {
     166               1 :     // Create main branch
     167               1 :     let mut scenario = ScenarioBuilder::new("main");
     168               1 : 
     169               1 :     // Bulk load 5 GB of data to it
     170               1 :     scenario.insert("main", 5_000);
     171                 : 
     172                 :     // Stream of updates
     173               6 :     for _ in 0..5 {
     174               5 :         scenario.update("main", 1_000);
     175               5 :     }
     176                 : 
     177                 :     // Branch
     178               1 :     scenario.branch("main", "child");
     179               1 :     scenario.update("child", 1_000);
     180                 : 
     181                 :     // More updates on parent
     182               6 :     for _ in 0..5 {
     183               5 :         scenario.update("main", 1_000);
     184               5 :     }
     185                 : 
     186                 :     //
     187                 :     // The history looks like this now:
     188                 :     //
     189                 :     //         10000                                 15000
     190                 :     // *----*----*------------------------------------*    main
     191                 :     //           |
     192                 :     //           |            11000
     193                 :     //           +--------------     child
     194                 :     //
     195                 :     //
     196                 :     // With retention horizon 1000, it's still cheapest to retain
     197                 :     // - snapshot at branch point (size 5000)
     198                 :     // - WAL on child between 10000-11000
     199                 :     // - WAL on main between 10000-15000
     200                 :     //
     201                 :     // This is in total 5000 + 1000 + 5000
     202                 :     //
     203               1 :     let (_model, result) = scenario.calculate(1000);
     204               1 : 
     205               1 :     assert_eq!(result.total_size, 5000 + 1000 + 5000);
     206               1 : }
     207                 : 
     208                 : // Diverged branches
     209               1 : #[test]
     210               1 : fn scenario_4() {
     211               1 :     // Create main branch
     212               1 :     let mut scenario = ScenarioBuilder::new("main");
     213               1 : 
     214               1 :     // Bulk load 5 GB of data to it
     215               1 :     scenario.insert("main", 5_000);
     216                 : 
     217                 :     // Stream of updates
     218               6 :     for _ in 0..5 {
     219               5 :         scenario.update("main", 1_000);
     220               5 :     }
     221                 : 
     222                 :     // Branch
     223               1 :     scenario.branch("main", "child");
     224               1 :     scenario.update("child", 1_000);
     225                 : 
     226                 :     // More updates on parent
     227               9 :     for _ in 0..8 {
     228               8 :         scenario.update("main", 1_000);
     229               8 :     }
     230                 : 
     231                 :     //
     232                 :     // The history looks like this now:
     233                 :     //
     234                 :     //         10000                                 18000
     235                 :     // *----*----*------------------------------------*    main
     236                 :     //           |
     237                 :     //           |            11000
     238                 :     //           +--------------     child
     239                 :     //
     240                 :     //
     241                 :     // With retention horizon 1000, it's now cheapest to retain
     242                 :     // separate snapshots on both branches:
     243                 :     // - snapshot on main branch at LSN 17000 (size 5000)
     244                 :     // - WAL on main between 17000-18000
     245                 :     // - snapshot on child branch at LSN 10000 (size 5000)
     246                 :     // - WAL on child between 10000-11000
     247                 :     //
     248                 :     // This is in total 5000 + 1000 + 5000 + 1000 = 12000
     249                 :     //
      250                 :     // (If we used the method from the previous scenario, and
     251                 :     // kept only snapshot at the branch point, we'd need to keep
     252                 :     // all the WAL between 10000-18000 on the main branch, so
     253                 :     // the total size would be 5000 + 1000 + 8000 = 14000. The
     254                 :     // calculation always picks the cheapest alternative)
     255                 : 
     256               1 :     let (_model, result) = scenario.calculate(1000);
     257               1 : 
     258               1 :     assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
     259               1 : }
     260                 : 
     261               1 : #[test]
     262               1 : fn scenario_5() {
     263               1 :     let mut scenario = ScenarioBuilder::new("a");
     264               1 :     scenario.insert("a", 5000);
     265               1 :     scenario.branch("a", "b");
     266               1 :     scenario.update("b", 4000);
     267               1 :     scenario.update("a", 2000);
     268               1 :     scenario.branch("a", "c");
     269               1 :     scenario.insert("c", 4000);
     270               1 :     scenario.insert("a", 2000);
     271               1 : 
     272               1 :     let (_model, result) = scenario.calculate(1000);
     273               1 : 
     274               1 :     assert_eq!(result.total_size, 17000);
     275               1 : }
     276                 : 
     277               1 : #[test]
     278               1 : fn scenario_6() {
     279               1 :     let branches = [
     280               1 :         "7ff1edab8182025f15ae33482edb590a",
     281               1 :         "b1719e044db05401a05a2ed588a3ad3f",
     282               1 :         "0xb68d6691c895ad0a70809470020929ef",
     283               1 :     ];
     284               1 : 
     285               1 :     // compared to other scenarios, this one uses bytes instead of kB
     286               1 : 
     287               1 :     let mut scenario = ScenarioBuilder::new("");
     288               1 : 
     289               1 :     scenario.branch("", branches[0]); // at 0
     290               1 :     scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
     291               1 :     scenario.branch(branches[0], branches[1]); // at 108951064
     292               1 :     scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
     293               1 :     scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
     294               1 :     scenario.branch(branches[0], branches[2]); // at 283415424
     295               1 :     scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
     296               1 :     scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
     297               1 : 
     298               1 :     let (model, result) = scenario.calculate(100_000);
     299               1 : 
     300               1 :     // FIXME: We previously calculated 333_792_000. But with this PR, we get
     301               1 :     // a much lower number. At a quick look at the model output and the
     302               1 :     // calculations here, the new result seems correct to me.
     303               1 :     eprintln!(
     304               1 :         " MODEL: {}",
     305               1 :         serde_json::to_string(&model.segments).unwrap()
     306               1 :     );
     307               1 :     eprintln!(
     308               1 :         "RESULT: {}",
     309               1 :         serde_json::to_string(&result.segments).unwrap()
     310               1 :     );
     311               1 : 
     312               1 :     assert_eq!(result.total_size, 136_236_928);
     313               1 : }
        

Generated by: LCOV version 2.1-beta