Line data Source code
1 : //! Tenant size model tests.
2 :
3 : use tenant_size_model::{Segment, SizeResult, StorageModel};
4 :
5 : use std::collections::HashMap;
6 :
7 : struct ScenarioBuilder {
8 : segments: Vec<Segment>,
9 :
10 : /// Mapping from the branch name to the index of a segment describing its latest state.
11 : branches: HashMap<String, usize>,
12 : }
13 :
14 : impl ScenarioBuilder {
15 : /// Creates a new storage with the given default branch name.
16 36 : pub fn new(initial_branch: &str) -> ScenarioBuilder {
17 36 : let init_segment = Segment {
18 36 : parent: None,
19 36 : lsn: 0,
20 36 : size: Some(0),
21 36 : needed: false, // determined later
22 36 : };
23 36 :
24 36 : ScenarioBuilder {
25 36 : segments: vec![init_segment],
26 36 : branches: HashMap::from([(initial_branch.into(), 0)]),
27 36 : }
28 36 : }
29 :
30 : /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
31 306 : pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
32 306 : let lastseg_id = *self.branches.get(branch).unwrap();
33 306 : let newseg_id = self.segments.len();
34 306 : let lastseg = &mut self.segments[lastseg_id];
35 306 :
36 306 : let newseg = Segment {
37 306 : parent: Some(lastseg_id),
38 306 : lsn: lastseg.lsn + lsn_bytes,
39 306 : size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
40 306 : needed: false,
41 306 : };
42 306 :
43 306 : self.segments.push(newseg);
44 306 : *self.branches.get_mut(branch).expect("read already") = newseg_id;
45 306 : }
46 :
47 42 : pub fn insert(&mut self, branch: &str, bytes: u64) {
48 42 : self.modify_branch(branch, bytes, bytes as i64);
49 42 : }
50 :
51 234 : pub fn update(&mut self, branch: &str, bytes: u64) {
52 234 : self.modify_branch(branch, bytes, 0i64);
53 234 : }
54 :
55 0 : pub fn _delete(&mut self, branch: &str, bytes: u64) {
56 0 : self.modify_branch(branch, bytes, -(bytes as i64));
57 0 : }
58 :
59 : /// Panics if the parent branch cannot be found.
60 48 : pub fn branch(&mut self, parent: &str, name: &str) {
61 48 : // Find the right segment
62 48 : let branchseg_id = *self
63 48 : .branches
64 48 : .get(parent)
65 48 : .expect("should had found the parent by key");
66 48 : let _branchseg = &mut self.segments[branchseg_id];
67 48 :
68 48 : // Create branch name for it
69 48 : self.branches.insert(name.to_string(), branchseg_id);
70 48 : }
71 :
72 36 : pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
73 : // Phase 1: Mark all the segments that need to be retained
74 84 : for (_branch, &last_seg_id) in self.branches.iter() {
75 84 : let last_seg = &self.segments[last_seg_id];
76 84 : let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
77 84 : let mut seg_id = last_seg_id;
78 162 : loop {
79 162 : let seg = &mut self.segments[seg_id];
80 162 : if seg.lsn <= cutoff_lsn {
81 84 : break;
82 78 : }
83 78 : seg.needed = true;
84 78 : if let Some(prev_seg_id) = seg.parent {
85 78 : seg_id = prev_seg_id;
86 78 : } else {
87 0 : break;
88 : }
89 : }
90 : }
91 :
92 : // Perform the calculation
93 36 : let storage_model = StorageModel {
94 36 : segments: self.segments.clone(),
95 36 : };
96 36 : let size_result = storage_model.calculate();
97 36 : (storage_model, size_result)
98 36 : }
99 : }
100 :
101 : // Main branch only. Some updates on it.
102 : #[test]
103 6 : fn scenario_1() {
104 6 : // Create main branch
105 6 : let mut scenario = ScenarioBuilder::new("main");
106 6 :
107 6 : // Bulk load 5 GB of data to it
108 6 : scenario.insert("main", 5_000);
109 :
110 : // Stream of updates
111 36 : for _ in 0..5 {
112 30 : scenario.update("main", 1_000);
113 30 : }
114 :
115 : // Calculate the synthetic size with retention horizon 1000
116 6 : let (_model, result) = scenario.calculate(1000);
117 6 :
118 6 : // The end of the branch is at LSN 10000. Need to retain
119 6 : // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
120 6 : // The logical snapshot has size 5000.
121 6 : assert_eq!(result.total_size, 5000 + 1000);
122 6 : }
123 :
124 : // Main branch only. Some updates on it.
125 : #[test]
126 6 : fn scenario_2() {
127 6 : // Create main branch
128 6 : let mut scenario = ScenarioBuilder::new("main");
129 6 :
130 6 : // Bulk load 5 GB of data to it
131 6 : scenario.insert("main", 5_000);
132 :
133 : // Stream of updates
134 36 : for _ in 0..5 {
135 30 : scenario.update("main", 1_000);
136 30 : }
137 :
138 : // Branch
139 6 : scenario.branch("main", "child");
140 6 : scenario.update("child", 1_000);
141 6 :
142 6 : // More updates on parent
143 6 : scenario.update("main", 1_000);
144 6 :
145 6 : //
146 6 : // The history looks like this now:
147 6 : //
148 6 : // 10000 11000
149 6 : // *----*----*--------------* main
150 6 : // |
151 6 : // | 11000
152 6 : // +-------------- child
153 6 : //
154 6 : //
155 6 : // With retention horizon 1000, we need to retain logical snapshot
156 6 : // at the branch point, size 5000, and the WAL from 10000-11000 on
157 6 : // both branches.
158 6 : let (_model, result) = scenario.calculate(1000);
159 6 :
160 6 : assert_eq!(result.total_size, 5000 + 1000 + 1000);
161 6 : }
162 :
163 : // Like 2, but more updates on main
164 : #[test]
165 6 : fn scenario_3() {
166 6 : // Create main branch
167 6 : let mut scenario = ScenarioBuilder::new("main");
168 6 :
169 6 : // Bulk load 5 GB of data to it
170 6 : scenario.insert("main", 5_000);
171 :
172 : // Stream of updates
173 36 : for _ in 0..5 {
174 30 : scenario.update("main", 1_000);
175 30 : }
176 :
177 : // Branch
178 6 : scenario.branch("main", "child");
179 6 : scenario.update("child", 1_000);
180 :
181 : // More updates on parent
182 36 : for _ in 0..5 {
183 30 : scenario.update("main", 1_000);
184 30 : }
185 :
186 : //
187 : // The history looks like this now:
188 : //
189 : // 10000 15000
190 : // *----*----*------------------------------------* main
191 : // |
192 : // | 11000
193 : // +-------------- child
194 : //
195 : //
196 : // With retention horizon 1000, it's still cheapest to retain
197 : // - snapshot at branch point (size 5000)
198 : // - WAL on child between 10000-11000
199 : // - WAL on main between 10000-15000
200 : //
201 : // This is in total 5000 + 1000 + 5000
202 : //
203 6 : let (_model, result) = scenario.calculate(1000);
204 6 :
205 6 : assert_eq!(result.total_size, 5000 + 1000 + 5000);
206 6 : }
207 :
208 : // Diverged branches
209 : #[test]
210 6 : fn scenario_4() {
211 6 : // Create main branch
212 6 : let mut scenario = ScenarioBuilder::new("main");
213 6 :
214 6 : // Bulk load 5 GB of data to it
215 6 : scenario.insert("main", 5_000);
216 :
217 : // Stream of updates
218 36 : for _ in 0..5 {
219 30 : scenario.update("main", 1_000);
220 30 : }
221 :
222 : // Branch
223 6 : scenario.branch("main", "child");
224 6 : scenario.update("child", 1_000);
225 :
226 : // More updates on parent
227 54 : for _ in 0..8 {
228 48 : scenario.update("main", 1_000);
229 48 : }
230 :
231 : //
232 : // The history looks like this now:
233 : //
234 : // 10000 18000
235 : // *----*----*------------------------------------* main
236 : // |
237 : // | 11000
238 : // +-------------- child
239 : //
240 : //
241 : // With retention horizon 1000, it's now cheapest to retain
242 : // separate snapshots on both branches:
243 : // - snapshot on main branch at LSN 17000 (size 5000)
244 : // - WAL on main between 17000-18000
245 : // - snapshot on child branch at LSN 10000 (size 5000)
246 : // - WAL on child between 10000-11000
247 : //
248 : // This is in total 5000 + 1000 + 5000 + 1000 = 12000
249 : //
250 : // (If we used the method from the previous scenario, and
251 : // kept only snapshot at the branch point, we'd need to keep
252 : // all the WAL between 10000-18000 on the main branch, so
253 : // the total size would be 5000 + 1000 + 8000 = 14000. The
254 : // calculation always picks the cheapest alternative)
255 :
256 6 : let (_model, result) = scenario.calculate(1000);
257 6 :
258 6 : assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
259 6 : }
260 :
261 : #[test]
262 6 : fn scenario_5() {
263 6 : let mut scenario = ScenarioBuilder::new("a");
264 6 : scenario.insert("a", 5000);
265 6 : scenario.branch("a", "b");
266 6 : scenario.update("b", 4000);
267 6 : scenario.update("a", 2000);
268 6 : scenario.branch("a", "c");
269 6 : scenario.insert("c", 4000);
270 6 : scenario.insert("a", 2000);
271 6 :
272 6 : let (_model, result) = scenario.calculate(1000);
273 6 :
274 6 : assert_eq!(result.total_size, 17000);
275 6 : }
276 :
277 : #[test]
278 6 : fn scenario_6() {
279 6 : let branches = [
280 6 : "7ff1edab8182025f15ae33482edb590a",
281 6 : "b1719e044db05401a05a2ed588a3ad3f",
282 6 : "0xb68d6691c895ad0a70809470020929ef",
283 6 : ];
284 6 :
285 6 : // compared to other scenarios, this one uses bytes instead of kB
286 6 :
287 6 : let mut scenario = ScenarioBuilder::new("");
288 6 :
289 6 : scenario.branch("", branches[0]); // at 0
290 6 : scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
291 6 : scenario.branch(branches[0], branches[1]); // at 108951064
292 6 : scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
293 6 : scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
294 6 : scenario.branch(branches[0], branches[2]); // at 283415424
295 6 : scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
296 6 : scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
297 6 :
298 6 : let (model, result) = scenario.calculate(100_000);
299 6 :
300 6 : // FIXME: We previously calculated 333_792_000. But with this PR, we get
301 6 : // a much lower number. At a quick look at the model output and the
302 6 : // calculations here, the new result seems correct to me.
303 6 : eprintln!(
304 6 : " MODEL: {}",
305 6 : serde_json::to_string(&model.segments).unwrap()
306 6 : );
307 6 : eprintln!(
308 6 : "RESULT: {}",
309 6 : serde_json::to_string(&result.segments).unwrap()
310 6 : );
311 6 :
312 6 : assert_eq!(result.total_size, 136_236_928);
313 6 : }
|