Line data Source code
1 : //! Tenant size model tests.
2 :
3 : use tenant_size_model::{Segment, SizeResult, StorageModel};
4 :
5 : use std::collections::HashMap;
6 :
7 : struct ScenarioBuilder {
8 : segments: Vec<Segment>,
9 :
10 : /// Mapping from the branch name to the index of a segment describing its latest state.
11 : branches: HashMap<String, usize>,
12 : }
13 :
14 : impl ScenarioBuilder {
15 : /// Creates a new storage with the given default branch name.
16 12 : pub fn new(initial_branch: &str) -> ScenarioBuilder {
17 12 : let init_segment = Segment {
18 12 : parent: None,
19 12 : lsn: 0,
20 12 : size: Some(0),
21 12 : needed: false, // determined later
22 12 : };
23 12 :
24 12 : ScenarioBuilder {
25 12 : segments: vec![init_segment],
26 12 : branches: HashMap::from([(initial_branch.into(), 0)]),
27 12 : }
28 12 : }
29 :
30 : /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
31 102 : pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
32 102 : let lastseg_id = *self.branches.get(branch).unwrap();
33 102 : let newseg_id = self.segments.len();
34 102 : let lastseg = &mut self.segments[lastseg_id];
35 102 :
36 102 : let newseg = Segment {
37 102 : parent: Some(lastseg_id),
38 102 : lsn: lastseg.lsn + lsn_bytes,
39 102 : size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
40 102 : needed: false,
41 102 : };
42 102 :
43 102 : self.segments.push(newseg);
44 102 : *self.branches.get_mut(branch).expect("read already") = newseg_id;
45 102 : }
46 :
47 14 : pub fn insert(&mut self, branch: &str, bytes: u64) {
48 14 : self.modify_branch(branch, bytes, bytes as i64);
49 14 : }
50 :
51 78 : pub fn update(&mut self, branch: &str, bytes: u64) {
52 78 : self.modify_branch(branch, bytes, 0i64);
53 78 : }
54 :
55 0 : pub fn _delete(&mut self, branch: &str, bytes: u64) {
56 0 : self.modify_branch(branch, bytes, -(bytes as i64));
57 0 : }
58 :
59 : /// Panics if the parent branch cannot be found.
60 16 : pub fn branch(&mut self, parent: &str, name: &str) {
61 16 : // Find the right segment
62 16 : let branchseg_id = *self
63 16 : .branches
64 16 : .get(parent)
65 16 : .expect("should had found the parent by key");
66 16 : let _branchseg = &mut self.segments[branchseg_id];
67 16 :
68 16 : // Create branch name for it
69 16 : self.branches.insert(name.to_string(), branchseg_id);
70 16 : }
71 :
72 12 : pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
73 : // Phase 1: Mark all the segments that need to be retained
74 28 : for (_branch, &last_seg_id) in self.branches.iter() {
75 28 : let last_seg = &self.segments[last_seg_id];
76 28 : let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
77 28 : let mut seg_id = last_seg_id;
78 54 : loop {
79 54 : let seg = &mut self.segments[seg_id];
80 54 : if seg.lsn <= cutoff_lsn {
81 28 : break;
82 26 : }
83 26 : seg.needed = true;
84 26 : if let Some(prev_seg_id) = seg.parent {
85 26 : seg_id = prev_seg_id;
86 26 : } else {
87 0 : break;
88 : }
89 : }
90 : }
91 :
92 : // Perform the calculation
93 12 : let storage_model = StorageModel {
94 12 : segments: self.segments.clone(),
95 12 : };
96 12 : let size_result = storage_model.calculate();
97 12 : (storage_model, size_result)
98 12 : }
99 : }
100 :
101 : // Main branch only. Some updates on it.
102 2 : #[test]
103 2 : fn scenario_1() {
104 2 : // Create main branch
105 2 : let mut scenario = ScenarioBuilder::new("main");
106 2 :
107 2 : // Bulk load 5 GB of data to it
108 2 : scenario.insert("main", 5_000);
109 :
110 : // Stream of updates
111 12 : for _ in 0..5 {
112 10 : scenario.update("main", 1_000);
113 10 : }
114 :
115 : // Calculate the synthetic size with retention horizon 1000
116 2 : let (_model, result) = scenario.calculate(1000);
117 2 :
118 2 : // The end of the branch is at LSN 10000. Need to retain
119 2 : // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
120 2 : // The logical snapshot has size 5000.
121 2 : assert_eq!(result.total_size, 5000 + 1000);
122 2 : }
123 :
124 : // Main branch only. Some updates on it.
125 2 : #[test]
126 2 : fn scenario_2() {
127 2 : // Create main branch
128 2 : let mut scenario = ScenarioBuilder::new("main");
129 2 :
130 2 : // Bulk load 5 GB of data to it
131 2 : scenario.insert("main", 5_000);
132 :
133 : // Stream of updates
134 12 : for _ in 0..5 {
135 10 : scenario.update("main", 1_000);
136 10 : }
137 :
138 : // Branch
139 2 : scenario.branch("main", "child");
140 2 : scenario.update("child", 1_000);
141 2 :
142 2 : // More updates on parent
143 2 : scenario.update("main", 1_000);
144 2 :
145 2 : //
146 2 : // The history looks like this now:
147 2 : //
148 2 : // 10000 11000
149 2 : // *----*----*--------------* main
150 2 : // |
151 2 : // | 11000
152 2 : // +-------------- child
153 2 : //
154 2 : //
155 2 : // With retention horizon 1000, we need to retain logical snapshot
156 2 : // at the branch point, size 5000, and the WAL from 10000-11000 on
157 2 : // both branches.
158 2 : let (_model, result) = scenario.calculate(1000);
159 2 :
160 2 : assert_eq!(result.total_size, 5000 + 1000 + 1000);
161 2 : }
162 :
163 : // Like 2, but more updates on main
164 2 : #[test]
165 2 : fn scenario_3() {
166 2 : // Create main branch
167 2 : let mut scenario = ScenarioBuilder::new("main");
168 2 :
169 2 : // Bulk load 5 GB of data to it
170 2 : scenario.insert("main", 5_000);
171 :
172 : // Stream of updates
173 12 : for _ in 0..5 {
174 10 : scenario.update("main", 1_000);
175 10 : }
176 :
177 : // Branch
178 2 : scenario.branch("main", "child");
179 2 : scenario.update("child", 1_000);
180 :
181 : // More updates on parent
182 12 : for _ in 0..5 {
183 10 : scenario.update("main", 1_000);
184 10 : }
185 :
186 : //
187 : // The history looks like this now:
188 : //
189 : // 10000 15000
190 : // *----*----*------------------------------------* main
191 : // |
192 : // | 11000
193 : // +-------------- child
194 : //
195 : //
196 : // With retention horizon 1000, it's still cheapest to retain
197 : // - snapshot at branch point (size 5000)
198 : // - WAL on child between 10000-11000
199 : // - WAL on main between 10000-15000
200 : //
201 : // This is in total 5000 + 1000 + 5000
202 : //
203 2 : let (_model, result) = scenario.calculate(1000);
204 2 :
205 2 : assert_eq!(result.total_size, 5000 + 1000 + 5000);
206 2 : }
207 :
208 : // Diverged branches
209 2 : #[test]
210 2 : fn scenario_4() {
211 2 : // Create main branch
212 2 : let mut scenario = ScenarioBuilder::new("main");
213 2 :
214 2 : // Bulk load 5 GB of data to it
215 2 : scenario.insert("main", 5_000);
216 :
217 : // Stream of updates
218 12 : for _ in 0..5 {
219 10 : scenario.update("main", 1_000);
220 10 : }
221 :
222 : // Branch
223 2 : scenario.branch("main", "child");
224 2 : scenario.update("child", 1_000);
225 :
226 : // More updates on parent
227 18 : for _ in 0..8 {
228 16 : scenario.update("main", 1_000);
229 16 : }
230 :
231 : //
232 : // The history looks like this now:
233 : //
234 : // 10000 18000
235 : // *----*----*------------------------------------* main
236 : // |
237 : // | 11000
238 : // +-------------- child
239 : //
240 : //
241 : // With retention horizon 1000, it's now cheapest to retain
242 : // separate snapshots on both branches:
243 : // - snapshot on main branch at LSN 17000 (size 5000)
244 : // - WAL on main between 17000-18000
245 : // - snapshot on child branch at LSN 10000 (size 5000)
246 : // - WAL on child between 10000-11000
247 : //
248 : // This is in total 5000 + 1000 + 5000 + 1000 = 12000
249 : //
250 : // (If we used the the method from the previous scenario, and
251 : // kept only snapshot at the branch point, we'd need to keep
252 : // all the WAL between 10000-18000 on the main branch, so
253 : // the total size would be 5000 + 1000 + 8000 = 14000. The
254 : // calculation always picks the cheapest alternative)
255 :
256 2 : let (_model, result) = scenario.calculate(1000);
257 2 :
258 2 : assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
259 2 : }
260 :
261 2 : #[test]
262 2 : fn scenario_5() {
263 2 : let mut scenario = ScenarioBuilder::new("a");
264 2 : scenario.insert("a", 5000);
265 2 : scenario.branch("a", "b");
266 2 : scenario.update("b", 4000);
267 2 : scenario.update("a", 2000);
268 2 : scenario.branch("a", "c");
269 2 : scenario.insert("c", 4000);
270 2 : scenario.insert("a", 2000);
271 2 :
272 2 : let (_model, result) = scenario.calculate(1000);
273 2 :
274 2 : assert_eq!(result.total_size, 17000);
275 2 : }
276 :
277 2 : #[test]
278 2 : fn scenario_6() {
279 2 : let branches = [
280 2 : "7ff1edab8182025f15ae33482edb590a",
281 2 : "b1719e044db05401a05a2ed588a3ad3f",
282 2 : "0xb68d6691c895ad0a70809470020929ef",
283 2 : ];
284 2 :
285 2 : // compared to other scenarios, this one uses bytes instead of kB
286 2 :
287 2 : let mut scenario = ScenarioBuilder::new("");
288 2 :
289 2 : scenario.branch("", branches[0]); // at 0
290 2 : scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
291 2 : scenario.branch(branches[0], branches[1]); // at 108951064
292 2 : scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
293 2 : scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
294 2 : scenario.branch(branches[0], branches[2]); // at 283415424
295 2 : scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
296 2 : scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
297 2 :
298 2 : let (model, result) = scenario.calculate(100_000);
299 2 :
300 2 : // FIXME: We previously calculated 333_792_000. But with this PR, we get
301 2 : // a much lower number. At a quick look at the model output and the
302 2 : // calculations here, the new result seems correct to me.
303 2 : eprintln!(
304 2 : " MODEL: {}",
305 2 : serde_json::to_string(&model.segments).unwrap()
306 2 : );
307 2 : eprintln!(
308 2 : "RESULT: {}",
309 2 : serde_json::to_string(&result.segments).unwrap()
310 2 : );
311 2 :
312 2 : assert_eq!(result.total_size, 136_236_928);
313 2 : }
|