Line data Source code
1 : //! Tenant size model tests.
2 :
3 : use tenant_size_model::{Segment, SizeResult, StorageModel};
4 :
5 : use std::collections::HashMap;
6 :
7 : struct ScenarioBuilder {
8 : segments: Vec<Segment>,
9 :
10 : /// Mapping from the branch name to the index of a segment describing its latest state.
11 : branches: HashMap<String, usize>,
12 : }
13 :
14 : impl ScenarioBuilder {
15 : /// Creates a new storage with the given default branch name.
16 6 : pub fn new(initial_branch: &str) -> ScenarioBuilder {
17 6 : let init_segment = Segment {
18 6 : parent: None,
19 6 : lsn: 0,
20 6 : size: Some(0),
21 6 : needed: false, // determined later
22 6 : };
23 6 :
24 6 : ScenarioBuilder {
25 6 : segments: vec![init_segment],
26 6 : branches: HashMap::from([(initial_branch.into(), 0)]),
27 6 : }
28 6 : }
29 :
30 : /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
31 51 : pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
32 51 : let lastseg_id = *self.branches.get(branch).unwrap();
33 51 : let newseg_id = self.segments.len();
34 51 : let lastseg = &mut self.segments[lastseg_id];
35 51 :
36 51 : let newseg = Segment {
37 51 : parent: Some(lastseg_id),
38 51 : lsn: lastseg.lsn + lsn_bytes,
39 51 : size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
40 51 : needed: false,
41 51 : };
42 51 :
43 51 : self.segments.push(newseg);
44 51 : *self.branches.get_mut(branch).expect("read already") = newseg_id;
45 51 : }
46 :
47 7 : pub fn insert(&mut self, branch: &str, bytes: u64) {
48 7 : self.modify_branch(branch, bytes, bytes as i64);
49 7 : }
50 :
51 39 : pub fn update(&mut self, branch: &str, bytes: u64) {
52 39 : self.modify_branch(branch, bytes, 0i64);
53 39 : }
54 :
55 0 : pub fn _delete(&mut self, branch: &str, bytes: u64) {
56 0 : self.modify_branch(branch, bytes, -(bytes as i64));
57 0 : }
58 :
59 : /// Panics if the parent branch cannot be found.
60 8 : pub fn branch(&mut self, parent: &str, name: &str) {
61 8 : // Find the right segment
62 8 : let branchseg_id = *self
63 8 : .branches
64 8 : .get(parent)
65 8 : .expect("should had found the parent by key");
66 8 : let _branchseg = &mut self.segments[branchseg_id];
67 8 :
68 8 : // Create branch name for it
69 8 : self.branches.insert(name.to_string(), branchseg_id);
70 8 : }
71 :
72 6 : pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
73 : // Phase 1: Mark all the segments that need to be retained
74 14 : for (_branch, &last_seg_id) in self.branches.iter() {
75 14 : let last_seg = &self.segments[last_seg_id];
76 14 : let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
77 14 : let mut seg_id = last_seg_id;
78 : loop {
79 27 : let seg = &mut self.segments[seg_id];
80 27 : if seg.lsn <= cutoff_lsn {
81 14 : break;
82 13 : }
83 13 : seg.needed = true;
84 13 : if let Some(prev_seg_id) = seg.parent {
85 13 : seg_id = prev_seg_id;
86 13 : } else {
87 0 : break;
88 : }
89 : }
90 : }
91 :
92 : // Perform the calculation
93 6 : let storage_model = StorageModel {
94 6 : segments: self.segments.clone(),
95 6 : };
96 6 : let size_result = storage_model.calculate();
97 6 : (storage_model, size_result)
98 6 : }
99 : }
100 :
// Single "main" branch: one bulk load followed by a stream of updates.
#[test]
fn scenario_1() {
    // Start from an empty main branch.
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load (5_000) into main, then five updates of 1_000 each.
    builder.insert("main", 5_000);
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Calculate the synthetic size with retention horizon 1000.
    let (_storage, sizes) = builder.calculate(1000);

    // The branch ends at LSN 10000, so we must keep a logical snapshot at
    // LSN 9000 (size 5000) plus the WAL between 9000 and 10000.
    let expected_total = 5000 + 1000;
    assert_eq!(sizes.total_size, expected_total);
}
123 :
// Main branch plus one child branch; each gets a single update after the branch point.
#[test]
fn scenario_2() {
    // Start from an empty main branch.
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load (5_000) into main, then five updates of 1_000 each.
    builder.insert("main", 5_000);
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Fork "child" off main and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // One more update on the parent.
    builder.update("main", 1_000);

    //
    // The history looks like this now:
    //
    //           10000          11000
    // *----*----*--------------*    main
    //           |
    //           |              11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, we need to retain the logical snapshot
    // at the branch point (size 5000) and the WAL from 10000-11000 on
    // both branches.
    let (_storage, sizes) = builder.calculate(1000);

    let expected_total = 5000 + 1000 + 1000;
    assert_eq!(sizes.total_size, expected_total);
}
162 :
// Same shape as scenario 2, but main receives five updates after the branch point.
#[test]
fn scenario_3() {
    // Start from an empty main branch.
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load (5_000) into main, then five updates of 1_000 each.
    builder.insert("main", 5_000);
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Fork "child" off main and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // Five more updates on the parent.
    (0..5).for_each(|_| builder.update("main", 1_000));

    //
    // The history looks like this now:
    //
    //           10000                                15000
    // *----*----*------------------------------------*    main
    //           |
    //           |              11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, it's still cheapest to retain
    // - snapshot at branch point (size 5000)
    // - WAL on child between 10000-11000
    // - WAL on main between 10000-15000
    //
    // This is in total 5000 + 1000 + 5000
    //
    let (_storage, sizes) = builder.calculate(1000);

    let expected_total = 5000 + 1000 + 5000;
    assert_eq!(sizes.total_size, expected_total);
}
207 :
// Diverged branches: main moves far ahead of the branch point.
#[test]
fn scenario_4() {
    // Start from an empty main branch.
    let mut builder = ScenarioBuilder::new("main");

    // Bulk load (5_000) into main, then five updates of 1_000 each.
    builder.insert("main", 5_000);
    (0..5).for_each(|_| builder.update("main", 1_000));

    // Fork "child" off main and advance it once.
    builder.branch("main", "child");
    builder.update("child", 1_000);

    // Eight more updates on the parent.
    (0..8).for_each(|_| builder.update("main", 1_000));

    //
    // The history looks like this now:
    //
    //           10000                                18000
    // *----*----*------------------------------------*    main
    //           |
    //           |              11000
    //           +--------------     child
    //
    //
    // With retention horizon 1000, it's now cheapest to retain
    // separate snapshots on both branches:
    // - snapshot on main branch at LSN 17000 (size 5000)
    // - WAL on main between 17000-18000
    // - snapshot on child branch at LSN 10000 (size 5000)
    // - WAL on child between 10000-11000
    //
    // This is in total 5000 + 1000 + 5000 + 1000 = 12000
    //
    // (If we used the method from the previous scenario, and
    // kept only snapshot at the branch point, we'd need to keep
    // all the WAL between 10000-18000 on the main branch, so
    // the total size would be 5000 + 1000 + 8000 = 14000. The
    // calculation always picks the cheapest alternative)

    let (_storage, sizes) = builder.calculate(1000);

    let expected_total = 5000 + 1000 + 5000 + 1000;
    assert_eq!(sizes.total_size, expected_total);
}
260 :
// Three branches ("a", "b", "c") forked from "a" at two different points.
#[test]
fn scenario_5() {
    let mut builder = ScenarioBuilder::new("a");

    builder.insert("a", 5000); // a: LSN 5000, size 5000
    builder.branch("a", "b"); // b forks from a at LSN 5000
    builder.update("b", 4000); // b: LSN 9000, size 5000
    builder.update("a", 2000); // a: LSN 7000, size 5000
    builder.branch("a", "c"); // c forks from a at LSN 7000
    builder.insert("c", 4000); // c: LSN 11000, size 9000
    builder.insert("a", 2000); // a: LSN 9000, size 7000

    let (_storage, sizes) = builder.calculate(1000);

    let expected_total = 17000;
    assert_eq!(sizes.total_size, expected_total);
}
276 :
// Three branches with hash-like names; sizes and LSNs are given in bytes.
#[test]
fn scenario_6() {
    let [first, second, third] = [
        "7ff1edab8182025f15ae33482edb590a",
        "b1719e044db05401a05a2ed588a3ad3f",
        "0xb68d6691c895ad0a70809470020929ef",
    ];

    // compared to other scenarios, this one uses bytes instead of kB

    let mut builder = ScenarioBuilder::new("");

    builder.branch("", first); // at 0
    builder.modify_branch(first, 108951064, 43696128); // at 108951064
    builder.branch(first, second); // at 108951064
    builder.modify_branch(second, 15560408, -1851392); // at 124511472
    builder.modify_branch(first, 174464360, -1531904); // at 283415424
    builder.branch(first, third); // at 283415424
    builder.modify_branch(third, 15906192, 8192); // at 299321616
    builder.modify_branch(first, 18909976, 32768); // at 302325400

    let (model, result) = builder.calculate(100_000);

    // FIXME: We previously calculated 333_792_000. But with this PR, we get
    // a much lower number. At a quick look at the model output and the
    // calculations here, the new result seems correct to me.
    let model_json = serde_json::to_string(&model.segments).unwrap();
    eprintln!(" MODEL: {}", model_json);
    let result_json = serde_json::to_string(&result.segments).unwrap();
    eprintln!("RESULT: {}", result_json);

    assert_eq!(result.total_size, 136_236_928);
}
|