use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use anyhow::{bail, Context};
use tokio::sync::oneshot::error::RecvError;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;

use crate::context::RequestContext;
use crate::pgdatadir_mapping::CalculateLogicalSizeError;

use super::{LogicalSizeCalculationCause, Tenant};
use crate::tenant::Timeline;
use utils::id::TimelineId;
use utils::lsn::Lsn;

use tracing::*;

use tenant_size_model::{Segment, StorageModel};

/// Inputs to the actual tenant sizing model
///
/// Implements [`serde::Serialize`], but is not meant to be part of the public API; rather, it
/// is a transferable format between execution environments and developers.
///
/// This tracks more information than the [`StorageModel`] that the calculation itself needs.
/// We convert this into a [`StorageModel`] when it's time to perform the calculation.
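///
/// A minimal sketch of the serialized shape (hypothetical values; the `#[test]`s at the
/// bottom of this file contain complete, real examples):
///
/// ```text
/// {
///   "segments": [
///     { "segment": { "parent": null, "lsn": 0, "size": null, "needed": false },
///       "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd", "kind": "BranchStart" }
///   ],
///   "timeline_inputs": [ ... ]
/// }
/// ```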
///
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct ModelInputs {
    pub segments: Vec<SegmentMeta>,
    pub timeline_inputs: Vec<TimelineInputs>,
}

/// A [`Segment`], with some extra information for display purposes
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SegmentMeta {
    pub segment: Segment,
    pub timeline_id: TimelineId,
    pub kind: LsnKind,
}

impl SegmentMeta {
    fn size_needed(&self) -> bool {
        match self.kind {
            LsnKind::BranchStart => {
                // If we don't have a later GC cutoff point on this branch, and
                // no ancestor, calculate the size at the branch start point.
                self.segment.needed && self.segment.parent.is_none()
            }
            LsnKind::BranchPoint => true,
            LsnKind::GcCutOff => true,
            LsnKind::BranchEnd => false,
        }
    }
}
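
// A minimal illustration (an added example, not part of the original logic) of
// `size_needed`. All field values are hypothetical; the timeline id is reused from
// the test data at the bottom of this file.
#[test]
fn size_needed_examples() {
    let meta = |parent: Option<usize>, kind: LsnKind, needed: bool| SegmentMeta {
        segment: Segment {
            parent,
            lsn: 0x42,
            size: None,
            needed,
        },
        timeline_id: "f15ae0cf21cce2ba27e4d80c6709a6cd".parse().unwrap(),
        kind,
    };

    // A needed branch start without a parent must have its size calculated...
    assert!(meta(None, LsnKind::BranchStart, true).size_needed());
    // ...but not when it has a parent segment to inherit from.
    assert!(!meta(Some(0), LsnKind::BranchStart, true).size_needed());
    // GC cutoff and branch points always need a size; branch ends never do.
    assert!(meta(Some(0), LsnKind::GcCutOff, false).size_needed());
    assert!(!meta(Some(0), LsnKind::BranchEnd, true).size_needed());
}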

#[derive(
    Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize,
)]
pub enum LsnKind {
    /// A timeline starting here
    BranchStart,
    /// A child timeline branches off from here
    BranchPoint,
    /// GC cutoff point
    GcCutOff,
    /// Last record LSN
    BranchEnd,
}

/// Collects all LSNs relevant to the inputs. These are only helpful in the serialized form, as
/// part of [`ModelInputs`] returned from the HTTP API, explaining the inputs.
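///
/// One serialized entry looks like this (values copied from the test data at the bottom of
/// this file):
///
/// ```text
/// {
///   "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
///   "ancestor_lsn": "0/18D3D98",
///   "last_record": "0/2230CD0",
///   "latest_gc_cutoff": "0/1698C48",
///   "horizon_cutoff": "0/2210CD0",
///   "pitr_cutoff": "0/2210CD0",
///   "next_gc_cutoff": "0/2210CD0",
///   "retention_param_cutoff": null
/// }
/// ```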
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct TimelineInputs {
    pub timeline_id: TimelineId,

    pub ancestor_id: Option<TimelineId>,

    ancestor_lsn: Lsn,
    last_record: Lsn,
    latest_gc_cutoff: Lsn,
    horizon_cutoff: Lsn,
    pitr_cutoff: Lsn,

    /// Cutoff point based on GC settings
    next_gc_cutoff: Lsn,

    /// Cutoff point calculated from the user-supplied 'max_retention_period'
    retention_param_cutoff: Option<Lsn>,
}

/// Gathers the inputs for the tenant sizing model.
///
/// Tenant size does not consider the latest state, but only the state until next_gc_cutoff,
/// which is updated on-demand at the start of this calculation, separately from
/// [`TimelineInputs::latest_gc_cutoff`].
///
/// For timelines in general:
///
/// ```text
/// 0-----|---------|----|------------| · · · · · |·> lsn
///    initdb_lsn  branchpoints*  next_gc_cutoff   latest
/// ```
///
/// Until gc_horizon_cutoff > `Timeline::last_record_lsn` for any of the tenant's timelines, the
/// tenant size will be zero.
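///
/// A sketch of the intended call flow (hypothetical driver code; the real caller wires this up
/// from the tenant size HTTP endpoint), `ignore`d since it needs a live [`Tenant`]:
///
/// ```ignore
/// let limit = Arc::new(Semaphore::new(1));
/// let mut cache = HashMap::new();
/// let inputs = gather_inputs(&tenant, &limit, None, &mut cache, cause, &cancel, &ctx).await?;
/// let total_size = inputs.calculate()?;
/// ```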
pub(super) async fn gather_inputs(
    tenant: &Tenant,
    limit: &Arc<Semaphore>,
    max_retention_period: Option<u64>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
    cause: LogicalSizeCalculationCause,
    cancel: &CancellationToken,
    ctx: &RequestContext,
) -> anyhow::Result<ModelInputs> {
    // Refresh is needed to update the GC-related pitr_cutoff and horizon_cutoff.
    tenant
        .refresh_gc_info(cancel, ctx)
        .await
        .context("Failed to refresh gc_info before gathering inputs")?;

    // Collect information about all the timelines
    let mut timelines = tenant.list_timelines();

    if timelines.is_empty() {
        // Perhaps the tenant has just been created, and as such doesn't have any data yet.
        return Ok(ModelInputs {
            segments: vec![],
            timeline_inputs: Vec::new(),
        });
    }

    // Filter out timelines that are not active.
    //
    // There may be a race when a timeline is dropped,
    // but it is unlikely to cause any issues. In the worst case,
    // the calculation will error out.
    timelines.retain(|t| t.is_active());

    // Build a map of branch points.
    let mut branchpoints: HashMap<TimelineId, HashSet<Lsn>> = HashMap::new();
    for timeline in timelines.iter() {
        if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
            branchpoints
                .entry(ancestor_id)
                .or_default()
                .insert(timeline.get_ancestor_lsn());
        }
    }

    // These become the final result.
    let mut timeline_inputs = Vec::with_capacity(timelines.len());
    let mut segments: Vec<SegmentMeta> = Vec::new();

    //
    // Build Segments representing each timeline. As we do that, also remember
    // the branchpoints and branch startpoints in 'branchpoint_segments' and
    // 'branchstart_segments'
    //

    // BranchPoint segments of each timeline
    // (timeline, branchpoint LSN) -> segment_id
    let mut branchpoint_segments: HashMap<(TimelineId, Lsn), usize> = HashMap::new();

    // timeline, Branchpoint seg id, (ancestor, ancestor LSN)
    type BranchStartSegment = (TimelineId, usize, Option<(TimelineId, Lsn)>);
    let mut branchstart_segments: Vec<BranchStartSegment> = Vec::new();

    for timeline in timelines.iter() {
        let timeline_id = timeline.timeline_id;
        let last_record_lsn = timeline.get_last_record_lsn();
        let ancestor_lsn = timeline.get_ancestor_lsn();

        // There's a race between the update (holding tenant.gc_lock) and this read,
        // but it might not be an issue because this is not for Timeline::gc.
        let gc_info = timeline.gc_info.read().unwrap();

        // Similar to gc, but Timeline::get_latest_gc_cutoff_lsn() will not be updated before a
        // new gc run, which we have no control over. However, unlike `Timeline::gc`, we don't
        // consider `Timeline::disk_consistent_lsn` at all, because we are not actually removing
        // files.
        let mut next_gc_cutoff = cmp::min(gc_info.horizon_cutoff, gc_info.pitr_cutoff);

        // If the caller provided a shorter retention period, use that instead of the GC cutoff.
        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
            let param_cutoff = Lsn(last_record_lsn.0.saturating_sub(max_retention_period));
            if next_gc_cutoff < param_cutoff {
                next_gc_cutoff = param_cutoff;
            }
            Some(param_cutoff)
        } else {
            None
        };
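
        // Example with hypothetical numbers: last_record_lsn = 0x2000 and
        // max_retention_period = 0x0800 give param_cutoff = 0x1800, so a GC cutoff
        // older than 0x1800 is advanced to it, shortening the modeled retention.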

        // The next_gc_cutoff in the parent branch is not of interest (right now at least),
        // nor do we want to query any logical size before initdb_lsn.
        let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);

        // Build "interesting LSNs" on this timeline
        let mut lsns: Vec<(Lsn, LsnKind)> = gc_info
            .retain_lsns
            .iter()
            .filter(|&&lsn| lsn > ancestor_lsn)
            .copied()
            // this assumes there are no other retain_lsns than the branchpoints
            .map(|lsn| (lsn, LsnKind::BranchPoint))
            .collect::<Vec<_>>();

        // Add branch points we collected earlier, just in case there were any that were
        // not present in retain_lsns. We will remove any duplicates below.
        if let Some(this_branchpoints) = branchpoints.get(&timeline_id) {
            lsns.extend(
                this_branchpoints
                    .iter()
                    .map(|lsn| (*lsn, LsnKind::BranchPoint)),
            )
        }

        // Add a point for the GC cutoff
        let branch_start_needed = next_gc_cutoff <= branch_start_lsn;
        if !branch_start_needed {
            lsns.push((next_gc_cutoff, LsnKind::GcCutOff));
        }

        lsns.sort_unstable();
        lsns.dedup();

        //
        // Create Segments for the interesting points.
        //

        // Timeline start point
        let ancestor = timeline
            .get_ancestor_timeline_id()
            .map(|ancestor_id| (ancestor_id, ancestor_lsn));
        branchstart_segments.push((timeline_id, segments.len(), ancestor));
        segments.push(SegmentMeta {
            segment: Segment {
                parent: None, // filled in later
                lsn: branch_start_lsn.0,
                size: None, // filled in later
                needed: branch_start_needed,
            },
            timeline_id: timeline.timeline_id,
            kind: LsnKind::BranchStart,
        });

        // GC cutoff point, and any branch points, i.e. points where
        // other timelines branch off from this timeline.
        let mut parent = segments.len() - 1;
        for (lsn, kind) in lsns {
            if kind == LsnKind::BranchPoint {
                branchpoint_segments.insert((timeline_id, lsn), segments.len());
            }
            segments.push(SegmentMeta {
                segment: Segment {
                    parent: Some(parent),
                    lsn: lsn.0,
                    size: None,
                    needed: lsn > next_gc_cutoff,
                },
                timeline_id: timeline.timeline_id,
                kind,
            });
            parent += 1;
        }

        // Current end of the timeline
        segments.push(SegmentMeta {
            segment: Segment {
                parent: Some(parent),
                lsn: last_record_lsn.0,
                size: None, // Filled in later, if necessary
                needed: true,
            },
            timeline_id: timeline.timeline_id,
            kind: LsnKind::BranchEnd,
        });

        timeline_inputs.push(TimelineInputs {
            timeline_id: timeline.timeline_id,
            ancestor_id: timeline.get_ancestor_timeline_id(),
            ancestor_lsn,
            last_record: last_record_lsn,
            // This is not used above because it might not have been updated recently enough.
            latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
            horizon_cutoff: gc_info.horizon_cutoff,
            pitr_cutoff: gc_info.pitr_cutoff,
            next_gc_cutoff,
            retention_param_cutoff,
        });
    }

    // We now have all segments from the timelines in 'segments'. The timelines
    // haven't been linked to each other yet, though. Do that.
    for (_timeline_id, seg_id, ancestor) in branchstart_segments {
        // Look up the branch point
        if let Some(ancestor) = ancestor {
            let parent_id = *branchpoint_segments.get(&ancestor).unwrap();
            segments[seg_id].segment.parent = Some(parent_id);
        }
    }

    // We left the 'size' field empty in all of the Segments so far.
    // Now find logical sizes for all of the points that might need or benefit from them.
    fill_logical_sizes(
        &timelines,
        &mut segments,
        limit,
        logical_size_cache,
        cause,
        ctx,
    )
    .await?;

    Ok(ModelInputs {
        segments,
        timeline_inputs,
    })
}

/// Augment 'segments' with logical sizes
///
/// This will probably conflict with on-demand downloaded layers, or at least force them
/// all to be downloaded.
async fn fill_logical_sizes(
    timelines: &[Arc<Timeline>],
    segments: &mut [SegmentMeta],
    limit: &Arc<Semaphore>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
    cause: LogicalSizeCalculationCause,
    ctx: &RequestContext,
) -> anyhow::Result<()> {
    let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(
        timelines
            .iter()
            .map(|timeline| (timeline.timeline_id, Arc::clone(timeline))),
    );

    // Record the used/inserted cache keys here and remove the extras afterwards, so the
    // cache does not keep growing. After the initial run the cache should be quite stable,
    // but live timelines will eventually require new LSNs to be inspected.
    let mut sizes_needed = HashMap::<(TimelineId, Lsn), Option<u64>>::new();

    // With a JoinSet, all of the tasks are simply de-scheduled on drop, which we can use
    // to our advantage with `?` error handling.
    let mut joinset = tokio::task::JoinSet::new();

    // For each point that would benefit from having a logical size available,
    // spawn a Task to fetch it, unless we have it cached already.
    for seg in segments.iter() {
        if !seg.size_needed() {
            continue;
        }

        let timeline_id = seg.timeline_id;
        let lsn = Lsn(seg.segment.lsn);

        if let Entry::Vacant(e) = sizes_needed.entry((timeline_id, lsn)) {
            let cached_size = logical_size_cache.get(&(timeline_id, lsn)).cloned();
            if cached_size.is_none() {
                let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap());
                let parallel_size_calcs = Arc::clone(limit);
                let ctx = ctx.attached_child();
                joinset.spawn(
                    calculate_logical_size(parallel_size_calcs, timeline, lsn, cause, ctx)
                        .in_current_span(),
                );
            }
            e.insert(cached_size);
        }
    }

    // Perform the size lookups
    let mut have_any_error = false;
    while let Some(res) = joinset.join_next().await {
        // Each of these comes as a Result<anyhow::Result<_>, JoinError>
        // because of spawn + spawn_blocking.
        match res {
            Err(join_error) if join_error.is_cancelled() => {
                unreachable!("we are not cancelling any of the futures, nor should we be");
            }
            Err(join_error) => {
                // Cannot really do anything, as this panic is likely a bug.
                error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}");
                have_any_error = true;
            }
            Ok(Err(recv_result_error)) => {
                // Cannot really do anything; the sender going away is likely a bug.
                error!("failed to receive logical size query result: {recv_result_error:#}");
                have_any_error = true;
            }
            Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Err(error)))) => {
                if !matches!(error, CalculateLogicalSizeError::Cancelled) {
                    warn!(
                        timeline_id=%timeline.timeline_id,
                        "failed to calculate logical size at {lsn}: {error:#}"
                    );
                }
                have_any_error = true;
            }
            Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Ok(size)))) => {
                debug!(timeline_id=%timeline.timeline_id, %lsn, size, "size calculated");

                logical_size_cache.insert((timeline.timeline_id, lsn), size);
                sizes_needed.insert((timeline.timeline_id, lsn), Some(size));
            }
        }
    }

    // Prune any keys not needed anymore; we record every used key and added key.
    logical_size_cache.retain(|key, _| sizes_needed.contains_key(key));

    if have_any_error {
        // We cannot complete this round because we are missing data.
        // We have, however, cached all we were able to request calculation on.
        anyhow::bail!("failed to calculate some logical_sizes");
    }

    // Insert the looked-up sizes into the Segments
    for seg in segments.iter_mut() {
        if !seg.size_needed() {
            continue;
        }

        let timeline_id = seg.timeline_id;
        let lsn = Lsn(seg.segment.lsn);

        if let Some(Some(size)) = sizes_needed.get(&(timeline_id, lsn)) {
            seg.segment.size = Some(*size);
        } else {
            bail!("could not find size at {} in timeline {}", lsn, timeline_id);
        }
    }
    Ok(())
}

impl ModelInputs {
    pub fn calculate_model(&self) -> anyhow::Result<tenant_size_model::StorageModel> {
        // Convert SegmentMetas into plain Segments
        let storage = StorageModel {
            segments: self
                .segments
                .iter()
                .map(|seg| seg.segment.clone())
                .collect(),
        };

        Ok(storage)
    }

    // calculate total project size
    pub fn calculate(&self) -> anyhow::Result<u64> {
        let storage = self.calculate_model()?;
        let sizes = storage.calculate();

        Ok(sizes.total_size)
    }
}

/// Newtype around the tuple that carries the result of a timeline-at-LSN logical size calculation.
struct TimelineAtLsnSizeResult(
    Arc<crate::tenant::Timeline>,
    utils::lsn::Lsn,
    Result<u64, CalculateLogicalSizeError>,
);

#[instrument(skip_all, fields(timeline_id=%timeline.timeline_id, lsn=%lsn))]
async fn calculate_logical_size(
    limit: Arc<tokio::sync::Semaphore>,
    timeline: Arc<crate::tenant::Timeline>,
    lsn: utils::lsn::Lsn,
    cause: LogicalSizeCalculationCause,
    ctx: RequestContext,
) -> Result<TimelineAtLsnSizeResult, RecvError> {
    let _permit = tokio::sync::Semaphore::acquire_owned(limit)
        .await
        .expect("global semaphore should not have been closed");

    let size_res = timeline
        .spawn_ondemand_logical_size_calculation(lsn, cause, ctx)
        .instrument(info_span!("spawn_ondemand_logical_size_calculation"))
        .await?;
    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
}
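
// Note on the limiter above (an illustration, not from the original file): with e.g.
// `Semaphore::new(2)`, at most two logical size calculations run concurrently;
// additional `calculate_logical_size` futures wait in `acquire_owned` until a
// permit is released when an earlier `_permit` is dropped.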

#[test]
fn verify_size_for_multiple_branches() {
    // This is generated from the integration test test_tenant_size_with_multiple_branches,
    // but written out this way it has stable LSNs.
    //
    // The timeline_inputs don't participate in the size calculation; they are here just to
    // explain the inputs.
    let doc = r#"
{
  "segments": [
    {
      "segment": {
        "parent": 9,
        "lsn": 26033560,
        "size": null,
        "needed": false
      },
      "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
      "kind": "BranchStart"
    },
    {
      "segment": {
        "parent": 0,
        "lsn": 35720400,
        "size": 25206784,
        "needed": false
      },
      "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
      "kind": "GcCutOff"
    },
    {
      "segment": {
        "parent": 1,
        "lsn": 35851472,
        "size": null,
        "needed": true
      },
      "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
      "kind": "BranchEnd"
    },
    {
      "segment": {
        "parent": 7,
        "lsn": 24566168,
        "size": null,
        "needed": false
      },
      "timeline_id": "454626700469f0a9914949b9d018e876",
      "kind": "BranchStart"
    },
    {
      "segment": {
        "parent": 3,
        "lsn": 25261936,
        "size": 26050560,
        "needed": false
      },
      "timeline_id": "454626700469f0a9914949b9d018e876",
      "kind": "GcCutOff"
    },
    {
      "segment": {
        "parent": 4,
        "lsn": 25393008,
        "size": null,
        "needed": true
      },
      "timeline_id": "454626700469f0a9914949b9d018e876",
      "kind": "BranchEnd"
    },
    {
      "segment": {
        "parent": null,
        "lsn": 23694408,
        "size": null,
        "needed": false
      },
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "kind": "BranchStart"
    },
    {
      "segment": {
        "parent": 6,
        "lsn": 24566168,
        "size": 25739264,
        "needed": false
      },
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "kind": "BranchPoint"
    },
    {
      "segment": {
        "parent": 7,
        "lsn": 25902488,
        "size": 26402816,
        "needed": false
      },
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "kind": "GcCutOff"
    },
    {
      "segment": {
        "parent": 8,
        "lsn": 26033560,
        "size": 26468352,
        "needed": true
      },
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "kind": "BranchPoint"
    },
    {
      "segment": {
        "parent": 9,
        "lsn": 26033560,
        "size": null,
        "needed": true
      },
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "kind": "BranchEnd"
    }
  ],
  "timeline_inputs": [
    {
      "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
      "ancestor_lsn": "0/18D3D98",
      "last_record": "0/2230CD0",
      "latest_gc_cutoff": "0/1698C48",
      "horizon_cutoff": "0/2210CD0",
      "pitr_cutoff": "0/2210CD0",
      "next_gc_cutoff": "0/2210CD0",
      "retention_param_cutoff": null
    },
    {
      "timeline_id": "454626700469f0a9914949b9d018e876",
      "ancestor_lsn": "0/176D998",
      "last_record": "0/1837770",
      "latest_gc_cutoff": "0/1698C48",
      "horizon_cutoff": "0/1817770",
      "pitr_cutoff": "0/1817770",
      "next_gc_cutoff": "0/1817770",
      "retention_param_cutoff": null
    },
    {
      "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
      "ancestor_lsn": "0/0",
      "last_record": "0/18D3D98",
      "latest_gc_cutoff": "0/1698C48",
      "horizon_cutoff": "0/18B3D98",
      "pitr_cutoff": "0/18B3D98",
      "next_gc_cutoff": "0/18B3D98",
      "retention_param_cutoff": null
    }
  ]
}
"#;
    let inputs: ModelInputs = serde_json::from_str(doc).unwrap();

    assert_eq!(inputs.calculate().unwrap(), 37_851_408);
}

#[test]
fn verify_size_for_one_branch() {
    let doc = r#"
{
  "segments": [
    {
      "segment": {
        "parent": null,
        "lsn": 0,
        "size": null,
        "needed": false
      },
      "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
      "kind": "BranchStart"
    },
    {
      "segment": {
        "parent": 0,
        "lsn": 305547335776,
        "size": 220054675456,
        "needed": false
      },
      "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
      "kind": "GcCutOff"
    },
    {
      "segment": {
        "parent": 1,
        "lsn": 305614444640,
        "size": null,
        "needed": true
      },
      "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
      "kind": "BranchEnd"
    }
  ],
  "timeline_inputs": [
    {
      "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
      "ancestor_lsn": "0/0",
      "last_record": "47/280A5860",
      "latest_gc_cutoff": "47/240A5860",
      "horizon_cutoff": "47/240A5860",
      "pitr_cutoff": "47/240A5860",
      "next_gc_cutoff": "47/240A5860",
      "retention_param_cutoff": "0/0"
    }
  ]
}"#;

    let model: ModelInputs = serde_json::from_str(doc).unwrap();

    let res = model.calculate_model().unwrap().calculate();

    println!("calculated synthetic size: {}", res.total_size);
    println!("result: {:?}", serde_json::to_string(&res.segments));

    use utils::lsn::Lsn;
    let latest_gc_cutoff_lsn: Lsn = "47/240A5860".parse().unwrap();
    let last_lsn: Lsn = "47/280A5860".parse().unwrap();
    println!(
        "latest_gc_cutoff lsn 47/240A5860 is {}, last_lsn lsn 47/280A5860 is {}",
        u64::from(latest_gc_cutoff_lsn),
        u64::from(last_lsn)
    );
    assert_eq!(res.total_size, 220121784320);
}