Line data Source code
1 : use std::cmp;
2 : use std::collections::hash_map::Entry;
3 : use std::collections::{HashMap, HashSet};
4 : use std::sync::Arc;
5 :
6 : use tokio::sync::oneshot::error::RecvError;
7 : use tokio::sync::Semaphore;
8 : use tokio_util::sync::CancellationToken;
9 :
10 : use crate::context::RequestContext;
11 : use crate::pgdatadir_mapping::CalculateLogicalSizeError;
12 :
13 : use super::{GcError, LogicalSizeCalculationCause, Tenant};
14 : use crate::tenant::Timeline;
15 : use utils::id::TimelineId;
16 : use utils::lsn::Lsn;
17 :
18 : use tracing::*;
19 :
20 : use tenant_size_model::{Segment, StorageModel};
21 :
/// Inputs to the actual tenant sizing model
///
/// Implements [`serde::Serialize`] but is not meant to be part of the public API, instead meant to
/// be a transferable format between execution environments and developer.
///
/// This tracks more information than the actual StorageModel that calculation
/// needs. We will convert this into a StorageModel when it's time to perform
/// the calculation (see [`ModelInputs::calculate_model`]).
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct ModelInputs {
    /// All segments of all timelines. `Segment::parent` values are indices into this vector.
    pub segments: Vec<SegmentMeta>,
    /// Per-timeline LSNs that explain how the segments were derived; not used by the
    /// size calculation itself.
    pub timeline_inputs: Vec<TimelineInputs>,
}
36 :
/// A [`Segment`], with some extra information for display purposes
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct SegmentMeta {
    /// The plain model segment; this is the only part handed to the `StorageModel`.
    pub segment: Segment,
    /// Timeline this segment belongs to.
    pub timeline_id: TimelineId,
    /// What kind of "interesting" LSN this segment represents on its timeline.
    pub kind: LsnKind,
}
44 :
45 0 : #[derive(thiserror::Error, Debug)]
46 : pub(crate) enum CalculateSyntheticSizeError {
47 : /// Something went wrong internally to the calculation of logical size at a particular branch point
48 : #[error("Failed to calculated logical size on timeline {timeline_id} at {lsn}: {error}")]
49 : LogicalSize {
50 : timeline_id: TimelineId,
51 : lsn: Lsn,
52 : error: CalculateLogicalSizeError,
53 : },
54 :
55 : /// Something went wrong internally when calculating GC parameters at start of size calculation
56 : #[error(transparent)]
57 : GcInfo(GcError),
58 :
59 : /// Totally unexpected errors, like panics joining a task
60 : #[error(transparent)]
61 : Fatal(anyhow::Error),
62 :
63 : /// Tenant shut down while calculating size
64 : #[error("Cancelled")]
65 : Cancelled,
66 : }
67 :
68 : impl From<GcError> for CalculateSyntheticSizeError {
69 0 : fn from(value: GcError) -> Self {
70 0 : match value {
71 : GcError::TenantCancelled | GcError::TimelineCancelled => {
72 0 : CalculateSyntheticSizeError::Cancelled
73 : }
74 0 : other => CalculateSyntheticSizeError::GcInfo(other),
75 : }
76 0 : }
77 : }
78 :
79 : impl SegmentMeta {
80 0 : fn size_needed(&self) -> bool {
81 0 : match self.kind {
82 : LsnKind::BranchStart => {
83 : // If we don't have a later GcCutoff point on this branch, and
84 : // no ancestor, calculate size for the branch start point.
85 0 : self.segment.needed && self.segment.parent.is_none()
86 : }
87 0 : LsnKind::BranchPoint => true,
88 0 : LsnKind::GcCutOff => true,
89 0 : LsnKind::BranchEnd => false,
90 : }
91 0 : }
92 : }
93 :
/// The role an LSN plays on its timeline.
///
/// NOTE: `Ord`/`PartialOrd` are derived, so the declaration order of the variants is the
/// ordering used when `(Lsn, LsnKind)` tuples with equal LSNs are sorted in `gather_inputs`.
/// Do not reorder the variants without considering that.
#[derive(
    Debug, Clone, Copy, Eq, Ord, PartialEq, PartialOrd, serde::Serialize, serde::Deserialize,
)]
pub enum LsnKind {
    /// A timeline starting here
    BranchStart,
    /// A child timeline branches off from here
    BranchPoint,
    /// GC cutoff point
    GcCutOff,
    /// Last record LSN
    BranchEnd,
}
107 :
/// Collect all relevant LSNs to the inputs. These will only be helpful in the serialized form as
/// part of [`ModelInputs`] from the HTTP api, explaining the inputs.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
pub struct TimelineInputs {
    /// The timeline these inputs describe
    pub timeline_id: TimelineId,

    /// The timeline this one was branched from, if any
    pub ancestor_id: Option<TimelineId>,

    /// Ancestor LSN as reported by the timeline
    ancestor_lsn: Lsn,
    /// Last record LSN of the timeline at the time the inputs were gathered
    last_record: Lsn,
    /// The timeline's latest GC cutoff; recorded for information only, the segments are not
    /// derived from it because it might not have been updated recently enough
    latest_gc_cutoff: Lsn,
    /// Space-based (horizon) cutoff taken from the timeline's GC info
    horizon_cutoff: Lsn,
    /// Time-based (PITR) cutoff taken from the timeline's GC info
    pitr_cutoff: Lsn,

    /// Cutoff point based on GC settings: the PITR cutoff, raised to
    /// `retention_param_cutoff` when that one is later
    next_gc_cutoff: Lsn,

    /// Cutoff point calculated from the user-supplied 'max_retention_period'
    retention_param_cutoff: Option<Lsn>,
}
128 :
129 : /// Gathers the inputs for the tenant sizing model.
130 : ///
131 : /// Tenant size does not consider the latest state, but only the state until next_gc_cutoff, which
132 : /// is updated on-demand, during the start of this calculation and separate from the
133 : /// [`TimelineInputs::latest_gc_cutoff`].
134 : ///
135 : /// For timelines in general:
136 : ///
137 : /// ```text
138 : /// 0-----|---------|----|------------| · · · · · |·> lsn
139 : /// initdb_lsn branchpoints* next_gc_cutoff latest
140 : /// ```
141 : ///
142 : /// Until gc_horizon_cutoff > `Timeline::last_record_lsn` for any of the tenant's timelines, the
143 : /// tenant size will be zero.
144 0 : pub(super) async fn gather_inputs(
145 0 : tenant: &Tenant,
146 0 : limit: &Arc<Semaphore>,
147 0 : max_retention_period: Option<u64>,
148 0 : logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
149 0 : cause: LogicalSizeCalculationCause,
150 0 : cancel: &CancellationToken,
151 0 : ctx: &RequestContext,
152 0 : ) -> Result<ModelInputs, CalculateSyntheticSizeError> {
153 0 : // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
154 0 : tenant.refresh_gc_info(cancel, ctx).await?;
155 :
156 : // Collect information about all the timelines
157 0 : let mut timelines = tenant.list_timelines();
158 0 :
159 0 : if timelines.is_empty() {
160 : // perhaps the tenant has just been created, and as such doesn't have any data yet
161 0 : return Ok(ModelInputs {
162 0 : segments: vec![],
163 0 : timeline_inputs: Vec::new(),
164 0 : });
165 0 : }
166 0 :
167 0 : // Filter out timelines that are not active
168 0 : //
169 0 : // There may be a race when a timeline is dropped,
170 0 : // but it is unlikely to cause any issues. In the worst case,
171 0 : // the calculation will error out.
172 0 : timelines.retain(|t| t.is_active());
173 0 :
174 0 : // Build a map of branch points.
175 0 : let mut branchpoints: HashMap<TimelineId, HashSet<Lsn>> = HashMap::new();
176 0 : for timeline in timelines.iter() {
177 0 : if let Some(ancestor_id) = timeline.get_ancestor_timeline_id() {
178 0 : branchpoints
179 0 : .entry(ancestor_id)
180 0 : .or_default()
181 0 : .insert(timeline.get_ancestor_lsn());
182 0 : }
183 : }
184 :
185 : // These become the final result.
186 0 : let mut timeline_inputs = Vec::with_capacity(timelines.len());
187 0 : let mut segments: Vec<SegmentMeta> = Vec::new();
188 0 :
189 0 : //
190 0 : // Build Segments representing each timeline. As we do that, also remember
191 0 : // the branchpoints and branch startpoints in 'branchpoint_segments' and
192 0 : // 'branchstart_segments'
193 0 : //
194 0 :
195 0 : // BranchPoint segments of each timeline
196 0 : // (timeline, branchpoint LSN) -> segment_id
197 0 : let mut branchpoint_segments: HashMap<(TimelineId, Lsn), usize> = HashMap::new();
198 0 :
199 0 : // timeline, Branchpoint seg id, (ancestor, ancestor LSN)
200 0 : type BranchStartSegment = (TimelineId, usize, Option<(TimelineId, Lsn)>);
201 0 : let mut branchstart_segments: Vec<BranchStartSegment> = Vec::new();
202 :
203 0 : for timeline in timelines.iter() {
204 0 : let timeline_id = timeline.timeline_id;
205 0 : let last_record_lsn = timeline.get_last_record_lsn();
206 0 : let ancestor_lsn = timeline.get_ancestor_lsn();
207 0 :
208 0 : // there's a race between the update (holding tenant.gc_lock) and this read but it
209 0 : // might not be an issue, because it's not for Timeline::gc
210 0 : let gc_info = timeline.gc_info.read().unwrap();
211 0 :
212 0 : // similar to gc, but Timeline::get_latest_gc_cutoff_lsn() will not be updated before a
213 0 : // new gc run, which we have no control over. however differently from `Timeline::gc`
214 0 : // we don't consider the `Timeline::disk_consistent_lsn` at all, because we are not
215 0 : // actually removing files.
216 0 : //
217 0 : // We only consider [`GcInfo::pitr_cutoff`], and not [`GcInfo::horizon_cutoff`], because from
218 0 : // a user's perspective they have only requested retention up to the time bound (pitr_cutoff), rather
219 0 : // than a space bound (horizon cutoff). This means that if someone drops a database and waits for their
220 0 : // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
221 0 : // horizon_cutoff.
222 0 : let pitr_cutoff = gc_info.cutoffs.pitr;
223 0 : let horizon_cutoff = gc_info.cutoffs.horizon;
224 0 : let mut next_gc_cutoff = pitr_cutoff;
225 :
226 : // If the caller provided a shorter retention period, use that instead of the GC cutoff.
227 0 : let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
228 0 : let param_cutoff = Lsn(last_record_lsn.0.saturating_sub(max_retention_period));
229 0 : if next_gc_cutoff < param_cutoff {
230 0 : next_gc_cutoff = param_cutoff;
231 0 : }
232 0 : Some(param_cutoff)
233 : } else {
234 0 : None
235 : };
236 :
237 : // next_gc_cutoff in parent branch are not of interest (right now at least), nor do we
238 : // want to query any logical size before initdb_lsn.
239 0 : let branch_start_lsn = cmp::max(ancestor_lsn, timeline.initdb_lsn);
240 0 :
241 0 : // Build "interesting LSNs" on this timeline
242 0 : let mut lsns: Vec<(Lsn, LsnKind)> = gc_info
243 0 : .retain_lsns
244 0 : .iter()
245 0 : .filter(|&&lsn| lsn > ancestor_lsn)
246 0 : .copied()
247 0 : // this assumes there are no other retain_lsns than the branchpoints
248 0 : .map(|lsn| (lsn, LsnKind::BranchPoint))
249 0 : .collect::<Vec<_>>();
250 0 :
251 0 : drop(gc_info);
252 :
253 : // Add branch points we collected earlier, just in case there were any that were
254 : // not present in retain_lsns. We will remove any duplicates below later.
255 0 : if let Some(this_branchpoints) = branchpoints.get(&timeline_id) {
256 0 : lsns.extend(
257 0 : this_branchpoints
258 0 : .iter()
259 0 : .map(|lsn| (*lsn, LsnKind::BranchPoint)),
260 0 : )
261 0 : }
262 :
263 : // Add a point for the GC cutoff
264 0 : let branch_start_needed = next_gc_cutoff <= branch_start_lsn;
265 0 : if !branch_start_needed {
266 0 : lsns.push((next_gc_cutoff, LsnKind::GcCutOff));
267 0 : }
268 :
269 0 : lsns.sort_unstable();
270 0 : lsns.dedup();
271 0 :
272 0 : //
273 0 : // Create Segments for the interesting points.
274 0 : //
275 0 :
276 0 : // Timeline start point
277 0 : let ancestor = timeline
278 0 : .get_ancestor_timeline_id()
279 0 : .map(|ancestor_id| (ancestor_id, ancestor_lsn));
280 0 : branchstart_segments.push((timeline_id, segments.len(), ancestor));
281 0 : segments.push(SegmentMeta {
282 0 : segment: Segment {
283 0 : parent: None, // filled in later
284 0 : lsn: branch_start_lsn.0,
285 0 : size: None, // filled in later
286 0 : needed: branch_start_needed,
287 0 : },
288 0 : timeline_id: timeline.timeline_id,
289 0 : kind: LsnKind::BranchStart,
290 0 : });
291 0 :
292 0 : // GC cutoff point, and any branch points, i.e. points where
293 0 : // other timelines branch off from this timeline.
294 0 : let mut parent = segments.len() - 1;
295 0 : for (lsn, kind) in lsns {
296 0 : if kind == LsnKind::BranchPoint {
297 0 : branchpoint_segments.insert((timeline_id, lsn), segments.len());
298 0 : }
299 0 : segments.push(SegmentMeta {
300 0 : segment: Segment {
301 0 : parent: Some(parent),
302 0 : lsn: lsn.0,
303 0 : size: None,
304 0 : needed: lsn > next_gc_cutoff,
305 0 : },
306 0 : timeline_id: timeline.timeline_id,
307 0 : kind,
308 0 : });
309 0 : parent += 1;
310 : }
311 :
312 : // Current end of the timeline
313 0 : segments.push(SegmentMeta {
314 0 : segment: Segment {
315 0 : parent: Some(parent),
316 0 : lsn: last_record_lsn.0,
317 0 : size: None, // Filled in later, if necessary
318 0 : needed: true,
319 0 : },
320 0 : timeline_id: timeline.timeline_id,
321 0 : kind: LsnKind::BranchEnd,
322 0 : });
323 0 :
324 0 : timeline_inputs.push(TimelineInputs {
325 0 : timeline_id: timeline.timeline_id,
326 0 : ancestor_id: timeline.get_ancestor_timeline_id(),
327 0 : ancestor_lsn,
328 0 : last_record: last_record_lsn,
329 0 : // this is not used above, because it might not have updated recently enough
330 0 : latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
331 0 : horizon_cutoff,
332 0 : pitr_cutoff,
333 0 : next_gc_cutoff,
334 0 : retention_param_cutoff,
335 0 : });
336 : }
337 :
338 : // We now have all segments from the timelines in 'segments'. The timelines
339 : // haven't been linked to each other yet, though. Do that.
340 0 : for (_timeline_id, seg_id, ancestor) in branchstart_segments {
341 : // Look up the branch point
342 0 : if let Some(ancestor) = ancestor {
343 0 : let parent_id = *branchpoint_segments.get(&ancestor).unwrap();
344 0 : segments[seg_id].segment.parent = Some(parent_id);
345 0 : }
346 : }
347 :
348 : // We left the 'size' field empty in all of the Segments so far.
349 : // Now find logical sizes for all of the points that might need or benefit from them.
350 0 : fill_logical_sizes(
351 0 : &timelines,
352 0 : &mut segments,
353 0 : limit,
354 0 : logical_size_cache,
355 0 : cause,
356 0 : ctx,
357 0 : )
358 0 : .await?;
359 :
360 0 : if tenant.cancel.is_cancelled() {
361 : // If we're shutting down, return an error rather than a sparse result that might include some
362 : // timelines from before we started shutting down
363 0 : return Err(CalculateSyntheticSizeError::Cancelled);
364 0 : }
365 0 :
366 0 : Ok(ModelInputs {
367 0 : segments,
368 0 : timeline_inputs,
369 0 : })
370 0 : }
371 :
372 : /// Augment 'segments' with logical sizes
373 : ///
374 : /// This will leave segments' sizes as None if the Timeline associated with the segment is deleted concurrently
375 : /// (i.e. we cannot read its logical size at a particular LSN).
376 0 : async fn fill_logical_sizes(
377 0 : timelines: &[Arc<Timeline>],
378 0 : segments: &mut [SegmentMeta],
379 0 : limit: &Arc<Semaphore>,
380 0 : logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
381 0 : cause: LogicalSizeCalculationCause,
382 0 : ctx: &RequestContext,
383 0 : ) -> Result<(), CalculateSyntheticSizeError> {
384 0 : let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(
385 0 : timelines
386 0 : .iter()
387 0 : .map(|timeline| (timeline.timeline_id, Arc::clone(timeline))),
388 0 : );
389 0 :
390 0 : // record the used/inserted cache keys here, to remove extras not to start leaking
391 0 : // after initial run the cache should be quite stable, but live timelines will eventually
392 0 : // require new lsns to be inspected.
393 0 : let mut sizes_needed = HashMap::<(TimelineId, Lsn), Option<u64>>::new();
394 0 :
395 0 : // with joinset, on drop, all of the tasks will just be de-scheduled, which we can use to
396 0 : // our advantage with `?` error handling.
397 0 : let mut joinset = tokio::task::JoinSet::new();
398 :
399 : // For each point that would benefit from having a logical size available,
400 : // spawn a Task to fetch it, unless we have it cached already.
401 0 : for seg in segments.iter() {
402 0 : if !seg.size_needed() {
403 0 : continue;
404 0 : }
405 0 :
406 0 : let timeline_id = seg.timeline_id;
407 0 : let lsn = Lsn(seg.segment.lsn);
408 :
409 0 : if let Entry::Vacant(e) = sizes_needed.entry((timeline_id, lsn)) {
410 0 : let cached_size = logical_size_cache.get(&(timeline_id, lsn)).cloned();
411 0 : if cached_size.is_none() {
412 0 : let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap());
413 0 : let parallel_size_calcs = Arc::clone(limit);
414 0 : let ctx = ctx.attached_child();
415 0 : joinset.spawn(
416 0 : calculate_logical_size(parallel_size_calcs, timeline, lsn, cause, ctx)
417 0 : .in_current_span(),
418 0 : );
419 0 : }
420 0 : e.insert(cached_size);
421 0 : }
422 : }
423 :
424 : // Perform the size lookups
425 0 : let mut have_any_error = None;
426 0 : while let Some(res) = joinset.join_next().await {
427 : // each of these come with Result<anyhow::Result<_>, JoinError>
428 : // because of spawn + spawn_blocking
429 0 : match res {
430 0 : Err(join_error) if join_error.is_cancelled() => {
431 0 : unreachable!("we are not cancelling any of the futures, nor should be");
432 : }
433 0 : Err(join_error) => {
434 0 : // cannot really do anything, as this panic is likely a bug
435 0 : error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}");
436 :
437 0 : have_any_error = Some(CalculateSyntheticSizeError::Fatal(
438 0 : anyhow::anyhow!(join_error)
439 0 : .context("task that calls spawn_ondemand_logical_size_calculation"),
440 0 : ));
441 : }
442 0 : Ok(Err(recv_result_error)) => {
443 0 : // cannot really do anything, as this panic is likely a bug
444 0 : error!("failed to receive logical size query result: {recv_result_error:#}");
445 0 : have_any_error = Some(CalculateSyntheticSizeError::Fatal(
446 0 : anyhow::anyhow!(recv_result_error)
447 0 : .context("Receiving logical size query result"),
448 0 : ));
449 : }
450 0 : Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Err(error)))) => {
451 0 : if matches!(error, CalculateLogicalSizeError::Cancelled) {
452 : // Skip this: it's okay if one timeline among many is shutting down while we
453 : // calculate inputs for the overall tenant.
454 0 : continue;
455 : } else {
456 0 : warn!(
457 0 : timeline_id=%timeline.timeline_id,
458 0 : "failed to calculate logical size at {lsn}: {error:#}"
459 : );
460 0 : have_any_error = Some(CalculateSyntheticSizeError::LogicalSize {
461 0 : timeline_id: timeline.timeline_id,
462 0 : lsn,
463 0 : error,
464 0 : });
465 : }
466 : }
467 0 : Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Ok(size)))) => {
468 0 : debug!(timeline_id=%timeline.timeline_id, %lsn, size, "size calculated");
469 :
470 0 : logical_size_cache.insert((timeline.timeline_id, lsn), size);
471 0 : sizes_needed.insert((timeline.timeline_id, lsn), Some(size));
472 : }
473 : }
474 : }
475 :
476 : // prune any keys not needed anymore; we record every used key and added key.
477 0 : logical_size_cache.retain(|key, _| sizes_needed.contains_key(key));
478 :
479 0 : if let Some(error) = have_any_error {
480 : // we cannot complete this round, because we are missing data.
481 : // we have however cached all we were able to request calculation on.
482 0 : return Err(error);
483 0 : }
484 :
485 : // Insert the looked up sizes to the Segments
486 0 : for seg in segments.iter_mut() {
487 0 : if !seg.size_needed() {
488 0 : continue;
489 0 : }
490 0 :
491 0 : let timeline_id = seg.timeline_id;
492 0 : let lsn = Lsn(seg.segment.lsn);
493 :
494 0 : if let Some(Some(size)) = sizes_needed.get(&(timeline_id, lsn)) {
495 0 : seg.segment.size = Some(*size);
496 0 : }
497 : }
498 0 : Ok(())
499 0 : }
500 :
501 : impl ModelInputs {
502 4 : pub fn calculate_model(&self) -> tenant_size_model::StorageModel {
503 4 : // Convert SegmentMetas into plain Segments
504 4 : StorageModel {
505 4 : segments: self
506 4 : .segments
507 4 : .iter()
508 28 : .map(|seg| seg.segment.clone())
509 4 : .collect(),
510 4 : }
511 4 : }
512 :
513 : // calculate total project size
514 2 : pub fn calculate(&self) -> u64 {
515 2 : let storage = self.calculate_model();
516 2 : let sizes = storage.calculate();
517 2 : sizes.total_size
518 2 : }
519 : }
520 :
/// Newtype around the tuple that carries the timeline at lsn logical size calculation.
///
/// Fields, in order: the timeline the size was calculated for, the LSN it was calculated at,
/// and the outcome of the calculation.
struct TimelineAtLsnSizeResult(
    Arc<crate::tenant::Timeline>,
    utils::lsn::Lsn,
    Result<u64, CalculateLogicalSizeError>,
);
527 :
528 0 : #[instrument(skip_all, fields(timeline_id=%timeline.timeline_id, lsn=%lsn))]
529 : async fn calculate_logical_size(
530 : limit: Arc<tokio::sync::Semaphore>,
531 : timeline: Arc<crate::tenant::Timeline>,
532 : lsn: utils::lsn::Lsn,
533 : cause: LogicalSizeCalculationCause,
534 : ctx: RequestContext,
535 : ) -> Result<TimelineAtLsnSizeResult, RecvError> {
536 : let _permit = tokio::sync::Semaphore::acquire_owned(limit)
537 : .await
538 : .expect("global semaphore should not had been closed");
539 :
540 : let size_res = timeline
541 : .spawn_ondemand_logical_size_calculation(lsn, cause, ctx)
542 : .instrument(info_span!("spawn_ondemand_logical_size_calculation"))
543 : .await?;
544 : Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
545 : }
546 :
/// Deserializes a captured [`ModelInputs`] document with three timelines (one root and two
/// branches off it) and checks the total size the model calculates for it.
#[test]
fn verify_size_for_multiple_branches() {
    // this is generated from integration test test_tenant_size_with_multiple_branches, but this way
    // it has the stable lsn's
    //
    // The timeline_inputs don't participate in the size calculation, and are here just to explain
    // the inputs.
    //
    // "parent" values are indices into the "segments" array.
    let doc = r#"
    {
      "segments": [
        {
          "segment": {
            "parent": 9,
            "lsn": 26033560,
            "size": null,
            "needed": false
          },
          "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
          "kind": "BranchStart"
        },
        {
          "segment": {
            "parent": 0,
            "lsn": 35720400,
            "size": 25206784,
            "needed": false
          },
          "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
          "kind": "GcCutOff"
        },
        {
          "segment": {
            "parent": 1,
            "lsn": 35851472,
            "size": null,
            "needed": true
          },
          "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
          "kind": "BranchEnd"
        },
        {
          "segment": {
            "parent": 7,
            "lsn": 24566168,
            "size": null,
            "needed": false
          },
          "timeline_id": "454626700469f0a9914949b9d018e876",
          "kind": "BranchStart"
        },
        {
          "segment": {
            "parent": 3,
            "lsn": 25261936,
            "size": 26050560,
            "needed": false
          },
          "timeline_id": "454626700469f0a9914949b9d018e876",
          "kind": "GcCutOff"
        },
        {
          "segment": {
            "parent": 4,
            "lsn": 25393008,
            "size": null,
            "needed": true
          },
          "timeline_id": "454626700469f0a9914949b9d018e876",
          "kind": "BranchEnd"
        },
        {
          "segment": {
            "parent": null,
            "lsn": 23694408,
            "size": null,
            "needed": false
          },
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "kind": "BranchStart"
        },
        {
          "segment": {
            "parent": 6,
            "lsn": 24566168,
            "size": 25739264,
            "needed": false
          },
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "kind": "BranchPoint"
        },
        {
          "segment": {
            "parent": 7,
            "lsn": 25902488,
            "size": 26402816,
            "needed": false
          },
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "kind": "GcCutOff"
        },
        {
          "segment": {
            "parent": 8,
            "lsn": 26033560,
            "size": 26468352,
            "needed": true
          },
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "kind": "BranchPoint"
        },
        {
          "segment": {
            "parent": 9,
            "lsn": 26033560,
            "size": null,
            "needed": true
          },
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "kind": "BranchEnd"
        }
      ],
      "timeline_inputs": [
        {
          "timeline_id": "20b129c9b50cff7213e6503a31b2a5ce",
          "ancestor_lsn": "0/18D3D98",
          "last_record": "0/2230CD0",
          "latest_gc_cutoff": "0/1698C48",
          "horizon_cutoff": "0/2210CD0",
          "pitr_cutoff": "0/2210CD0",
          "next_gc_cutoff": "0/2210CD0",
          "retention_param_cutoff": null
        },
        {
          "timeline_id": "454626700469f0a9914949b9d018e876",
          "ancestor_lsn": "0/176D998",
          "last_record": "0/1837770",
          "latest_gc_cutoff": "0/1698C48",
          "horizon_cutoff": "0/1817770",
          "pitr_cutoff": "0/1817770",
          "next_gc_cutoff": "0/1817770",
          "retention_param_cutoff": null
        },
        {
          "timeline_id": "cb5e3cbe60a4afc00d01880e1a37047f",
          "ancestor_lsn": "0/0",
          "last_record": "0/18D3D98",
          "latest_gc_cutoff": "0/1698C48",
          "horizon_cutoff": "0/18B3D98",
          "pitr_cutoff": "0/18B3D98",
          "next_gc_cutoff": "0/18B3D98",
          "retention_param_cutoff": null
        }
      ]
    }
    "#;
    let inputs: ModelInputs = serde_json::from_str(doc).unwrap();

    // Expected total pinned from the captured inputs above.
    assert_eq!(inputs.calculate(), 37_851_408);
}
706 :
/// Deserializes a [`ModelInputs`] document with a single timeline (branch start, GC cutoff and
/// branch end) and checks the total size the model calculates for it.
#[test]
fn verify_size_for_one_branch() {
    let doc = r#"
    {
      "segments": [
        {
          "segment": {
            "parent": null,
            "lsn": 0,
            "size": null,
            "needed": false
          },
          "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
          "kind": "BranchStart"
        },
        {
          "segment": {
            "parent": 0,
            "lsn": 305547335776,
            "size": 220054675456,
            "needed": false
          },
          "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
          "kind": "GcCutOff"
        },
        {
          "segment": {
            "parent": 1,
            "lsn": 305614444640,
            "size": null,
            "needed": true
          },
          "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
          "kind": "BranchEnd"
        }
      ],
      "timeline_inputs": [
        {
          "timeline_id": "f15ae0cf21cce2ba27e4d80c6709a6cd",
          "ancestor_lsn": "0/0",
          "last_record": "47/280A5860",
          "latest_gc_cutoff": "47/240A5860",
          "horizon_cutoff": "47/240A5860",
          "pitr_cutoff": "47/240A5860",
          "next_gc_cutoff": "47/240A5860",
          "retention_param_cutoff": "0/0"
        }
      ]
    }"#;

    let model: ModelInputs = serde_json::from_str(doc).unwrap();

    let res = model.calculate_model().calculate();

    // Diagnostic output only; visible when the test is run with output capture disabled.
    println!("calculated synthetic size: {}", res.total_size);
    println!("result: {:?}", serde_json::to_string(&res.segments));

    // Print the numeric values of the textual LSNs from `timeline_inputs`, for comparison
    // with the numeric "lsn" fields of the segments.
    use utils::lsn::Lsn;
    let latest_gc_cutoff_lsn: Lsn = "47/240A5860".parse().unwrap();
    let last_lsn: Lsn = "47/280A5860".parse().unwrap();
    println!(
        "latest_gc_cutoff lsn 47/240A5860 is {}, last_lsn lsn 47/280A5860 is {}",
        u64::from(latest_gc_cutoff_lsn),
        u64::from(last_lsn)
    );
    assert_eq!(res.total_size, 220121784320);
}
|