Line data Source code
1 : pub mod detach_ancestor;
2 : pub mod partitioning;
3 : pub mod utilization;
4 :
5 : use core::ops::Range;
6 : use std::collections::HashMap;
7 : use std::fmt::Display;
8 : use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
9 : use std::str::FromStr;
10 : use std::time::{Duration, SystemTime};
11 :
12 : #[cfg(feature = "testing")]
13 : use camino::Utf8PathBuf;
14 : use postgres_versioninfo::PgMajorVersion;
15 : use serde::{Deserialize, Deserializer, Serialize, Serializer};
16 : use serde_with::serde_as;
17 : pub use utilization::PageserverUtilization;
18 : use utils::id::{NodeId, TenantId, TimelineId};
19 : use utils::lsn::Lsn;
20 : use utils::{completion, serde_system_time};
21 :
22 : use crate::config::Ratio;
23 : use crate::key::{CompactKey, Key};
24 : use crate::shard::{
25 : DEFAULT_STRIPE_SIZE, ShardCount, ShardIdentity, ShardStripeSize, TenantShardId,
26 : };
27 :
/// The state of a tenant in this pageserver.
///
/// Serialized in serde's adjacently tagged form: the variant name goes under
/// `slug` and the variant payload (if any) under `data`.
///
/// ```mermaid
/// stateDiagram-v2
///
///     [*] --> Attaching: spawn_attach()
///
///     Attaching --> Activating: activate()
///     Activating --> Active: infallible
///
///     Attaching --> Broken: attach() failure
///
///     Active --> Stopping: set_stopping(), part of shutdown & detach
///     Stopping --> Broken: late error in remove_tenant_from_memory
///
///     Broken --> [*]: ignore / detach / shutdown
///     Stopping --> [*]: remove_from_memory complete
///
///     Active --> Broken: cfg(testing)-only tenant break point
/// ```
#[derive(
    Clone,
    PartialEq,
    Eq,
    serde::Serialize,
    serde::Deserialize,
    strum_macros::Display,
    strum_macros::VariantNames,
    strum_macros::AsRefStr,
    strum_macros::IntoStaticStr,
)]
#[serde(tag = "slug", content = "data")]
pub enum TenantState {
    /// This tenant is being attached to the pageserver.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Attaching,
    /// The tenant is transitioning from Attaching to Active.
    ///
    /// The payload records the state we came from; [`ActivatingFrom`] currently has a
    /// single variant, so this is always `Attaching`.
    ///
    /// While in this state, the individual timelines are being activated.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Activating(ActivatingFrom),
    /// The tenant has finished activating and is open for business.
    ///
    /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
    Active,
    /// The tenant is recognized by pageserver, but it is being detached or the
    /// system is being shut down.
    ///
    /// Transitions out of this state are possible through `set_broken()`.
    Stopping {
        /// The barrier can be used to wait for shutdown to complete. The first caller to set
        /// `Some(Barrier)` is responsible for driving shutdown to completion. Subsequent callers
        /// will wait for the first caller's existing barrier.
        ///
        /// `None` is set when an attach is cancelled, to signal to shutdown that the attach has
        /// in fact been cancelled:
        ///
        /// 1. `shutdown` sees `TenantState::Attaching`, and cancels the tenant.
        /// 2. `attach` sets `TenantState::Stopping { progress: None }` and exits.
        /// 3. `set_stopping` waits for `TenantState::Stopping { progress: None }` and sets
        ///    `TenantState::Stopping { progress: Some(..) }` to claim the barrier as the
        ///    shutdown owner.
        //
        // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
        // otherwise it will not be skipped during deserialization
        #[serde(skip)]
        progress: Option<completion::Barrier>,
    },
    /// The tenant is recognized by the pageserver, but can no longer be used for
    /// any operations.
    ///
    /// If the tenant fails to load or attach, it will transition to this state
    /// and it is guaranteed that no background tasks are running in its name.
    ///
    /// The other way to transition into this state is from `Stopping` state
    /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
    /// if the cleanup future executed by `remove_tenant_from_memory()` fails.
    ///
    /// `reason` is the human-readable cause; `backtrace` is a stringified backtrace
    /// (see [`TenantState::broken_from_reason`]).
    Broken { reason: String, backtrace: String },
}
108 :
109 : impl TenantState {
110 0 : pub fn attachment_status(&self) -> TenantAttachmentStatus {
111 : use TenantAttachmentStatus::*;
112 :
113 : // Below TenantState::Activating is used as "transient" or "transparent" state for
114 : // attachment_status determining.
115 0 : match self {
116 : // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
117 : // So, technically, we can return Attached here.
118 : // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
119 : // But, our attach task might still be fetching the remote timelines, etc.
120 : // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
121 0 : Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
122 : // We only reach Active after successful load / attach.
123 : // So, call atttachment status Attached.
124 0 : Self::Active => Attached,
125 : // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
126 : // However, it also becomes Broken if the regular load fails.
127 : // From Console's perspective there's no practical difference
128 : // because attachment_status is polled by console only during attach operation execution.
129 0 : Self::Broken { reason, .. } => Failed {
130 0 : reason: reason.to_owned(),
131 0 : },
132 : // Why is Stopping a Maybe case? Because, during pageserver shutdown,
133 : // we set the Stopping state irrespective of whether the tenant
134 : // has finished attaching or not.
135 0 : Self::Stopping { .. } => Maybe,
136 : }
137 0 : }
138 :
139 0 : pub fn broken_from_reason(reason: String) -> Self {
140 0 : let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture());
141 0 : Self::Broken {
142 0 : reason,
143 0 : backtrace: backtrace_str,
144 0 : }
145 0 : }
146 : }
147 :
148 : impl std::fmt::Debug for TenantState {
149 2 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
150 2 : match self {
151 2 : Self::Broken { reason, backtrace } if !reason.is_empty() => {
152 2 : write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}")
153 : }
154 0 : _ => write!(f, "{self}"),
155 : }
156 2 : }
157 : }
158 :
/// A temporary lease to a specific lsn inside a timeline.
/// Access to the lsn is guaranteed by the pageserver until the expiration indicated by `valid_until`.
#[serde_as]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LsnLease {
    /// Expiration time of the lease. (De)serialized through `SystemTimeAsRfc3339Millis`,
    /// i.e. as an RFC 3339 timestamp string.
    #[serde_as(as = "SystemTimeAsRfc3339Millis")]
    pub valid_until: SystemTime,
}
167 :
// Defines the `SystemTimeAsRfc3339Millis` serde_with adapter for `SystemTime`:
// - serialization formats the time with `humantime::format_rfc3339_millis`;
// - deserialization parses the string with `humantime::parse_rfc3339` and surfaces
//   `humantime::TimestampError` on malformed input.
serde_with::serde_conv!(
    SystemTimeAsRfc3339Millis,
    SystemTime,
    |time: &SystemTime| humantime::format_rfc3339_millis(*time).to_string(),
    |value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }
);
174 :
175 : impl LsnLease {
176 : /// The default length for an explicit LSN lease request (10 minutes).
177 : pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);
178 :
179 : /// The default length for an implicit LSN lease granted during
180 : /// `get_lsn_by_timestamp` request (1 minutes).
181 : pub const DEFAULT_LENGTH_FOR_TS: Duration = Duration::from_secs(60);
182 :
183 : /// Checks whether the lease is expired.
184 3 : pub fn is_expired(&self, now: &SystemTime) -> bool {
185 3 : now > &self.valid_until
186 3 : }
187 : }
188 :
/// Selects how a detach-ancestor operation treats the ancestor chain.
///
/// - `NoAncestorAndReparent`: only detach a branch if its ancestor is a root branch. Any
///   children of the ancestor before and at the branch point are automatically reparented.
/// - `MultiLevelAndNoReparent`: detach a branch from multiple levels of ancestors, with no
///   reparenting at all.
///
/// The textual form (see the `FromStr` / `Display` impls) is the snake_case variant name;
/// parsing additionally accepts the aliases `v1` and `v2`.
#[derive(Debug, Clone, Copy, Default)]
pub enum DetachBehavior {
    #[default]
    NoAncestorAndReparent,
    MultiLevelAndNoReparent,
}

impl std::str::FromStr for DetachBehavior {
    type Err = &'static str;

    /// Parses the canonical snake_case name or its short alias (`v1` / `v2`).
    /// Matching is exact: no trimming and no case folding.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "no_ancestor_and_reparent" | "v1" => Ok(Self::NoAncestorAndReparent),
            "multi_level_and_no_reparent" | "v2" => Ok(Self::MultiLevelAndNoReparent),
            _ => Err("cannot parse detach behavior"),
        }
    }
}

impl std::fmt::Display for DetachBehavior {
    /// Writes the canonical snake_case name, which `from_str` accepts back.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let canonical = match self {
            Self::NoAncestorAndReparent => "no_ancestor_and_reparent",
            Self::MultiLevelAndNoReparent => "multi_level_and_no_reparent",
        };
        f.write_str(canonical)
    }
}
221 :
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
///
/// This is the payload of `TenantState::Activating`, so it is part of the serialized
/// tenant state.
///
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
/// useless. Remove, once we've checked that there's no client code left that looks at this.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ActivatingFrom {
    /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
    Attaching,
}
231 :
/// A state of a timeline in pageserver's memory.
///
/// Per the variant docs below: `Loading` moves to `Active` or `Broken`, and `Stopping`
/// never transitions back into any other state.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TimelineState {
    /// The timeline is recognized by the pageserver but is not yet operational.
    /// In particular, the walreceiver connection loop is not running for this timeline.
    /// It will eventually transition to state Active or Broken.
    Loading,
    /// The timeline is fully operational.
    /// It can be queried, and the walreceiver connection loop is running.
    Active,
    /// The timeline was previously Loading or Active but is shutting down.
    /// It cannot transition back into any other state.
    Stopping,
    /// The timeline is broken and not operational (previous states: Loading or Active).
    ///
    /// Carries a human-readable `reason` and a stringified `backtrace`.
    Broken { reason: String, backtrace: String },
}
248 :
/// Serializable counterpart of `Range<Lsn>`: a plain `start` / `end` pair.
///
/// Converts losslessly to and from `Range<Lsn>` via the `From` impls in this module.
// NOTE(review): no field carries a `#[serde_as(...)]` annotation, so the `serde_as`
// attribute looks redundant here — confirm before removing.
#[serde_with::serde_as]
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct CompactLsnRange {
    /// Start of the range (maps to `Range::start`).
    pub start: Lsn,
    /// End of the range (maps to `Range::end`).
    pub end: Lsn,
}
255 :
/// Serializable counterpart of `Range<Key>`: a plain `start` / `end` pair.
///
/// Both keys are (de)serialized through their `Display` / `FromStr` implementations
/// (`serde_with::DisplayFromStr`), i.e. as strings.
#[serde_with::serde_as]
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
pub struct CompactKeyRange {
    /// Start of the range (maps to `Range::start`).
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub start: Key,
    /// End of the range (maps to `Range::end`).
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub end: Key,
}
264 :
265 : impl From<Range<Lsn>> for CompactLsnRange {
266 3 : fn from(range: Range<Lsn>) -> Self {
267 3 : Self {
268 3 : start: range.start,
269 3 : end: range.end,
270 3 : }
271 3 : }
272 : }
273 :
274 : impl From<Range<Key>> for CompactKeyRange {
275 8 : fn from(range: Range<Key>) -> Self {
276 8 : Self {
277 8 : start: range.start,
278 8 : end: range.end,
279 8 : }
280 8 : }
281 : }
282 :
283 : impl From<CompactLsnRange> for Range<Lsn> {
284 5 : fn from(range: CompactLsnRange) -> Self {
285 5 : range.start..range.end
286 5 : }
287 : }
288 :
289 : impl From<CompactKeyRange> for Range<Key> {
290 8 : fn from(range: CompactKeyRange) -> Self {
291 8 : range.start..range.end
292 8 : }
293 : }
294 :
295 : impl CompactLsnRange {
296 2 : pub fn above(lsn: Lsn) -> Self {
297 2 : Self {
298 2 : start: lsn,
299 2 : end: Lsn::MAX,
300 2 : }
301 2 : }
302 : }
303 :
/// Serialize-only description of a compaction job.
// NOTE(review): field meanings below are inferred from their names only — confirm against
// the code that populates this response.
#[derive(Debug, Clone, Serialize)]
pub struct CompactInfoResponse {
    /// Key range of the job, if one is set.
    pub compact_key_range: Option<CompactKeyRange>,
    /// LSN range of the job, if one is set.
    pub compact_lsn_range: Option<CompactLsnRange>,
    pub sub_compaction: bool,
    pub running: bool,
    pub job_id: usize,
}
312 :
/// Request body for creating a timeline.
///
/// The `mode` is flattened, so its fields appear at the same level as `new_timeline_id`
/// in the serialized form.
#[derive(Serialize, Deserialize, Clone)]
pub struct TimelineCreateRequest {
    /// Id of the timeline to create.
    pub new_timeline_id: TimelineId,
    /// How the timeline is to be created (branch / import / bootstrap).
    #[serde(flatten)]
    pub mode: TimelineCreateRequestMode,
}
319 :
320 : impl TimelineCreateRequest {
321 0 : pub fn mode_tag(&self) -> &'static str {
322 0 : match &self.mode {
323 0 : TimelineCreateRequestMode::Branch { .. } => "branch",
324 0 : TimelineCreateRequestMode::ImportPgdata { .. } => "import",
325 0 : TimelineCreateRequestMode::Bootstrap { .. } => "bootstrap",
326 : }
327 0 : }
328 :
329 0 : pub fn is_import(&self) -> bool {
330 0 : matches!(self.mode, TimelineCreateRequestMode::ImportPgdata { .. })
331 0 : }
332 : }
333 :
/// Status of an import on a single shard.
///
/// `Done` and `Error` are terminal (see [`ShardImportStatus::is_terminal`]).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum ShardImportStatus {
    /// The import is still running; progress information is optional.
    InProgress(Option<ShardImportProgress>),
    /// The import finished.
    Done,
    /// The import failed; the payload is the error text.
    Error(String),
}
340 :
/// Versioned progress record of a shard import. Only `V1` exists at present.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub enum ShardImportProgress {
    /// First version of the progress format.
    V1(ShardImportProgressV1),
}
345 :
/// Version 1 of the shard import progress record.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct ShardImportProgressV1 {
    /// Total number of jobs in the import plan
    pub jobs: usize,
    /// Number of jobs completed
    pub completed: usize,
    /// Hash of the plan
    pub import_plan_hash: u64,
    /// Soft limit for the job size.
    /// This needs to remain constant throughout the import.
    pub job_soft_size_limit: usize,
}
358 :
359 : impl ShardImportStatus {
360 0 : pub fn is_terminal(&self) -> bool {
361 0 : match self {
362 0 : ShardImportStatus::InProgress(_) => false,
363 0 : ShardImportStatus::Done | ShardImportStatus::Error(_) => true,
364 : }
365 0 : }
366 : }
367 :
/// Storage controller specific extensions to [`TimelineInfo`].
///
/// The wrapped `TimelineInfo` is flattened, so its fields sit at the top level of the
/// serialized object next to `safekeepers`.
#[derive(Serialize, Deserialize, Clone)]
pub struct TimelineCreateResponseStorcon {
    #[serde(flatten)]
    pub timeline_info: TimelineInfo,

    /// Safekeeper information for the timeline, when present.
    pub safekeepers: Option<SafekeepersInfo>,
}
376 :
/// Safekeepers as returned in timeline creation request to storcon or pushed to
/// cplane in the post migration hook.
#[derive(Serialize, Deserialize, Clone)]
pub struct SafekeepersInfo {
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
    // NOTE(review): presumably the generation of the safekeeper set described by
    // `safekeepers` — confirm with the producer of this struct.
    pub generation: u32,
    /// The individual safekeepers.
    pub safekeepers: Vec<SafekeeperInfo>,
}
386 :
/// Identity of a single safekeeper: its node id and hostname.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct SafekeeperInfo {
    pub id: NodeId,
    pub hostname: String,
}
392 :
/// The way a timeline is to be created; flattened into [`TimelineCreateRequest`].
///
/// The enum is `#[serde(untagged)]`: there is no discriminator field, and on
/// deserialization serde picks the first variant (in declaration order) whose fields
/// match the input. The order of the variants below is therefore significant.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum TimelineCreateRequestMode {
    /// Create the timeline as a branch of an existing ancestor timeline.
    Branch {
        ancestor_timeline_id: TimelineId,
        #[serde(default)]
        ancestor_start_lsn: Option<Lsn>,
        // TODO: cplane sets this, but, the branching code always
        // inherits the ancestor's pg_version. Earlier code wasn't
        // using a flattened enum, so, it was an accepted field, and
        // we continue to accept it by having it here.
        pg_version: Option<PgMajorVersion>,
        // Omitted from the serialized form when `false`; defaults to `false` when absent.
        #[serde(default, skip_serializing_if = "std::ops::Not::not")]
        read_only: bool,
    },
    /// Create the timeline by importing a pgdata directory.
    ImportPgdata {
        import_pgdata: TimelineCreateRequestModeImportPgdata,
    },
    // NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
    // (serde picks the first matching enum variant, in declaration order).
    Bootstrap {
        #[serde(default)]
        existing_initdb_timeline_id: Option<TimelineId>,
        pg_version: Option<PgMajorVersion>,
    },
}
419 :
/// Payload of [`TimelineCreateRequestMode::ImportPgdata`].
#[derive(Serialize, Deserialize, Clone)]
pub struct TimelineCreateRequestModeImportPgdata {
    /// Where the pgdata to import is located.
    pub location: ImportPgdataLocation,
    /// Caller-chosen idempotency key for the import request.
    pub idempotency_key: ImportPgdataIdempotencyKey,
}
425 :
/// Location of the pgdata to import.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub enum ImportPgdataLocation {
    /// A local filesystem path; only compiled in with the `testing` feature.
    #[cfg(feature = "testing")]
    LocalFs { path: Utf8PathBuf },
    /// An AWS S3 location.
    AwsS3 {
        region: String,
        bucket: String,
        /// A better name for this would be `prefix`; changing requires coordination with cplane.
        /// See <https://github.com/neondatabase/cloud/issues/20646>.
        key: String,
    },
}
438 :
/// Idempotency key of a pgdata import request.
///
/// `#[serde(transparent)]`: (de)serialized as the bare inner string.
#[derive(Serialize, Deserialize, Clone)]
#[serde(transparent)]
pub struct ImportPgdataIdempotencyKey(pub String);
442 :
443 : impl ImportPgdataIdempotencyKey {
444 0 : pub fn random() -> Self {
445 : use rand::Rng;
446 : use rand::distributions::Alphanumeric;
447 0 : Self(
448 0 : rand::thread_rng()
449 0 : .sample_iter(&Alphanumeric)
450 0 : .take(20)
451 0 : .map(char::from)
452 0 : .collect(),
453 0 : )
454 0 : }
455 : }
456 :
/// Request body for an LSN lease request.
#[derive(Serialize, Deserialize, Clone)]
pub struct LsnLeaseRequest {
    /// The LSN the lease is requested for.
    pub lsn: Lsn,
}
461 :
/// Request body for splitting a tenant into more shards.
#[derive(Serialize, Deserialize)]
pub struct TenantShardSplitRequest {
    /// The shard count the tenant should have after the split.
    pub new_shard_count: u8,

    // A tenant's stripe size is only meaningful the first time their shard count goes
    // above 1: therefore during a split from 1->N shards, we may modify the stripe size.
    //
    // If this is set while the shard count is being increased from an already >1 value,
    // then the request will fail with 400.
    pub new_stripe_size: Option<ShardStripeSize>,
}
473 :
/// Response body of a tenant shard split.
#[derive(Serialize, Deserialize)]
pub struct TenantShardSplitResponse {
    /// Ids of the shards reported by the split.
    pub new_shards: Vec<TenantShardId>,
}
478 :
/// Parameters that apply to all shards in a tenant. Used during tenant creation.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
/// The `Default` impl yields `ShardCount::new(0)` with `DEFAULT_STRIPE_SIZE`.
#[derive(Clone, Copy, Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct ShardParameters {
    /// Number of shards of the tenant.
    pub count: ShardCount,
    /// Stripe size shared by all shards of the tenant.
    pub stripe_size: ShardStripeSize,
}
486 :
487 : impl ShardParameters {
488 0 : pub fn is_unsharded(&self) -> bool {
489 0 : self.count.is_unsharded()
490 0 : }
491 : }
492 :
493 : impl Default for ShardParameters {
494 120 : fn default() -> Self {
495 120 : Self {
496 120 : count: ShardCount::new(0),
497 120 : stripe_size: DEFAULT_STRIPE_SIZE,
498 120 : }
499 120 : }
500 : }
501 :
502 : impl From<ShardIdentity> for ShardParameters {
503 0 : fn from(identity: ShardIdentity) -> Self {
504 0 : Self {
505 0 : count: identity.count,
506 0 : stripe_size: identity.stripe_size,
507 0 : }
508 0 : }
509 : }
510 :
/// A three-way patch instruction for a single optional field.
///
/// - `Upsert(v)`: set the field to `Some(v)`.
/// - `Remove`: clear the field to `None`.
/// - `Noop` (the default): leave the field untouched.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub enum FieldPatch<T> {
    Upsert(T),
    Remove,
    #[default]
    Noop,
}

impl<T> FieldPatch<T> {
    /// Returns `true` for `Noop`. Used as the `skip_serializing_if` predicate of the
    /// `TenantConfigPatch` fields.
    fn is_noop(&self) -> bool {
        match self {
            FieldPatch::Noop => true,
            FieldPatch::Upsert(_) | FieldPatch::Remove => false,
        }
    }

    /// Applies the patch to `target`: `Upsert` overwrites it with `Some`, `Remove` resets
    /// it to `None`, and `Noop` leaves it exactly as it was.
    pub fn apply(self, target: &mut Option<T>) {
        let replacement = match self {
            Self::Noop => return,
            Self::Remove => None,
            Self::Upsert(value) => Some(value),
        };
        *target = replacement;
    }

    /// Converts the payload of an `Upsert` with the fallible function `map`, propagating its
    /// error. `Remove` and `Noop` carry no payload, so `map` is not called for them.
    pub fn map<U, E, F: FnOnce(T) -> Result<U, E>>(self, map: F) -> Result<FieldPatch<U>, E> {
        let mapped = match self {
            Self::Upsert(value) => FieldPatch::Upsert(map(value)?),
            Self::Remove => FieldPatch::Remove,
            Self::Noop => FieldPatch::Noop,
        };
        Ok(mapped)
    }
}
540 :
541 : impl<'de, T: Deserialize<'de>> Deserialize<'de> for FieldPatch<T> {
542 2 : fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
543 2 : where
544 2 : D: Deserializer<'de>,
545 : {
546 2 : Option::deserialize(deserializer).map(|opt| match opt {
547 1 : None => FieldPatch::Remove,
548 1 : Some(val) => FieldPatch::Upsert(val),
549 2 : })
550 2 : }
551 : }
552 :
553 : impl<T: Serialize> Serialize for FieldPatch<T> {
554 2 : fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
555 2 : where
556 2 : S: Serializer,
557 : {
558 2 : match self {
559 1 : FieldPatch::Upsert(val) => serializer.serialize_some(val),
560 1 : FieldPatch::Remove => serializer.serialize_none(),
561 0 : FieldPatch::Noop => unreachable!(),
562 : }
563 2 : }
564 : }
565 :
/// A partial update to a [`TenantConfig`], one [`FieldPatch`] per config field.
///
/// Wire semantics, per field:
/// - field omitted: `FieldPatch::Noop` (via `#[serde(default)]`), the config value is kept;
/// - field present with an absent value (e.g. JSON `null`): `FieldPatch::Remove`;
/// - field present with a value: `FieldPatch::Upsert`.
///
/// `Noop` fields are skipped on serialization.
///
/// Fields that are `Duration`s in [`TenantConfig`] are carried here as `String`s;
/// `TenantConfig::apply_patch` parses them with `humantime::parse_duration`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
#[serde(default)]
pub struct TenantConfigPatch {
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub checkpoint_distance: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub checkpoint_timeout: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_target_size: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_period: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_threshold: FieldPatch<usize>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_upper_limit: FieldPatch<usize>,
    // defer parsing compaction_algorithm, like eviction_policy
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_shard_ancestor: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_l0_first: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub compaction_l0_semaphore: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub l0_flush_delay_threshold: FieldPatch<usize>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub l0_flush_stall_threshold: FieldPatch<usize>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_horizon: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_period: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub image_creation_threshold: FieldPatch<usize>,
    // HADRON
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub image_layer_force_creation_period: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub pitr_interval: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub walreceiver_connect_timeout: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub lagging_wal_timeout: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub max_lsn_wal_lag: FieldPatch<NonZeroU64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub eviction_policy: FieldPatch<EvictionPolicy>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub min_resident_size_override: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub evictions_low_residence_duration_metric_threshold: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub heatmap_period: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub lazy_slru_download: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub timeline_get_throttle: FieldPatch<ThrottleConfig>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub image_layer_creation_check_threshold: FieldPatch<u8>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub image_creation_preempt_threshold: FieldPatch<usize>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub lsn_lease_length: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub lsn_lease_length_for_ts: FieldPatch<String>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub timeline_offloading: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub rel_size_v2_enabled: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_compaction_enabled: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_compaction_verification: FieldPatch<bool>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub gc_compaction_ratio_percent: FieldPatch<u64>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub sampling_ratio: FieldPatch<Option<Ratio>>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub relsize_snapshot_cache_capacity: FieldPatch<usize>,
    #[serde(skip_serializing_if = "FieldPatch::is_noop")]
    pub basebackup_cache_enabled: FieldPatch<bool>,
}
650 :
/// Like [`crate::config::TenantConfigToml`], but preserves the information
/// about which parameters are set and which are not.
///
/// Every field is an `Option`: `None` means "not set". Unset fields are omitted from the
/// serialized form, and fields omitted from the input deserialize to `None`.
/// `Duration` fields are (de)serialized as human-readable strings via `humantime_serde`.
///
/// Used in many places, including durably stored ones.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(default)] // this maps omitted fields in deserialization to None
pub struct TenantConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub checkpoint_distance: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub checkpoint_timeout: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_target_size: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub compaction_period: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_threshold: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_upper_limit: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_algorithm: Option<CompactionAlgorithmSettings>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_shard_ancestor: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_l0_first: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub compaction_l0_semaphore: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub l0_flush_delay_threshold: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub l0_flush_stall_threshold: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_horizon: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub gc_period: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_creation_threshold: Option<usize>,

    // HADRON
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub image_layer_force_creation_period: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub walreceiver_connect_timeout: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub lagging_wal_timeout: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_lsn_wal_lag: Option<NonZeroU64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub eviction_policy: Option<EvictionPolicy>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub min_resident_size_override: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub evictions_low_residence_duration_metric_threshold: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub heatmap_period: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub lazy_slru_download: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub timeline_get_throttle: Option<ThrottleConfig>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_layer_creation_check_threshold: Option<u8>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_creation_preempt_threshold: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length_for_ts: Option<Duration>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub timeline_offloading: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub rel_size_v2_enabled: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_compaction_enabled: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_compaction_verification: Option<bool>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_compaction_initial_threshold_kb: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub gc_compaction_ratio_percent: Option<u64>,

    // NOTE(review): nested Option — the outer level is "set / not set" like every other
    // field; presumably the inner `None` means "explicitly no sampling ratio". Confirm how
    // an explicit null round-trips through serde for this field.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sampling_ratio: Option<Option<Ratio>>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub relsize_snapshot_cache_capacity: Option<usize>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub basebackup_cache_enabled: Option<bool>,
}
787 :
788 : impl TenantConfig {
/// Applies `patch` on top of this config and returns the resulting config.
///
/// Consumes `self`. Each patch field is handed the matching config field via
/// its `apply` method (presumably a no-op when the patch does not mention the
/// field -- confirm against the patch field type). Duration-valued patch
/// fields are first mapped through `humantime::parse_duration`.
///
/// # Errors
///
/// Returns the `humantime::DurationError` of the first duration value that
/// fails to parse; in that case no config is returned.
789 1 : pub fn apply_patch(
790 1 : self,
791 1 : patch: TenantConfigPatch,
792 1 : ) -> Result<TenantConfig, humantime::DurationError> {
// Destructure without `..`: a field added to `TenantConfig` but not listed
// here is a compile error, so new fields cannot be silently skipped.
793 : let Self {
794 1 : mut checkpoint_distance,
795 1 : mut checkpoint_timeout,
796 1 : mut compaction_target_size,
797 1 : mut compaction_period,
798 1 : mut compaction_threshold,
799 1 : mut compaction_upper_limit,
800 1 : mut compaction_algorithm,
801 1 : mut compaction_shard_ancestor,
802 1 : mut compaction_l0_first,
803 1 : mut compaction_l0_semaphore,
804 1 : mut l0_flush_delay_threshold,
805 1 : mut l0_flush_stall_threshold,
806 1 : mut gc_horizon,
807 1 : mut gc_period,
808 1 : mut image_creation_threshold,
809 1 : mut image_layer_force_creation_period,
810 1 : mut pitr_interval,
811 1 : mut walreceiver_connect_timeout,
812 1 : mut lagging_wal_timeout,
813 1 : mut max_lsn_wal_lag,
814 1 : mut eviction_policy,
815 1 : mut min_resident_size_override,
816 1 : mut evictions_low_residence_duration_metric_threshold,
817 1 : mut heatmap_period,
818 1 : mut lazy_slru_download,
819 1 : mut timeline_get_throttle,
820 1 : mut image_layer_creation_check_threshold,
821 1 : mut image_creation_preempt_threshold,
822 1 : mut lsn_lease_length,
823 1 : mut lsn_lease_length_for_ts,
824 1 : mut timeline_offloading,
825 1 : mut rel_size_v2_enabled,
826 1 : mut gc_compaction_enabled,
827 1 : mut gc_compaction_verification,
828 1 : mut gc_compaction_initial_threshold_kb,
829 1 : mut gc_compaction_ratio_percent,
830 1 : mut sampling_ratio,
831 1 : mut relsize_snapshot_cache_capacity,
832 1 : mut basebackup_cache_enabled,
833 1 : } = self;
834 :
// Apply the patch field by field. Duration fields go through
// `humantime::parse_duration` and `?` before being applied.
835 1 : patch.checkpoint_distance.apply(&mut checkpoint_distance);
836 1 : patch
837 1 : .checkpoint_timeout
838 1 : .map(|v| humantime::parse_duration(&v))?
839 1 : .apply(&mut checkpoint_timeout);
840 1 : patch
841 1 : .compaction_target_size
842 1 : .apply(&mut compaction_target_size);
843 1 : patch
844 1 : .compaction_period
845 1 : .map(|v| humantime::parse_duration(&v))?
846 1 : .apply(&mut compaction_period);
847 1 : patch.compaction_threshold.apply(&mut compaction_threshold);
848 1 : patch
849 1 : .compaction_upper_limit
850 1 : .apply(&mut compaction_upper_limit);
851 1 : patch.compaction_algorithm.apply(&mut compaction_algorithm);
852 1 : patch
853 1 : .compaction_shard_ancestor
854 1 : .apply(&mut compaction_shard_ancestor);
855 1 : patch.compaction_l0_first.apply(&mut compaction_l0_first);
856 1 : patch
857 1 : .compaction_l0_semaphore
858 1 : .apply(&mut compaction_l0_semaphore);
859 1 : patch
860 1 : .l0_flush_delay_threshold
861 1 : .apply(&mut l0_flush_delay_threshold);
862 1 : patch
863 1 : .l0_flush_stall_threshold
864 1 : .apply(&mut l0_flush_stall_threshold);
865 1 : patch.gc_horizon.apply(&mut gc_horizon);
866 1 : patch
867 1 : .gc_period
868 1 : .map(|v| humantime::parse_duration(&v))?
869 1 : .apply(&mut gc_period);
870 1 : patch
871 1 : .image_creation_threshold
872 1 : .apply(&mut image_creation_threshold);
873 : // HADRON
874 1 : patch
875 1 : .image_layer_force_creation_period
876 1 : .map(|v| humantime::parse_duration(&v))?
877 1 : .apply(&mut image_layer_force_creation_period);
878 1 : patch
879 1 : .pitr_interval
880 1 : .map(|v| humantime::parse_duration(&v))?
881 1 : .apply(&mut pitr_interval);
882 1 : patch
883 1 : .walreceiver_connect_timeout
884 1 : .map(|v| humantime::parse_duration(&v))?
885 1 : .apply(&mut walreceiver_connect_timeout);
886 1 : patch
887 1 : .lagging_wal_timeout
888 1 : .map(|v| humantime::parse_duration(&v))?
889 1 : .apply(&mut lagging_wal_timeout);
890 1 : patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);
891 1 : patch.eviction_policy.apply(&mut eviction_policy);
892 1 : patch
893 1 : .min_resident_size_override
894 1 : .apply(&mut min_resident_size_override);
895 1 : patch
896 1 : .evictions_low_residence_duration_metric_threshold
897 1 : .map(|v| humantime::parse_duration(&v))?
898 1 : .apply(&mut evictions_low_residence_duration_metric_threshold);
899 1 : patch
900 1 : .heatmap_period
901 1 : .map(|v| humantime::parse_duration(&v))?
902 1 : .apply(&mut heatmap_period);
903 1 : patch.lazy_slru_download.apply(&mut lazy_slru_download);
904 1 : patch
905 1 : .timeline_get_throttle
906 1 : .apply(&mut timeline_get_throttle);
907 1 : patch
908 1 : .image_layer_creation_check_threshold
909 1 : .apply(&mut image_layer_creation_check_threshold);
910 1 : patch
911 1 : .image_creation_preempt_threshold
912 1 : .apply(&mut image_creation_preempt_threshold);
913 1 : patch
914 1 : .lsn_lease_length
915 1 : .map(|v| humantime::parse_duration(&v))?
916 1 : .apply(&mut lsn_lease_length);
917 1 : patch
918 1 : .lsn_lease_length_for_ts
919 1 : .map(|v| humantime::parse_duration(&v))?
920 1 : .apply(&mut lsn_lease_length_for_ts);
921 1 : patch.timeline_offloading.apply(&mut timeline_offloading);
922 1 : patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);
923 1 : patch
924 1 : .gc_compaction_enabled
925 1 : .apply(&mut gc_compaction_enabled);
926 1 : patch
927 1 : .gc_compaction_verification
928 1 : .apply(&mut gc_compaction_verification);
929 1 : patch
930 1 : .gc_compaction_initial_threshold_kb
931 1 : .apply(&mut gc_compaction_initial_threshold_kb);
932 1 : patch
933 1 : .gc_compaction_ratio_percent
934 1 : .apply(&mut gc_compaction_ratio_percent);
935 1 : patch.sampling_ratio.apply(&mut sampling_ratio);
936 1 : patch
937 1 : .relsize_snapshot_cache_capacity
938 1 : .apply(&mut relsize_snapshot_cache_capacity);
939 1 : patch
940 1 : .basebackup_cache_enabled
941 1 : .apply(&mut basebackup_cache_enabled);
942 :
// Reassemble the config from the (possibly updated) locals.
943 1 : Ok(Self {
944 1 : checkpoint_distance,
945 1 : checkpoint_timeout,
946 1 : compaction_target_size,
947 1 : compaction_period,
948 1 : compaction_threshold,
949 1 : compaction_upper_limit,
950 1 : compaction_algorithm,
951 1 : compaction_shard_ancestor,
952 1 : compaction_l0_first,
953 1 : compaction_l0_semaphore,
954 1 : l0_flush_delay_threshold,
955 1 : l0_flush_stall_threshold,
956 1 : gc_horizon,
957 1 : gc_period,
958 1 : image_creation_threshold,
959 1 : image_layer_force_creation_period,
960 1 : pitr_interval,
961 1 : walreceiver_connect_timeout,
962 1 : lagging_wal_timeout,
963 1 : max_lsn_wal_lag,
964 1 : eviction_policy,
965 1 : min_resident_size_override,
966 1 : evictions_low_residence_duration_metric_threshold,
967 1 : heatmap_period,
968 1 : lazy_slru_download,
969 1 : timeline_get_throttle,
970 1 : image_layer_creation_check_threshold,
971 1 : image_creation_preempt_threshold,
972 1 : lsn_lease_length,
973 1 : lsn_lease_length_for_ts,
974 1 : timeline_offloading,
975 1 : rel_size_v2_enabled,
976 1 : gc_compaction_enabled,
977 1 : gc_compaction_verification,
978 1 : gc_compaction_initial_threshold_kb,
979 1 : gc_compaction_ratio_percent,
980 1 : sampling_ratio,
981 1 : relsize_snapshot_cache_capacity,
982 1 : basebackup_cache_enabled,
983 1 : })
984 1 : }
985 :
/// Builds a `TenantConfigToml` by overlaying this tenant-level config on
/// `global_conf`.
///
/// For every field, the tenant value is used when it is `Some`; otherwise the
/// value from `global_conf` is used. Fields combined with `.or(...)` stay
/// optional in the result, so they can still be `None` when neither side
/// sets them.
986 0 : pub fn merge(
987 0 : &self,
988 0 : global_conf: crate::config::TenantConfigToml,
989 0 : ) -> crate::config::TenantConfigToml {
990 0 : crate::config::TenantConfigToml {
991 0 : checkpoint_distance: self
992 0 : .checkpoint_distance
993 0 : .unwrap_or(global_conf.checkpoint_distance),
994 0 : checkpoint_timeout: self
995 0 : .checkpoint_timeout
996 0 : .unwrap_or(global_conf.checkpoint_timeout),
997 0 : compaction_target_size: self
998 0 : .compaction_target_size
999 0 : .unwrap_or(global_conf.compaction_target_size),
1000 0 : compaction_period: self
1001 0 : .compaction_period
1002 0 : .unwrap_or(global_conf.compaction_period),
1003 0 : compaction_threshold: self
1004 0 : .compaction_threshold
1005 0 : .unwrap_or(global_conf.compaction_threshold),
1006 0 : compaction_upper_limit: self
1007 0 : .compaction_upper_limit
1008 0 : .unwrap_or(global_conf.compaction_upper_limit),
// Borrow first so the tenant value is only cloned, never moved out of `&self`.
1009 0 : compaction_algorithm: self
1010 0 : .compaction_algorithm
1011 0 : .as_ref()
1012 0 : .unwrap_or(&global_conf.compaction_algorithm)
1013 0 : .clone(),
1014 0 : compaction_shard_ancestor: self
1015 0 : .compaction_shard_ancestor
1016 0 : .unwrap_or(global_conf.compaction_shard_ancestor),
1017 0 : compaction_l0_first: self
1018 0 : .compaction_l0_first
1019 0 : .unwrap_or(global_conf.compaction_l0_first),
1020 0 : compaction_l0_semaphore: self
1021 0 : .compaction_l0_semaphore
1022 0 : .unwrap_or(global_conf.compaction_l0_semaphore),
1023 0 : l0_flush_delay_threshold: self
1024 0 : .l0_flush_delay_threshold
1025 0 : .or(global_conf.l0_flush_delay_threshold),
1026 0 : l0_flush_stall_threshold: self
1027 0 : .l0_flush_stall_threshold
1028 0 : .or(global_conf.l0_flush_stall_threshold),
1029 0 : gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
1030 0 : gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
1031 0 : image_creation_threshold: self
1032 0 : .image_creation_threshold
1033 0 : .unwrap_or(global_conf.image_creation_threshold),
1034 0 : image_layer_force_creation_period: self
1035 0 : .image_layer_force_creation_period
1036 0 : .or(global_conf.image_layer_force_creation_period),
1037 0 : pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),
1038 0 : walreceiver_connect_timeout: self
1039 0 : .walreceiver_connect_timeout
1040 0 : .unwrap_or(global_conf.walreceiver_connect_timeout),
1041 0 : lagging_wal_timeout: self
1042 0 : .lagging_wal_timeout
1043 0 : .unwrap_or(global_conf.lagging_wal_timeout),
1044 0 : max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),
1045 0 : eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
1046 0 : min_resident_size_override: self
1047 0 : .min_resident_size_override
1048 0 : .or(global_conf.min_resident_size_override),
1049 0 : evictions_low_residence_duration_metric_threshold: self
1050 0 : .evictions_low_residence_duration_metric_threshold
1051 0 : .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
1052 0 : heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
1053 0 : lazy_slru_download: self
1054 0 : .lazy_slru_download
1055 0 : .unwrap_or(global_conf.lazy_slru_download),
1056 0 : timeline_get_throttle: self
1057 0 : .timeline_get_throttle
1058 0 : .clone()
1059 0 : .unwrap_or(global_conf.timeline_get_throttle),
1060 0 : image_layer_creation_check_threshold: self
1061 0 : .image_layer_creation_check_threshold
1062 0 : .unwrap_or(global_conf.image_layer_creation_check_threshold),
1063 0 : image_creation_preempt_threshold: self
1064 0 : .image_creation_preempt_threshold
1065 0 : .unwrap_or(global_conf.image_creation_preempt_threshold),
1066 0 : lsn_lease_length: self
1067 0 : .lsn_lease_length
1068 0 : .unwrap_or(global_conf.lsn_lease_length),
1069 0 : lsn_lease_length_for_ts: self
1070 0 : .lsn_lease_length_for_ts
1071 0 : .unwrap_or(global_conf.lsn_lease_length_for_ts),
1072 0 : timeline_offloading: self
1073 0 : .timeline_offloading
1074 0 : .unwrap_or(global_conf.timeline_offloading),
1075 0 : rel_size_v2_enabled: self
1076 0 : .rel_size_v2_enabled
1077 0 : .unwrap_or(global_conf.rel_size_v2_enabled),
1078 0 : gc_compaction_enabled: self
1079 0 : .gc_compaction_enabled
1080 0 : .unwrap_or(global_conf.gc_compaction_enabled),
1081 0 : gc_compaction_verification: self
1082 0 : .gc_compaction_verification
1083 0 : .unwrap_or(global_conf.gc_compaction_verification),
1084 0 : gc_compaction_initial_threshold_kb: self
1085 0 : .gc_compaction_initial_threshold_kb
1086 0 : .unwrap_or(global_conf.gc_compaction_initial_threshold_kb),
1087 0 : gc_compaction_ratio_percent: self
1088 0 : .gc_compaction_ratio_percent
1089 0 : .unwrap_or(global_conf.gc_compaction_ratio_percent),
1090 0 : sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
1091 0 : relsize_snapshot_cache_capacity: self
1092 0 : .relsize_snapshot_cache_capacity
1093 0 : .unwrap_or(global_conf.relsize_snapshot_cache_capacity),
1094 0 : basebackup_cache_enabled: self
1095 0 : .basebackup_cache_enabled
1096 0 : .unwrap_or(global_conf.basebackup_cache_enabled),
1097 0 : }
1098 0 : }
1099 : }
1100 :
1101 : /// The policy for the aux file storage.
1102 : ///
1103 : /// It can be switched through the `switch_aux_file_policy` tenant config.
1104 : /// When the first aux file is written, the policy is persisted in the
1105 : /// `index_part.json` file and has a limited migration path.
1106 : ///
1107 : /// Currently, we only allow the following migration path:
1108 : ///
1109 : /// Unset -> V1
1110 : /// -> V2
1111 : /// -> CrossValidation -> V2
///
/// The string form comes from strum (`Display` / `EnumString`, kebab-case) and
/// is also what serde reads and writes (`DeserializeFromStr` / `SerializeDisplay`).
/// Every variant is marked `ascii_case_insensitive` for parsing.
1112 : #[derive(
1113 : Eq,
1114 : PartialEq,
1115 : Debug,
1116 : Copy,
1117 : Clone,
1118 : strum_macros::EnumString,
1119 : strum_macros::Display,
1120 0 : serde_with::DeserializeFromStr,
1121 : serde_with::SerializeDisplay,
1122 : )]
1123 : #[strum(serialize_all = "kebab-case")]
1124 : pub enum AuxFilePolicy {
1125 : /// V1 aux file policy: store everything in AUX_FILE_KEY
1126 : #[strum(ascii_case_insensitive)]
1127 : V1,
1128 : /// V2 aux file policy: store in the AUX_FILE keyspace
1129 : #[strum(ascii_case_insensitive)]
1130 : V2,
1131 : /// Cross validation runs both formats on the write path and does validation
1132 : /// on the read path.
1133 : #[strum(ascii_case_insensitive)]
1134 : CrossValidation,
1135 : }
1136 :
/// Eviction policy selection.
///
/// Serialized by serde as an internally tagged enum: the `kind` field holds
/// the variant name.
///
/// NOTE(review): `OnlyImitiate` looks like a misspelling of "OnlyImitate", but
/// the variant name is what ends up in `kind` (and in `discriminant_str`), so
/// renaming it would change the serialized form.
1137 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1138 : #[serde(tag = "kind")]
1139 : pub enum EvictionPolicy {
1140 : NoEviction,
1141 : LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
1142 : OnlyImitiate(EvictionPolicyLayerAccessThreshold),
1143 : }
1144 :
1145 : impl EvictionPolicy {
1146 0 : pub fn discriminant_str(&self) -> &'static str {
1147 0 : match self {
1148 0 : EvictionPolicy::NoEviction => "NoEviction",
1149 0 : EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
1150 0 : EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate",
1151 : }
1152 0 : }
1153 : }
1154 :
/// Selects the compaction algorithm.
///
/// The string form comes from strum (`Display` / `EnumString`, kebab-case) and
/// is also what serde reads and writes (`DeserializeFromStr` / `SerializeDisplay`).
1155 : #[derive(
1156 : Eq,
1157 : PartialEq,
1158 : Debug,
1159 : Copy,
1160 : Clone,
1161 : strum_macros::EnumString,
1162 : strum_macros::Display,
1163 0 : serde_with::DeserializeFromStr,
1164 : serde_with::SerializeDisplay,
1165 : )]
1166 : #[strum(serialize_all = "kebab-case")]
1167 : pub enum CompactionAlgorithm {
1168 : Legacy,
1169 : Tiered,
1170 : }
1171 :
/// Image compression algorithm selection.
///
/// serde (de)serialization goes through the `FromStr` / `Display` string form
/// (`DeserializeFromStr` / `SerializeDisplay`).
1172 : #[derive(
1173 0 : Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,
1174 : )]
1175 : pub enum ImageCompressionAlgorithm {
1176 : // Disabled for writes, support decompressing during read path
1177 : Disabled,
1178 : /// Zstandard compression. Level 0 and `None` mean the same (default level). Levels can be negative as well.
1179 : /// For details, see the [manual](http://facebook.github.io/zstd/zstd_manual.html).
1180 : Zstd {
1181 : level: Option<i8>,
1182 : },
1183 : }
1184 :
1185 : impl FromStr for ImageCompressionAlgorithm {
1186 : type Err = anyhow::Error;
1187 8 : fn from_str(s: &str) -> Result<Self, Self::Err> {
1188 8 : let mut components = s.split(['(', ')']);
1189 8 : let first = components
1190 8 : .next()
1191 8 : .ok_or_else(|| anyhow::anyhow!("empty string"))?;
1192 8 : match first {
1193 8 : "disabled" => Ok(ImageCompressionAlgorithm::Disabled),
1194 6 : "zstd" => {
1195 6 : let level = if let Some(v) = components.next() {
1196 4 : let v: i8 = v.parse()?;
1197 4 : Some(v)
1198 : } else {
1199 2 : None
1200 : };
1201 :
1202 6 : Ok(ImageCompressionAlgorithm::Zstd { level })
1203 : }
1204 0 : _ => anyhow::bail!("invalid specifier '{first}'"),
1205 : }
1206 8 : }
1207 : }
1208 :
1209 : impl Display for ImageCompressionAlgorithm {
1210 12 : fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1211 12 : match self {
1212 3 : ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
1213 9 : ImageCompressionAlgorithm::Zstd { level } => {
1214 9 : if let Some(level) = level {
1215 6 : write!(f, "zstd({level})")
1216 : } else {
1217 3 : write!(f, "zstd")
1218 : }
1219 : }
1220 : }
1221 12 : }
1222 : }
1223 :
/// Settings wrapper carrying the selected [`CompactionAlgorithm`] in `kind`.
1224 0 : #[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
1225 : pub struct CompactionAlgorithmSettings {
1226 : pub kind: CompactionAlgorithm,
1227 : }
1228 :
/// L0 flush configuration.
///
/// Serialized by serde as an internally tagged enum: the `mode` field holds
/// the kebab-case variant name, while the variant's own fields use snake_case.
1229 0 : #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
1230 : #[serde(tag = "mode", rename_all = "kebab-case")]
1231 : pub enum L0FlushConfig {
1232 : #[serde(rename_all = "snake_case")]
1233 : Direct { max_concurrency: NonZeroUsize },
1234 : }
1235 :
/// Parameters carried by the `LayerAccessThreshold` and `OnlyImitiate`
/// variants of [`EvictionPolicy`].
///
/// Both durations are (de)serialized through `humantime_serde`.
1236 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1237 : pub struct EvictionPolicyLayerAccessThreshold {
1238 : #[serde(with = "humantime_serde")]
1239 : pub period: Duration,
1240 : #[serde(with = "humantime_serde")]
1241 : pub threshold: Duration,
1242 : }
1243 :
/// Throttle parameters.
///
/// NOTE(review): the field names (`initial`, `refill_*`, `max`) suggest a
/// token-bucket style limiter -- confirm against the throttle implementation.
1244 : #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
1245 : pub struct ThrottleConfig {
1246 : /// See [`ThrottleConfigTaskKinds`] for why we do the serde `rename`.
1247 : #[serde(rename = "task_kinds")]
1248 : pub enabled: ThrottleConfigTaskKinds,
1249 : pub initial: u32,
// (De)serialized through `humantime_serde`.
1250 : #[serde(with = "humantime_serde")]
1251 : pub refill_interval: Duration,
1252 : pub refill_amount: NonZeroU32,
1253 : pub max: u32,
1254 : }
1255 :
1256 : /// Before <https://github.com/neondatabase/neon/pull/9962>
1257 : /// the throttle was a per `Timeline::get`/`Timeline::get_vectored` call.
1258 : /// The `task_kinds` field controlled which Pageserver "Task Kind"s
1259 : /// were subject to the throttle.
1260 : ///
1261 : /// After that PR, the throttle is applied at pagestream request level
1262 : /// and the `task_kinds` field does not apply since the only task kind
1263 : /// that is subject to the throttle is that of the page service.
1264 : ///
1265 : /// However, we don't want to make a breaking config change right now
1266 : /// because it means we have to migrate all the tenant configs.
1267 : /// This will be done in a future PR.
1268 : ///
1269 : /// In the meantime, we use emptiness / non-emptiness of the `task_kinds`
1270 : /// field to determine if the throttle is enabled or not.
1271 : #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
1272 : #[serde(transparent)]
1273 : pub struct ThrottleConfigTaskKinds(Vec<String>);
1274 :
1275 : impl ThrottleConfigTaskKinds {
1276 139 : pub fn disabled() -> Self {
1277 139 : Self(vec![])
1278 139 : }
1279 122 : pub fn is_enabled(&self) -> bool {
1280 122 : !self.0.is_empty()
1281 122 : }
1282 : }
1283 :
1284 : impl ThrottleConfig {
1285 139 : pub fn disabled() -> Self {
1286 139 : Self {
1287 139 : enabled: ThrottleConfigTaskKinds::disabled(),
1288 139 : // other values don't matter with emtpy `task_kinds`.
1289 139 : initial: 0,
1290 139 : refill_interval: Duration::from_millis(1),
1291 139 : refill_amount: NonZeroU32::new(1).unwrap(),
1292 139 : max: 1,
1293 139 : }
1294 139 : }
1295 : /// The requests per second allowed by the given config.
1296 0 : pub fn steady_rps(&self) -> f64 {
1297 0 : (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64())
1298 0 : }
1299 : }
1300 :
// Unit tests for `ThrottleConfig` / `ThrottleConfigTaskKinds`.
1301 : #[cfg(test)]
1302 : mod throttle_config_tests {
1303 : use super::*;
1304 :
// `ThrottleConfig::disabled()` must report the throttle as not enabled.
1305 : #[test]
1306 1 : fn test_disabled_is_disabled() {
1307 1 : let config = ThrottleConfig::disabled();
1308 1 : assert!(!config.enabled.is_enabled());
1309 1 : }
// A JSON document with a non-empty `task_kinds` list must deserialize into an
// enabled throttle. The input also carries a `fair` key, which is not a field
// of `ThrottleConfig`; the `unwrap()` checks that it does not break parsing.
1310 : #[test]
1311 1 : fn test_enabled_backwards_compat() {
1312 1 : let input = serde_json::json!({
1313 1 : "task_kinds": ["PageRequestHandler"],
1314 1 : "initial": 40000,
1315 1 : "refill_interval": "50ms",
1316 1 : "refill_amount": 1000,
1317 1 : "max": 40000,
1318 1 : "fair": true
1319 : });
1320 1 : let config: ThrottleConfig = serde_json::from_value(input).unwrap();
1321 1 : assert!(config.enabled.is_enabled());
1322 1 : }
1323 : }
1324 :
1325 : /// A flattened analog of a `pageserver::tenant::LocationMode`, which
1326 : /// lists out all possible states (and the virtual "Detached" state)
1327 : /// in a flat form rather than using rust-style enums.
1328 0 : #[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
1329 : pub enum LocationConfigMode {
1330 : AttachedSingle,
1331 : AttachedMulti,
1332 : AttachedStale,
1333 : Secondary,
1334 : Detached,
1335 : }
1336 :
/// Extra configuration for a location in `Secondary` mode
/// (see `LocationConfig::secondary_conf`).
1337 0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
1338 : pub struct LocationConfigSecondary {
1339 : pub warm: bool,
1340 : }
1341 :
1342 : /// An alternative representation of `pageserver::tenant::LocationConf`,
1343 : /// for use in external-facing APIs.
///
/// Fields marked `#[serde(default)]` may be omitted from the input and then
/// take their type's default value.
1344 0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
1345 : pub struct LocationConfig {
1346 : pub mode: LocationConfigMode,
1347 : /// If attaching, in what generation?
1348 : #[serde(default)]
1349 : pub generation: Option<u32>,
1350 :
1351 : // If requesting mode `Secondary`, configuration for that.
1352 : #[serde(default)]
1353 : pub secondary_conf: Option<LocationConfigSecondary>,
1354 :
1355 : // Shard parameters: if shard_count is nonzero, then other shard_* fields
1356 : // must be set accurately.
1357 : #[serde(default)]
1358 : pub shard_number: u8,
1359 : #[serde(default)]
1360 : pub shard_count: u8,
1361 : #[serde(default)]
1362 : pub shard_stripe_size: u32,
1363 :
1364 : // This configuration only affects attached mode, but should be provided irrespective
1365 : // of the mode, as a secondary location might transition on startup if the response
1366 : // to the `/re-attach` control plane API requests it.
1367 : pub tenant_conf: TenantConfig,
1368 : }
1369 :
/// Response listing tenant shards, each paired with its optional [`LocationConfig`].
1370 0 : #[derive(Serialize, Deserialize)]
1371 : pub struct LocationConfigListResponse {
1372 : pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,
1373 : }
1374 :
/// Serialize-only status response carrying a node id.
1375 : #[derive(Serialize)]
1376 : pub struct StatusResponse {
1377 : pub id: NodeId,
1378 : }
1379 :
/// Request body wrapping a [`LocationConfig`], whose fields are flattened into
/// the top-level object.
///
/// NOTE(review): the serde documentation states that `deny_unknown_fields` is
/// not supported in combination with `flatten` -- verify that unknown fields
/// are actually rejected here.
1380 0 : #[derive(Serialize, Deserialize, Debug)]
1381 : #[serde(deny_unknown_fields)]
1382 : pub struct TenantLocationConfigRequest {
1383 : #[serde(flatten)]
1384 : pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
1385 : }
1386 :
/// Time-travel request body listing shard counts. Unknown fields are rejected
/// on deserialization.
1387 0 : #[derive(Serialize, Deserialize, Debug)]
1388 : #[serde(deny_unknown_fields)]
1389 : pub struct TenantTimeTravelRequest {
1390 : pub shard_counts: Vec<ShardCount>,
1391 : }
1392 :
/// Pairs a tenant shard id with a node id. Unknown fields are rejected on
/// deserialization.
1393 0 : #[derive(Serialize, Deserialize, Debug)]
1394 : #[serde(deny_unknown_fields)]
1395 : pub struct TenantShardLocation {
1396 : pub shard_id: TenantShardId,
1397 : pub node_id: NodeId,
1398 : }
1399 :
/// Response listing the location of each shard, plus the stripe size when one
/// applies. Unknown fields are rejected on deserialization.
1400 0 : #[derive(Serialize, Deserialize, Debug)]
1401 : #[serde(deny_unknown_fields)]
1402 : pub struct TenantLocationConfigResponse {
1403 : pub shards: Vec<TenantShardLocation>,
1404 : // If the shards' ShardCount count is >1, stripe_size will be set.
1405 : pub stripe_size: Option<ShardStripeSize>,
1406 : }
1407 :
/// Request body carrying a tenant id together with a full [`TenantConfig`],
/// whose fields are flattened into the top-level object.
///
/// NOTE(review): the serde documentation states that `deny_unknown_fields` is
/// not supported in combination with `flatten` -- verify that unknown fields
/// are actually rejected here.
1408 0 : #[derive(Serialize, Deserialize, Debug)]
1409 : #[serde(deny_unknown_fields)]
1410 : pub struct TenantConfigRequest {
1411 : pub tenant_id: TenantId,
1412 : #[serde(flatten)]
1413 : pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
1414 : }
1415 :
1416 : impl std::ops::Deref for TenantConfigRequest {
1417 : type Target = TenantConfig;
1418 :
1419 0 : fn deref(&self) -> &Self::Target {
1420 0 : &self.config
1421 0 : }
1422 : }
1423 :
1424 : impl TenantConfigRequest {
1425 0 : pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
1426 0 : let config = TenantConfig::default();
1427 0 : TenantConfigRequest { tenant_id, config }
1428 0 : }
1429 : }
1430 :
/// Request body carrying a tenant id together with a `TenantConfigPatch`,
/// whose fields are flattened into the top-level object.
///
/// NOTE(review): the serde documentation states that `deny_unknown_fields` is
/// not supported in combination with `flatten` -- verify that unknown fields
/// are actually rejected here.
1431 0 : #[derive(Serialize, Deserialize, Debug)]
1432 : #[serde(deny_unknown_fields)]
1433 : pub struct TenantConfigPatchRequest {
1434 : pub tenant_id: TenantId,
1435 : #[serde(flatten)]
1436 : pub config: TenantConfigPatch, // as we have a flattened field, we should reject all unknown fields in it
1437 : }
1438 :
/// Request carrying one LSN per timeline plus a timeout.
///
/// `timelines` is flattened: its entries appear as top-level keys next to
/// `timeout` in the serialized object.
/// NOTE(review): `timeout` uses serde's default `Duration` representation, not
/// `humantime_serde` like other durations in this file -- confirm this is intended.
1439 0 : #[derive(Serialize, Deserialize, Debug)]
1440 : pub struct TenantWaitLsnRequest {
1441 : #[serde(flatten)]
1442 : pub timelines: HashMap<TimelineId, Lsn>,
1443 : pub timeout: Duration,
1444 : }
1445 :
1446 : /// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
///
/// Serialized by serde as an adjacently tagged enum: `slug` holds the
/// snake_case variant name and `data` holds the variant's payload.
1447 0 : #[derive(Serialize, Deserialize, Clone)]
1448 : #[serde(tag = "slug", content = "data", rename_all = "snake_case")]
1449 : pub enum TenantAttachmentStatus {
1450 : Maybe,
1451 : Attached,
1452 : Failed { reason: String },
1453 : }
1454 :
/// Summary information about a tenant shard: id, state, attachment status,
/// generation, and optional size / gc-blocking details.
1455 0 : #[derive(Serialize, Deserialize, Clone)]
1456 : pub struct TenantInfo {
1457 : pub id: TenantShardId,
1458 : // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
1459 : pub state: TenantState,
1460 : /// Sum of the size of all layer files.
1461 : /// If a layer is present in both local FS and S3, it counts only once.
1462 : pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
1463 : pub attachment_status: TenantAttachmentStatus,
1464 : pub generation: u32,
1465 :
1466 : /// Opaque explanation if gc is being blocked.
1467 : ///
1468 : /// Only looked up for the individual tenant detail, not the listing.
1469 : #[serde(skip_serializing_if = "Option::is_none")]
1470 : pub gc_blocking: Option<String>,
1471 : }
1472 :
/// A [`TenantInfo`] (flattened into the same object) extended with the
/// optional walredo manager status and the list of timeline ids.
1473 0 : #[derive(Serialize, Deserialize, Clone)]
1474 : pub struct TenantDetails {
1475 : #[serde(flatten)]
1476 : pub tenant_info: TenantInfo,
1477 :
1478 : pub walredo: Option<WalRedoManagerStatus>,
1479 :
1480 : pub timelines: Vec<TimelineId>,
1481 : }
1482 :
/// Archival state of a timeline (see [`TimelineArchivalConfigRequest`]).
1483 0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)]
1484 : pub enum TimelineArchivalState {
1485 : Archived,
1486 : Unarchived,
1487 : }
1488 :
/// Visibility state of a timeline.
// NOTE(review): presumably related to `TimelineInfo::is_invisible` -- confirm.
1489 0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
1490 : pub enum TimelineVisibilityState {
1491 : Visible,
1492 : Invisible,
1493 : }
1494 :
/// Request body carrying a [`TimelineArchivalState`] for a timeline.
1495 0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
1496 : pub struct TimelineArchivalConfigRequest {
1497 : pub state: TimelineArchivalState,
1498 : }
1499 :
/// Request body for patching index-part values of a timeline. All value
/// fields are optional.
// NOTE(review): presumably a `None` field means "leave unchanged" -- confirm
// against the handler.
1500 0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
1501 : pub struct TimelinePatchIndexPartRequest {
1502 : pub rel_size_migration: Option<RelSizeMigration>,
1503 : pub gc_compaction_last_completed_lsn: Option<Lsn>,
1504 : pub applied_gc_cutoff_lsn: Option<Lsn>,
// Defaults to `false` when omitted from the input.
1505 : #[serde(default)]
1506 : pub force_index_update: bool,
1507 : }
1508 :
/// Combined listing of [`TimelineInfo`] entries and [`OffloadedTimelineInfo`] entries.
1509 0 : #[derive(Debug, Serialize, Deserialize, Clone)]
1510 : pub struct TimelinesInfoAndOffloaded {
1511 : pub timelines: Vec<TimelineInfo>,
1512 : pub offloaded: Vec<OffloadedTimelineInfo>,
1513 : }
1514 :
1515 : /// Analog of [`TimelineInfo`] for offloaded timelines.
1516 0 : #[derive(Debug, Serialize, Deserialize, Clone)]
1517 : pub struct OffloadedTimelineInfo {
1518 : pub tenant_id: TenantShardId,
1519 : pub timeline_id: TimelineId,
1520 : /// The parent timeline this timeline has been branched off from, if it has one
1521 : pub ancestor_timeline_id: Option<TimelineId>,
1522 : /// Whether to retain the branch lsn at the ancestor or not
1523 : pub ancestor_retain_lsn: Option<Lsn>,
1524 : /// The time point when the timeline was archived
1525 : pub archived_at: chrono::DateTime<chrono::Utc>,
1526 : }
1527 :
/// Status of the rel_size format migration. Variant names are serialized in
/// camelCase.
1528 0 : #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1529 : #[serde(rename_all = "camelCase")]
1530 : pub enum RelSizeMigration {
1531 : /// The tenant is using the old rel_size format.
1532 : /// Note that this enum is persisted as `Option<RelSizeMigration>` in the index part, so
1533 : /// `None` is the same as `Some(RelSizeMigration::Legacy)`.
1534 : Legacy,
1535 : /// The tenant is migrating to the new rel_size format. Both old and new rel_size format are
1536 : /// persisted in the index part. The read path will read both formats and merge them.
1537 : Migrating,
1538 : /// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted
1539 : /// in the index part, and the read path will not read the old format.
1540 : Migrated,
1541 : }
1542 :
1543 : /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
1544 0 : #[derive(Debug, Serialize, Deserialize, Clone)]
1545 : pub struct TimelineInfo {
1546 : pub tenant_id: TenantShardId,
1547 : pub timeline_id: TimelineId,
1548 :
1549 : pub ancestor_timeline_id: Option<TimelineId>,
1550 : pub ancestor_lsn: Option<Lsn>,
1551 : pub last_record_lsn: Lsn,
1552 : pub prev_record_lsn: Option<Lsn>,
1553 :
1554 : /// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
1555 : /// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
1556 : /// as it is easier to reason about.
1557 : #[serde(default)]
1558 : pub applied_gc_cutoff_lsn: Lsn,
1559 :
1560 : /// The upper bound of data which is either already GC'ed, or eligible to be GC'ed at any time based on PITR interval.
1561 : /// This LSN represents the "end of history" for this timeline, and callers should use it to figure out the oldest
1562 : /// LSN at which it is legal to create a branch or ephemeral endpoint.
1563 : ///
1564 : /// Note that holders of valid LSN leases may be able to create branches and read pages earlier
1565 : /// than this LSN, but new leases may not be taken out earlier than this LSN.
1566 : #[serde(default)]
1567 : pub min_readable_lsn: Lsn,
1568 :
1569 : pub disk_consistent_lsn: Lsn,
1570 :
1571 : /// The LSN that we have successfully uploaded to remote storage
1572 : pub remote_consistent_lsn: Lsn,
1573 :
1574 : /// The LSN that we are advertising to safekeepers
1575 : pub remote_consistent_lsn_visible: Lsn,
1576 :
1577 : /// The LSN from the start of the root timeline (never changes)
1578 : pub initdb_lsn: Lsn,
1579 :
1580 : pub current_logical_size: u64,
1581 : pub current_logical_size_is_accurate: bool,
1582 :
1583 : pub directory_entries_counts: Vec<u64>,
1584 :
1585 : /// Sum of the size of all layer files.
1586 : /// If a layer is present in both local FS and S3, it counts only once.
1587 : pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
1588 : pub current_logical_size_non_incremental: Option<u64>,
1589 :
1590 : /// How many bytes of WAL are within this branch's pitr_interval. If the pitr_interval goes
1591 : /// beyond the branch's branch point, we only count up to the branch point.
1592 : pub pitr_history_size: u64,
1593 :
1594 : /// Whether this branch's branch point is within its ancestor's PITR interval (i.e. any
1595 : /// ancestor data used by this branch would have been retained anyway). If this is false, then
1596 : /// this branch may be imposing a cost on the ancestor by causing it to retain layers that it would
1597 : /// otherwise be able to GC.
1598 : pub within_ancestor_pitr: bool,
1599 :
1600 : pub timeline_dir_layer_file_size_sum: Option<u64>,
1601 :
1602 : pub wal_source_connstr: Option<String>,
1603 : pub last_received_msg_lsn: Option<Lsn>,
1604 : /// the timestamp (in microseconds) of the last received message
1605 : pub last_received_msg_ts: Option<u128>,
1606 : pub pg_version: PgMajorVersion,
1607 :
1608 : pub state: TimelineState,
1609 :
1610 : pub walreceiver_status: String,
1611 :
1612 : // ALWAYS add new fields at the end of the struct with `Option` to ensure forward/backward compatibility.
1613 : // Backward compatibility: you will get a JSON not containing the newly-added field.
1614 : // Forward compatibility: a previous version of the pageserver will receive a JSON. serde::Deserialize does
1615 : // not deny unknown fields by default so it's safe to set the field to some value, though it won't be
1616 : // read.
1617 : /// Whether the timeline is archived.
1618 : pub is_archived: Option<bool>,
1619 :
1620 : /// The status of the rel_size migration.
1621 : pub rel_size_migration: Option<RelSizeMigration>,
1622 :
1623 : /// Whether the timeline is invisible in synthetic size calculations.
1624 : pub is_invisible: Option<bool>,
1625 : // HADRON: the largest LSN below which all page updates have been included in the image layers.
1626 : #[serde(skip_serializing_if = "Option::is_none")]
1627 : pub image_consistent_lsn: Option<Lsn>,
1628 : }
1629 :
/// Snapshot of a layer map: its in-memory layers and its historic layers.
1630 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1631 : pub struct LayerMapInfo {
1632 : pub in_memory_layers: Vec<InMemoryLayerInfo>,
1633 : pub historic_layers: Vec<HistoricLayerInfo>,
1634 : }
1635 :
1636 : /// The residence status of a layer: whether its file exists locally or only on the remote.
1637 0 : #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
1638 : pub enum LayerResidenceStatus {
1639 : /// Residence status for a layer file that exists locally.
1640 : /// It may also exist on the remote, we don't care here.
1641 : Resident,
1642 : /// Residence status for a layer file that only exists on the remote.
1643 : Evicted,
1644 : }
1645 :
/// Access statistics of a layer.
///
/// Both timestamps are (de)serialized as millisecond timestamps
/// (`serde_with::TimestampMilliSeconds`).
1646 : #[serde_as]
1647 : #[derive(Debug, Clone, Serialize, Deserialize)]
1648 : pub struct LayerAccessStats {
1649 : #[serde_as(as = "serde_with::TimestampMilliSeconds")]
1650 : pub access_time: SystemTime,
1651 :
1652 : #[serde_as(as = "serde_with::TimestampMilliSeconds")]
1653 : pub residence_time: SystemTime,
1654 :
1655 : pub visible: bool,
1656 : }
1657 :
/// Description of an in-memory layer.
///
/// Serialized by serde as an internally tagged enum (`kind` holds the variant
/// name). An `Open` layer only has a start LSN; a `Frozen` layer also has an
/// end LSN.
1658 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1659 : #[serde(tag = "kind")]
1660 : pub enum InMemoryLayerInfo {
1661 : Open { lsn_start: Lsn },
1662 : Frozen { lsn_start: Lsn, lsn_end: Lsn },
1663 : }
1664 :
/// Description of a historic layer, either a delta layer or an image layer.
///
/// Serialized by serde as an internally tagged enum (`kind` holds the variant
/// name). Both variants share `layer_file_name`, `layer_file_size`,
/// `lsn_start`, `remote` and `access_stats`; `Delta` additionally carries
/// `lsn_end` and `l0`.
1665 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1666 : #[serde(tag = "kind")]
1667 : pub enum HistoricLayerInfo {
1668 : Delta {
1669 : layer_file_name: String,
1670 : layer_file_size: u64,
1671 :
1672 : lsn_start: Lsn,
1673 : lsn_end: Lsn,
1674 : remote: bool,
1675 : access_stats: LayerAccessStats,
1676 :
1677 : l0: bool,
1678 : },
1679 : Image {
1680 : layer_file_name: String,
1681 : layer_file_size: u64,
1682 :
1683 : lsn_start: Lsn,
1684 : remote: bool,
1685 : access_stats: LayerAccessStats,
1686 : },
1687 : }
1688 :
1689 : impl HistoricLayerInfo {
1690 0 : pub fn layer_file_name(&self) -> &str {
1691 0 : match self {
1692 : HistoricLayerInfo::Delta {
1693 0 : layer_file_name, ..
1694 0 : } => layer_file_name,
1695 : HistoricLayerInfo::Image {
1696 0 : layer_file_name, ..
1697 0 : } => layer_file_name,
1698 : }
1699 0 : }
1700 0 : pub fn is_remote(&self) -> bool {
1701 0 : match self {
1702 0 : HistoricLayerInfo::Delta { remote, .. } => *remote,
1703 0 : HistoricLayerInfo::Image { remote, .. } => *remote,
1704 : }
1705 0 : }
1706 0 : pub fn set_remote(&mut self, value: bool) {
1707 0 : let field = match self {
1708 0 : HistoricLayerInfo::Delta { remote, .. } => remote,
1709 0 : HistoricLayerInfo::Image { remote, .. } => remote,
1710 : };
1711 0 : *field = value;
1712 0 : }
1713 0 : pub fn layer_file_size(&self) -> u64 {
1714 0 : match self {
1715 : HistoricLayerInfo::Delta {
1716 0 : layer_file_size, ..
1717 0 : } => *layer_file_size,
1718 : HistoricLayerInfo::Image {
1719 0 : layer_file_size, ..
1720 0 : } => *layer_file_size,
1721 : }
1722 0 : }
1723 : }
1724 :
1725 0 : #[derive(Debug, Serialize, Deserialize)]
1726 : pub struct DownloadRemoteLayersTaskSpawnRequest {
1727 : pub max_concurrent_downloads: NonZeroUsize,
1728 : }
1729 :
1730 0 : #[derive(Debug, Serialize, Deserialize)]
1731 : pub struct IngestAuxFilesRequest {
1732 : pub aux_files: HashMap<String, String>,
1733 : }
1734 :
1735 0 : #[derive(Debug, Serialize, Deserialize)]
1736 : pub struct ListAuxFilesRequest {
1737 : pub lsn: Lsn,
1738 : }
1739 :
1740 0 : #[derive(Debug, Serialize, Deserialize, Clone)]
1741 : pub struct DownloadRemoteLayersTaskInfo {
1742 : pub task_id: String,
1743 : pub state: DownloadRemoteLayersTaskState,
1744 : pub total_layer_count: u64, // stable once `completed`
1745 : pub successful_download_count: u64, // stable once `completed`
1746 : pub failed_download_count: u64, // stable once `completed`
1747 : }
1748 :
1749 0 : #[derive(Debug, Serialize, Deserialize, Clone)]
1750 : pub enum DownloadRemoteLayersTaskState {
1751 : Running,
1752 : Completed,
1753 : ShutDown,
1754 : }
1755 :
1756 0 : #[derive(Debug, Serialize, Deserialize)]
1757 : pub struct TimelineGcRequest {
1758 : pub gc_horizon: Option<u64>,
1759 : }
1760 :
1761 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1762 : pub struct WalRedoManagerProcessStatus {
1763 : pub pid: u32,
1764 : }
1765 :
1766 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1767 : pub struct WalRedoManagerStatus {
1768 : pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,
1769 : pub process: Option<WalRedoManagerProcessStatus>,
1770 : }
1771 :
1772 : /// The progress of a secondary tenant.
1773 : ///
1774 : /// It is mostly useful when doing a long running download: e.g. initiating
1775 : /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
1776 : /// what's happening.
1777 0 : #[derive(Default, Debug, Serialize, Deserialize, Clone)]
1778 : pub struct SecondaryProgress {
1779 : /// The remote storage LastModified time of the heatmap object we last downloaded.
1780 : pub heatmap_mtime: Option<serde_system_time::SystemTime>,
1781 :
1782 : /// The number of layers currently on-disk
1783 : pub layers_downloaded: usize,
1784 : /// The number of layers in the most recently seen heatmap
1785 : pub layers_total: usize,
1786 :
1787 : /// The number of layer bytes currently on-disk
1788 : pub bytes_downloaded: u64,
1789 : /// The number of layer bytes in the most recently seen heatmap
1790 : pub bytes_total: u64,
1791 : }
1792 :
1793 0 : #[derive(Serialize, Deserialize, Debug)]
1794 : pub struct TenantScanRemoteStorageShard {
1795 : pub tenant_shard_id: TenantShardId,
1796 : pub generation: Option<u32>,
1797 : pub stripe_size: Option<ShardStripeSize>,
1798 : }
1799 :
1800 0 : #[derive(Serialize, Deserialize, Debug, Default)]
1801 : pub struct TenantScanRemoteStorageResponse {
1802 : pub shards: Vec<TenantScanRemoteStorageShard>,
1803 : }
1804 :
1805 0 : #[derive(Serialize, Deserialize, Debug, Clone)]
1806 : #[serde(rename_all = "snake_case")]
1807 : pub enum TenantSorting {
1808 : /// Total size of layers on local disk for all timelines in a shard.
1809 : ResidentSize,
1810 : /// The logical size of the largest timeline within a _tenant_ (not shard). Only tracked on
1811 : /// shard 0, contains the sum across all shards.
1812 : MaxLogicalSize,
1813 : /// The logical size of the largest timeline within a _tenant_ (not shard), divided by number of
1814 : /// shards. Only tracked on shard 0, and estimates the per-shard logical size.
1815 : MaxLogicalSizePerShard,
1816 : }
1817 :
1818 : impl Default for TenantSorting {
1819 0 : fn default() -> Self {
1820 0 : Self::ResidentSize
1821 0 : }
1822 : }
1823 :
1824 0 : #[derive(Serialize, Deserialize, Debug, Clone)]
1825 : pub struct TopTenantShardsRequest {
1826 : // How would you like to sort the tenants?
1827 : pub order_by: TenantSorting,
1828 :
1829 : // How many results?
1830 : pub limit: usize,
1831 :
1832 : // Omit tenants with more than this many shards (e.g. if this is the max number of shards
1833 : // that the caller would ever split to)
1834 : pub where_shards_lt: Option<ShardCount>,
1835 :
1836 : // Omit tenants where the ordering metric is less than this (this is an optimization to
1837 : // let us quickly exclude numerous tiny shards)
1838 : pub where_gt: Option<u64>,
1839 : }
1840 :
1841 0 : #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
1842 : pub struct TopTenantShardItem {
1843 : pub id: TenantShardId,
1844 :
1845 : /// Total size of layers on local disk for all timelines in this shard.
1846 : pub resident_size: u64,
1847 :
1848 : /// Total size of layers in remote storage for all timelines in this shard.
1849 : pub physical_size: u64,
1850 :
1851 : /// The largest logical size of a timeline within this _tenant_ (not shard). This is only
1852 : /// tracked on shard 0, and contains the sum of the logical size across all shards.
1853 : pub max_logical_size: u64,
1854 :
1855 : /// The largest logical size of a timeline within this _tenant_ (not shard) divided by number of
1856 : /// shards. This is only tracked on shard 0, and is only an estimate as we divide it evenly by
1857 : /// shard count, rounded up.
1858 : pub max_logical_size_per_shard: u64,
1859 : }
1860 :
1861 0 : #[derive(Serialize, Deserialize, Debug, Default)]
1862 : pub struct TopTenantShardsResponse {
1863 : pub shards: Vec<TopTenantShardItem>,
1864 : }
1865 :
1866 : pub mod virtual_file {
1867 :
1868 : #[derive(
1869 : Copy,
1870 : Clone,
1871 : PartialEq,
1872 : Eq,
1873 : Hash,
1874 : strum_macros::EnumString,
1875 : strum_macros::Display,
1876 0 : serde_with::DeserializeFromStr,
1877 : serde_with::SerializeDisplay,
1878 : Debug,
1879 : )]
1880 : #[strum(serialize_all = "kebab-case")]
1881 : pub enum IoEngineKind {
1882 : StdFs,
1883 : #[cfg(target_os = "linux")]
1884 : TokioEpollUring,
1885 : }
1886 :
1887 : /// Direct IO modes for a pageserver.
1888 : #[derive(
1889 : Copy,
1890 : Clone,
1891 : PartialEq,
1892 : Eq,
1893 : Hash,
1894 : strum_macros::EnumString,
1895 : strum_macros::EnumIter,
1896 : strum_macros::Display,
1897 0 : serde_with::DeserializeFromStr,
1898 : serde_with::SerializeDisplay,
1899 : Debug,
1900 : )]
1901 : #[strum(serialize_all = "kebab-case")]
1902 : #[repr(u8)]
1903 : pub enum IoMode {
1904 : /// Uses buffered IO.
1905 : Buffered,
1906 : /// Uses direct IO for reads only.
1907 : Direct,
1908 : /// Use direct IO for reads and writes.
1909 : DirectRw,
1910 : }
1911 :
1912 : impl IoMode {
1913 260 : pub fn preferred() -> Self {
1914 260 : IoMode::DirectRw
1915 260 : }
1916 : }
1917 :
1918 : impl TryFrom<u8> for IoMode {
1919 : type Error = u8;
1920 :
1921 2654 : fn try_from(value: u8) -> Result<Self, Self::Error> {
1922 2654 : Ok(match value {
1923 2654 : v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
1924 2654 : v if v == (IoMode::Direct as u8) => IoMode::Direct,
1925 2654 : v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
1926 0 : x => return Err(x),
1927 : })
1928 2654 : }
1929 : }
1930 : }
1931 :
1932 0 : #[derive(Debug, Clone, Serialize, Deserialize)]
1933 : pub struct ScanDisposableKeysResponse {
1934 : pub disposable_count: usize,
1935 : pub not_disposable_count: usize,
1936 : }
1937 :
1938 : // This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
1939 : // that require pageserver-internal types. It is sufficient to get the total size.
1940 0 : #[derive(Serialize, Deserialize, Debug)]
1941 : pub struct TenantHistorySize {
1942 : pub id: TenantId,
1943 : /// Size is a mixture of WAL and logical size, so the unit is bytes.
1944 : ///
1945 : /// Will be none if `?inputs_only=true` was given.
1946 : pub size: Option<u64>,
1947 : }
1948 :
1949 0 : #[derive(Debug, Serialize, Deserialize)]
1950 : pub struct PageTraceEvent {
1951 : pub key: CompactKey,
1952 : pub effective_lsn: Lsn,
1953 : pub time: SystemTime,
1954 : }
1955 :
1956 : impl Default for PageTraceEvent {
1957 0 : fn default() -> Self {
1958 0 : Self {
1959 0 : key: Default::default(),
1960 0 : effective_lsn: Default::default(),
1961 0 : time: std::time::UNIX_EPOCH,
1962 0 : }
1963 0 : }
1964 : }
1965 :
1966 : #[cfg(test)]
1967 : mod tests {
1968 : use std::str::FromStr;
1969 :
1970 : use serde_json::json;
1971 :
1972 : use super::*;
1973 :
#[test]
fn test_tenantinfo_serde() {
    // Serialization of `TenantInfo`, first for an `Active` tenant ...
    let active_info = TenantInfo {
        id: TenantShardId::unsharded(TenantId::generate()),
        state: TenantState::Active,
        current_physical_size: Some(42),
        attachment_status: TenantAttachmentStatus::Attached,
        generation: 1,
        gc_blocking: None,
    };
    let active_json = json!({
        "id": active_info.id.to_string(),
        "state": {
            "slug": "Active",
        },
        "current_physical_size": 42,
        "attachment_status": {
            "slug":"attached",
        },
        "generation" : 1
    });

    // ... then for a `Broken` tenant, whose state carries a `data` payload.
    let broken_info = TenantInfo {
        id: TenantShardId::unsharded(TenantId::generate()),
        state: TenantState::Broken {
            reason: "reason".into(),
            backtrace: "backtrace info".into(),
        },
        current_physical_size: Some(42),
        attachment_status: TenantAttachmentStatus::Attached,
        generation: 1,
        gc_blocking: None,
    };
    let broken_json = json!({
        "id": broken_info.id.to_string(),
        "state": {
            "slug": "Broken",
            "data": {
                "backtrace": "backtrace info",
                "reason": "reason",
            }
        },
        "current_physical_size": 42,
        "attachment_status": {
            "slug":"attached",
        },
        "generation" : 1
    });

    let active_actual = serde_json::to_value(&active_info).unwrap();
    assert_eq!(active_actual, active_json);

    let broken_actual = serde_json::to_value(&broken_info).unwrap();
    assert_eq!(broken_actual, broken_json);

    // The Debug rendering of the broken state must expose both payload strings.
    let debug_repr = format!("{:?}", &broken_info.state);
    assert!(debug_repr.contains("reason"));
    assert!(debug_repr.contains("backtrace info"));
}
2036 :
#[test]
fn test_reject_unknown_field() {
    // A request with a field the type does not know must fail to deserialize.
    let tenant_id = TenantId::generate();
    let payload = json!({
        "tenant_id": tenant_id.to_string(),
        "unknown_field": "unknown_value".to_string(),
    });

    let err = serde_json::from_value::<TenantConfigRequest>(payload).unwrap_err();
    let mentions_field = err.to_string().contains("unknown field `unknown_field`");
    assert!(
        mentions_field,
        "expect unknown field `unknown_field` error, got: {err}"
    );
}
2050 :
#[test]
fn tenantstatus_activating_serde() {
    // `Activating` must round-trip through JSON using the slug/data layout.
    let original = [TenantState::Activating(ActivatingFrom::Attaching)];
    let expected_json = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";

    let encoded = serde_json::to_string(&original).unwrap();
    assert_eq!(encoded, expected_json);

    let decoded: Vec<TenantState> = serde_json::from_str(&encoded).unwrap();
    assert_eq!(original.as_slice(), &decoded);
}
2064 :
#[test]
fn tenantstatus_activating_strum() {
    // tests added, because we use these for metrics
    //
    // Each case: source line (for diagnostics), state, expected static string.
    let cases = [
        (line!(), TenantState::Attaching, "Attaching"),
        (
            line!(),
            TenantState::Activating(ActivatingFrom::Attaching),
            "Activating",
        ),
        (line!(), TenantState::Active, "Active"),
        (
            line!(),
            TenantState::Stopping { progress: None },
            "Stopping",
        ),
        (
            line!(),
            TenantState::Stopping {
                progress: Some(completion::Barrier::default()),
            },
            "Stopping",
        ),
        (
            line!(),
            TenantState::Broken {
                reason: "Example".into(),
                backtrace: "Looooong backtrace".into(),
            },
            "Broken",
        ),
    ];

    for (line, state, expected) in cases {
        let slug = <&'static str>::from(state);
        assert_eq!(slug, expected, "example on {line}");
    }
}
2103 :
#[test]
fn test_image_compression_algorithm_parsing() {
    use ImageCompressionAlgorithm::*;

    // (textual form, parsed value) pairs; every pair must hold in both directions.
    let cases = [
        ("disabled", Disabled),
        ("zstd", Zstd { level: None }),
        ("zstd(18)", Zstd { level: Some(18) }),
        ("zstd(-3)", Zstd { level: Some(-3) }),
    ];

    for (display, expected) in cases {
        let parsed = display.parse::<ImageCompressionAlgorithm>().unwrap();
        assert_eq!(parsed, expected, "parsing works");

        let rendered = expected.to_string();
        assert_eq!(rendered, display, "Display FromStr roundtrip");

        let ser = serde_json::to_string(&expected).expect("serialization");
        let roundtripped = serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap();
        assert_eq!(roundtripped, expected, "serde roundtrip");

        let display_as_json = serde_json::Value::String(display.to_string());
        assert_eq!(
            display_as_json,
            serde_json::to_value(expected).unwrap(),
            "Display is the serde serialization"
        );
    }
}
2136 :
#[test]
fn test_tenant_config_patch_request_serde() {
    // A patch with one upsert, one removal and one explicit no-op.
    let tenant_id = TenantId::from_str("17c6d121946a61e5ab0fe5a2fd4d8215").unwrap();
    let request = TenantConfigPatchRequest {
        tenant_id,
        config: TenantConfigPatch {
            checkpoint_distance: FieldPatch::Upsert(42),
            gc_horizon: FieldPatch::Remove,
            compaction_threshold: FieldPatch::Noop,
            ..TenantConfigPatch::default()
        },
    };

    // Upsert serializes as the value, Remove as null; Noop does not appear at all.
    let encoded = serde_json::to_string(&request).unwrap();
    let expected_json = r#"{"tenant_id":"17c6d121946a61e5ab0fe5a2fd4d8215","checkpoint_distance":42,"gc_horizon":null}"#;
    assert_eq!(encoded, expected_json);

    let decoded: TenantConfigPatchRequest = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.tenant_id, request.tenant_id);
    assert_eq!(decoded.config, request.config);

    // Now apply the patch to a config to demonstrate semantics

    let base_config = TenantConfig {
        checkpoint_distance: Some(28),
        gc_horizon: Some(100),
        compaction_target_size: Some(1024),
        ..Default::default()
    };

    // Upserted field replaced, removed field cleared, everything else untouched.
    let expected_config = TenantConfig {
        checkpoint_distance: Some(42),
        gc_horizon: None,
        ..base_config.clone()
    };

    let patched_config = base_config.apply_patch(decoded.config).unwrap();

    assert_eq!(patched_config, expected_config);
}
2177 : }
|