LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - models.rs (source / functions) Coverage Total Hit
Test: 2ff680a820af2e5030dd8e14ace9c8cb73b50f66.info Lines: 51.7 % 1285 664
Test Date: 2025-05-27 12:46:00 Functions: 7.0 % 1078 75

            Line data    Source code
       1              : pub mod detach_ancestor;
       2              : pub mod partitioning;
       3              : pub mod utilization;
       4              : 
       5              : use core::ops::Range;
       6              : use std::collections::HashMap;
       7              : use std::fmt::Display;
       8              : use std::io::{BufRead, Read};
       9              : use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
      10              : use std::str::FromStr;
      11              : use std::time::{Duration, SystemTime};
      12              : 
      13              : use byteorder::{BigEndian, ReadBytesExt};
      14              : use bytes::{Buf, BufMut, Bytes, BytesMut};
      15              : #[cfg(feature = "testing")]
      16              : use camino::Utf8PathBuf;
      17              : use postgres_ffi::BLCKSZ;
      18              : use serde::{Deserialize, Deserializer, Serialize, Serializer};
      19              : use serde_with::serde_as;
      20              : pub use utilization::PageserverUtilization;
      21              : use utils::id::{NodeId, TenantId, TimelineId};
      22              : use utils::lsn::Lsn;
      23              : use utils::postgres_client::PostgresClientProtocol;
      24              : use utils::{completion, serde_system_time};
      25              : 
      26              : use crate::config::Ratio;
      27              : use crate::key::{CompactKey, Key};
      28              : use crate::reltag::RelTag;
      29              : use crate::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
      30              : 
      31              : /// The state of a tenant in this pageserver.
      32              : ///
      33              : /// ```mermaid
      34              : /// stateDiagram-v2
      35              : ///
      36              : ///     [*] --> Attaching: spawn_attach()
      37              : ///
      38              : ///     Attaching --> Activating: activate()
      39              : ///     Activating --> Active: infallible
      40              : ///
      41              : ///     Attaching --> Broken: attach() failure
      42              : ///
      43              : ///     Active --> Stopping: set_stopping(), part of shutdown & detach
      44              : ///     Stopping --> Broken: late error in remove_tenant_from_memory
      45              : ///
      46              : ///     Broken --> [*]: ignore / detach / shutdown
      47              : ///     Stopping --> [*]: remove_from_memory complete
      48              : ///
      49              : ///     Active --> Broken: cfg(testing)-only tenant break point
      50              : /// ```
      51              : #[derive(
      52              :     Clone,
      53              :     PartialEq,
      54              :     Eq,
      55            0 :     serde::Serialize,
      56            1 :     serde::Deserialize,
      57              :     strum_macros::Display,
      58              :     strum_macros::VariantNames,
      59              :     strum_macros::AsRefStr,
      60              :     strum_macros::IntoStaticStr,
      61              : )]
      62              : #[serde(tag = "slug", content = "data")]
      63              : pub enum TenantState {
      64              :     /// This tenant is being attached to the pageserver.
      65              :     ///
      66              :     /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
      67              :     Attaching,
      68              :     /// The tenant is transitioning from Loading/Attaching to Active.
      69              :     ///
      70              :     /// While in this state, the individual timelines are being activated.
      71              :     ///
      72              :     /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
      73              :     Activating(ActivatingFrom),
      74              :     /// The tenant has finished activating and is open for business.
      75              :     ///
      76              :     /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
      77              :     Active,
      78              :     /// The tenant is recognized by pageserver, but it is being detached or the
      79              :     /// system is being shut down.
      80              :     ///
      81              :     /// Transitions out of this state are possible through `set_broken()`.
      82              :     Stopping {
      83              :         /// The barrier can be used to wait for shutdown to complete. The first caller to set
      84              :         /// Some(Barrier) is responsible for driving shutdown to completion. Subsequent callers
      85              :         /// will wait for the first caller's existing barrier.
      86              :         ///
      87              :         /// None is set when an attach is cancelled, to signal to shutdown that the attach has in
      88              :         /// fact cancelled:
      89              :         ///
      90              :         /// 1. `shutdown` sees `TenantState::Attaching`, and cancels the tenant.
      91              :         /// 2. `attach` sets `TenantState::Stopping(None)` and exits.
      92              :         /// 3. `set_stopping` waits for `TenantState::Stopping(None)` and sets
      93              :         ///    `TenantState::Stopping(Some)` to claim the barrier as the shutdown owner.
      94              :         //
      95              :         // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
      96              :         // otherwise it will not be skipped during deserialization
      97              :         #[serde(skip)]
      98              :         progress: Option<completion::Barrier>,
      99              :     },
     100              :     /// The tenant is recognized by the pageserver, but can no longer be used for
     101              :     /// any operations.
     102              :     ///
     103              :     /// If the tenant fails to load or attach, it will transition to this state
     104              :     /// and it is guaranteed that no background tasks are running in its name.
     105              :     ///
     106              :     /// The other way to transition into this state is from `Stopping` state
     107              :     /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
     108              :     /// if the cleanup future executed by `remove_tenant_from_memory()` fails.
     109              :     Broken { reason: String, backtrace: String },
     110              : }
     111              : 
     112              : impl TenantState {
     113            0 :     pub fn attachment_status(&self) -> TenantAttachmentStatus {
     114              :         use TenantAttachmentStatus::*;
     115              : 
     116              :         // Below, TenantState::Activating is treated as a "transient" or "transparent" state
     117              :         // when determining attachment_status.
     118            0 :         match self {
     119              :             // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
     120              :             // So, technically, we can return Attached here.
     121              :             // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
     122              :             // But, our attach task might still be fetching the remote timelines, etc.
     123              :             // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
     124            0 :             Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
     125              :             // We only reach Active after successful load / attach.
     126              :             // So, call attachment status Attached.
     127            0 :             Self::Active => Attached,
     128              :             // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
     129              :             // However, it also becomes Broken if the regular load fails.
     130              :             // From Console's perspective there's no practical difference
     131              :             // because attachment_status is polled by console only during attach operation execution.
     132            0 :             Self::Broken { reason, .. } => Failed {
     133            0 :                 reason: reason.to_owned(),
     134            0 :             },
     135              :             // Why is Stopping a Maybe case? Because, during pageserver shutdown,
     136              :             // we set the Stopping state irrespective of whether the tenant
     137              :             // has finished attaching or not.
     138            0 :             Self::Stopping { .. } => Maybe,
     139              :         }
     140            0 :     }
     141              : 
     142            0 :     pub fn broken_from_reason(reason: String) -> Self {
     143            0 :         let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture());
     144            0 :         Self::Broken {
     145            0 :             reason,
     146            0 :             backtrace: backtrace_str,
     147            0 :         }
     148            0 :     }
     149              : }
     150              : 
     151              : impl std::fmt::Debug for TenantState {
     152            2 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     153            2 :         match self {
     154            2 :             Self::Broken { reason, backtrace } if !reason.is_empty() => {
     155            2 :                 write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}")
     156              :             }
     157            0 :             _ => write!(f, "{self}"),
     158              :         }
     159            2 :     }
     160              : }
     161              : 
     162              : /// A temporary lease to a specific lsn inside a timeline.
     163              : /// Access to the lsn is guaranteed by the pageserver until the expiration indicated by `valid_until`.
     164              : #[serde_as]
     165            0 : #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
     166              : pub struct LsnLease {
     167              :     #[serde_as(as = "SystemTimeAsRfc3339Millis")]
     168              :     pub valid_until: SystemTime,
     169              : }
     170              : 
     171              : serde_with::serde_conv!(
     172              :     SystemTimeAsRfc3339Millis,
     173              :     SystemTime,
     174            0 :     |time: &SystemTime| humantime::format_rfc3339_millis(*time).to_string(),
     175            0 :     |value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }
     176              : );
     177              : 
     178              : impl LsnLease {
     179              :     /// The default length for an explicit LSN lease request (10 minutes).
     180              :     pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);
     181              : 
     182              :     /// The default length for an implicit LSN lease granted during
     183              :     /// `get_lsn_by_timestamp` request (1 minute).
     184              :     pub const DEFAULT_LENGTH_FOR_TS: Duration = Duration::from_secs(60);
     185              : 
     186              :     /// Checks whether the lease is expired.
     187            3 :     pub fn is_expired(&self, now: &SystemTime) -> bool {
     188            3 :         now > &self.valid_until
     189            3 :     }
     190              : }
     191              : 
     192              : /// Controls the detach ancestor behavior.
     193              : /// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point.
     194              : /// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all.
     195              : #[derive(Debug, Clone, Copy, Default)]
     196              : pub enum DetachBehavior {
     197              :     #[default]
     198              :     NoAncestorAndReparent,
     199              :     MultiLevelAndNoReparent,
     200              : }
     201              : 
     202              : impl std::str::FromStr for DetachBehavior {
     203              :     type Err = &'static str;
     204              : 
     205            0 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     206            0 :         match s {
     207            0 :             "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent),
     208            0 :             "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent),
     209            0 :             "v1" => Ok(DetachBehavior::NoAncestorAndReparent),
     210            0 :             "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent),
     211            0 :             _ => Err("cannot parse detach behavior"),
     212              :         }
     213            0 :     }
     214              : }
     215              : 
     216              : impl std::fmt::Display for DetachBehavior {
     217            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     218            0 :         match self {
     219            0 :             DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"),
     220            0 :             DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"),
     221              :         }
     222            0 :     }
     223              : }
     224              : 
     225              : /// The only [`TenantState`] variants we could be `TenantState::Activating` from.
     226              : ///
     227              : /// XXX: We used to have more variants here, but now it's just one, which makes this rather
     228              : /// useless. Remove, once we've checked that there's no client code left that looks at this.
     229            1 : #[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     230              : pub enum ActivatingFrom {
     231              :     /// Arrived at [`TenantState::Activating`] from [`TenantState::Attaching`]
     232              :     Attaching,
     233              : }
     234              : 
     235              : /// A state of a timeline in pageserver's memory.
     236            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     237              : pub enum TimelineState {
     238              :     /// The timeline is recognized by the pageserver but is not yet operational.
     239              :     /// In particular, the walreceiver connection loop is not running for this timeline.
     240              :     /// It will eventually transition to state Active or Broken.
     241              :     Loading,
     242              :     /// The timeline is fully operational.
     243              :     /// It can be queried, and the walreceiver connection loop is running.
     244              :     Active,
     245              :     /// The timeline was previously Loading or Active but is shutting down.
     246              :     /// It cannot transition back into any other state.
     247              :     Stopping,
     248              :     /// The timeline is broken and not operational (previous states: Loading or Active).
     249              :     Broken { reason: String, backtrace: String },
     250              : }
     251              : 
     252              : #[serde_with::serde_as]
     253            0 : #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
     254              : pub struct CompactLsnRange {
     255              :     pub start: Lsn,
     256              :     pub end: Lsn,
     257              : }
     258              : 
     259              : #[serde_with::serde_as]
     260            0 : #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
     261              : pub struct CompactKeyRange {
     262              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     263              :     pub start: Key,
     264              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     265              :     pub end: Key,
     266              : }
     267              : 
     268              : impl From<Range<Lsn>> for CompactLsnRange {
     269            3 :     fn from(range: Range<Lsn>) -> Self {
     270            3 :         Self {
     271            3 :             start: range.start,
     272            3 :             end: range.end,
     273            3 :         }
     274            3 :     }
     275              : }
     276              : 
     277              : impl From<Range<Key>> for CompactKeyRange {
     278            8 :     fn from(range: Range<Key>) -> Self {
     279            8 :         Self {
     280            8 :             start: range.start,
     281            8 :             end: range.end,
     282            8 :         }
     283            8 :     }
     284              : }
     285              : 
     286              : impl From<CompactLsnRange> for Range<Lsn> {
     287            5 :     fn from(range: CompactLsnRange) -> Self {
     288            5 :         range.start..range.end
     289            5 :     }
     290              : }
     291              : 
     292              : impl From<CompactKeyRange> for Range<Key> {
     293            8 :     fn from(range: CompactKeyRange) -> Self {
     294            8 :         range.start..range.end
     295            8 :     }
     296              : }
     297              : 
     298              : impl CompactLsnRange {
     299            2 :     pub fn above(lsn: Lsn) -> Self {
     300            2 :         Self {
     301            2 :             start: lsn,
     302            2 :             end: Lsn::MAX,
     303            2 :         }
     304            2 :     }
     305              : }
     306              : 
     307              : #[derive(Debug, Clone, Serialize)]
     308              : pub struct CompactInfoResponse {
     309              :     pub compact_key_range: Option<CompactKeyRange>,
     310              :     pub compact_lsn_range: Option<CompactLsnRange>,
     311              :     pub sub_compaction: bool,
     312              :     pub running: bool,
     313              :     pub job_id: usize,
     314              : }
     315              : 
     316            0 : #[derive(Serialize, Deserialize, Clone)]
     317              : pub struct TimelineCreateRequest {
     318              :     pub new_timeline_id: TimelineId,
     319              :     #[serde(flatten)]
     320              :     pub mode: TimelineCreateRequestMode,
     321              : }
     322              : 
     323              : impl TimelineCreateRequest {
     324            0 :     pub fn mode_tag(&self) -> &'static str {
     325            0 :         match &self.mode {
     326            0 :             TimelineCreateRequestMode::Branch { .. } => "branch",
     327            0 :             TimelineCreateRequestMode::ImportPgdata { .. } => "import",
     328            0 :             TimelineCreateRequestMode::Bootstrap { .. } => "bootstrap",
     329              :         }
     330            0 :     }
     331              : 
     332            0 :     pub fn is_import(&self) -> bool {
     333            0 :         matches!(self.mode, TimelineCreateRequestMode::ImportPgdata { .. })
     334            0 :     }
     335              : }
     336              : 
     337            0 : #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
     338              : pub enum ShardImportStatus {
     339              :     InProgress(Option<ShardImportProgress>),
     340              :     Done,
     341              :     Error(String),
     342              : }
     343              : 
     344            0 : #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
     345              : pub enum ShardImportProgress {
     346              :     V1(ShardImportProgressV1),
     347              : }
     348              : 
     349            0 : #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
     350              : pub struct ShardImportProgressV1 {
     351              :     /// Total number of jobs in the import plan
     352              :     pub jobs: usize,
     353              :     /// Number of jobs completed
     354              :     pub completed: usize,
     355              :     /// Hash of the plan
     356              :     pub import_plan_hash: u64,
     357              : }
     358              : 
     359              : impl ShardImportStatus {
     360            0 :     pub fn is_terminal(&self) -> bool {
     361            0 :         match self {
     362            0 :             ShardImportStatus::InProgress(_) => false,
     363            0 :             ShardImportStatus::Done | ShardImportStatus::Error(_) => true,
     364              :         }
     365            0 :     }
     366              : }
     367              : 
     368              : /// Storage controller specific extensions to [`TimelineInfo`].
     369            0 : #[derive(Serialize, Deserialize, Clone)]
     370              : pub struct TimelineCreateResponseStorcon {
     371              :     #[serde(flatten)]
     372              :     pub timeline_info: TimelineInfo,
     373              : 
     374              :     pub safekeepers: Option<SafekeepersInfo>,
     375              : }
     376              : 
     377              : /// Safekeepers as returned in timeline creation request to storcon or pushed to
     378              : /// cplane in the post migration hook.
     379            0 : #[derive(Serialize, Deserialize, Clone)]
     380              : pub struct SafekeepersInfo {
     381              :     pub tenant_id: TenantId,
     382              :     pub timeline_id: TimelineId,
     383              :     pub generation: u32,
     384              :     pub safekeepers: Vec<SafekeeperInfo>,
     385              : }
     386              : 
     387            0 : #[derive(Serialize, Deserialize, Clone)]
     388              : pub struct SafekeeperInfo {
     389              :     pub id: NodeId,
     390              :     pub hostname: String,
     391              : }
     392              : 
     393            0 : #[derive(Serialize, Deserialize, Clone)]
     394              : #[serde(untagged)]
     395              : pub enum TimelineCreateRequestMode {
     396              :     Branch {
     397              :         ancestor_timeline_id: TimelineId,
     398              :         #[serde(default)]
     399              :         ancestor_start_lsn: Option<Lsn>,
     400              :         // TODO: cplane sets this, but, the branching code always
     401              :         // inherits the ancestor's pg_version. Earlier code wasn't
     402              :         // using a flattened enum, so, it was an accepted field, and
     403              :         // we continue to accept it by having it here.
     404              :         pg_version: Option<u32>,
     405              :         #[serde(default, skip_serializing_if = "std::ops::Not::not")]
     406              :         read_only: bool,
     407              :     },
     408              :     ImportPgdata {
     409              :         import_pgdata: TimelineCreateRequestModeImportPgdata,
     410              :     },
     411              :     // NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
     412              :     // (serde picks the first matching enum variant, in declaration order).
     413              :     Bootstrap {
     414              :         #[serde(default)]
     415              :         existing_initdb_timeline_id: Option<TimelineId>,
     416              :         pg_version: Option<u32>,
     417              :     },
     418              : }
     419              : 
     420            0 : #[derive(Serialize, Deserialize, Clone)]
     421              : pub struct TimelineCreateRequestModeImportPgdata {
     422              :     pub location: ImportPgdataLocation,
     423              :     pub idempotency_key: ImportPgdataIdempotencyKey,
     424              : }
     425              : 
     426            0 : #[derive(Serialize, Deserialize, Clone, Debug)]
     427              : pub enum ImportPgdataLocation {
     428              :     #[cfg(feature = "testing")]
     429              :     LocalFs { path: Utf8PathBuf },
     430              :     AwsS3 {
     431              :         region: String,
     432              :         bucket: String,
     433              :         /// A better name for this would be `prefix`; changing requires coordination with cplane.
     434              :         /// See <https://github.com/neondatabase/cloud/issues/20646>.
     435              :         key: String,
     436              :     },
     437              : }
     438              : 
     439            0 : #[derive(Serialize, Deserialize, Clone)]
     440              : #[serde(transparent)]
     441              : pub struct ImportPgdataIdempotencyKey(pub String);
     442              : 
     443              : impl ImportPgdataIdempotencyKey {
     444            0 :     pub fn random() -> Self {
     445              :         use rand::Rng;
     446              :         use rand::distributions::Alphanumeric;
     447            0 :         Self(
     448            0 :             rand::thread_rng()
     449            0 :                 .sample_iter(&Alphanumeric)
     450            0 :                 .take(20)
     451            0 :                 .map(char::from)
     452            0 :                 .collect(),
     453            0 :         )
     454            0 :     }
     455              : }
     456              : 
     457            0 : #[derive(Serialize, Deserialize, Clone)]
     458              : pub struct LsnLeaseRequest {
     459              :     pub lsn: Lsn,
     460              : }
     461              : 
     462            0 : #[derive(Serialize, Deserialize)]
     463              : pub struct TenantShardSplitRequest {
     464              :     pub new_shard_count: u8,
     465              : 
     466              :     // A tenant's stripe size is only meaningful the first time their shard count goes
     467              :     // above 1: therefore during a split from 1->N shards, we may modify the stripe size.
     468              :     //
     469              :     // If this is set while the shard count is being increased from an already >1 value,
     470              :     // then the request will fail with 400.
     471              :     pub new_stripe_size: Option<ShardStripeSize>,
     472              : }
     473              : 
     474            0 : #[derive(Serialize, Deserialize)]
     475              : pub struct TenantShardSplitResponse {
     476              :     pub new_shards: Vec<TenantShardId>,
     477              : }
     478              : 
     479              : /// Parameters that apply to all shards in a tenant.  Used during tenant creation.
     480            0 : #[derive(Serialize, Deserialize, Debug)]
     481              : #[serde(deny_unknown_fields)]
     482              : pub struct ShardParameters {
     483              :     pub count: ShardCount,
     484              :     pub stripe_size: ShardStripeSize,
     485              : }
     486              : 
     487              : impl ShardParameters {
     488            0 :     pub fn is_unsharded(&self) -> bool {
     489            0 :         self.count.is_unsharded()
     490            0 :     }
     491              : }
     492              : 
     493              : impl Default for ShardParameters {
     494          118 :     fn default() -> Self {
     495          118 :         Self {
     496          118 :             count: ShardCount::new(0),
     497          118 :             stripe_size: DEFAULT_STRIPE_SIZE,
     498          118 :         }
     499          118 :     }
     500              : }
     501              : 
     502              : #[derive(Debug, Default, Clone, Eq, PartialEq)]
     503              : pub enum FieldPatch<T> {
     504              :     Upsert(T),
     505              :     Remove,
     506              :     #[default]
     507              :     Noop,
     508              : }
     509              : 
     510              : impl<T> FieldPatch<T> {
     511           78 :     fn is_noop(&self) -> bool {
     512           78 :         matches!(self, FieldPatch::Noop)
     513           78 :     }
     514              : 
     515           39 :     pub fn apply(self, target: &mut Option<T>) {
     516           39 :         match self {
     517            1 :             Self::Upsert(v) => *target = Some(v),
     518            1 :             Self::Remove => *target = None,
     519           37 :             Self::Noop => {}
     520              :         }
     521           39 :     }
     522              : 
     523           10 :     pub fn map<U, E, F: FnOnce(T) -> Result<U, E>>(self, map: F) -> Result<FieldPatch<U>, E> {
     524           10 :         match self {
     525            0 :             Self::Upsert(v) => Ok(FieldPatch::<U>::Upsert(map(v)?)),
     526            0 :             Self::Remove => Ok(FieldPatch::<U>::Remove),
     527           10 :             Self::Noop => Ok(FieldPatch::<U>::Noop),
     528              :         }
     529           10 :     }
     530              : }
     531              : 
     532              : impl<'de, T: Deserialize<'de>> Deserialize<'de> for FieldPatch<T> {
     533            2 :     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     534            2 :     where
     535            2 :         D: Deserializer<'de>,
     536            2 :     {
     537            2 :         Option::deserialize(deserializer).map(|opt| match opt {
     538            1 :             None => FieldPatch::Remove,
     539            1 :             Some(val) => FieldPatch::Upsert(val),
     540            2 :         })
     541            2 :     }
     542              : }
     543              : 
     544              : impl<T: Serialize> Serialize for FieldPatch<T> {
     545            2 :     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     546            2 :     where
     547            2 :         S: Serializer,
     548            2 :     {
     549            2 :         match self {
     550            1 :             FieldPatch::Upsert(val) => serializer.serialize_some(val),
     551            1 :             FieldPatch::Remove => serializer.serialize_none(),
     552            0 :             FieldPatch::Noop => unreachable!(),
     553              :         }
     554            2 :     }
     555              : }
     556              : 
     557            2 : #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
     558              : #[serde(default)]
     559              : pub struct TenantConfigPatch {
     560              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     561              :     pub checkpoint_distance: FieldPatch<u64>,
     562              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     563              :     pub checkpoint_timeout: FieldPatch<String>,
     564              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     565              :     pub compaction_target_size: FieldPatch<u64>,
     566              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     567              :     pub compaction_period: FieldPatch<String>,
     568              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     569              :     pub compaction_threshold: FieldPatch<usize>,
     570              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     571              :     pub compaction_upper_limit: FieldPatch<usize>,
     572              :     // defer parsing compaction_algorithm, like eviction_policy
     573              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     574              :     pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
     575              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     576              :     pub compaction_shard_ancestor: FieldPatch<bool>,
     577              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     578              :     pub compaction_l0_first: FieldPatch<bool>,
     579              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     580              :     pub compaction_l0_semaphore: FieldPatch<bool>,
     581              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     582              :     pub l0_flush_delay_threshold: FieldPatch<usize>,
     583              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     584              :     pub l0_flush_stall_threshold: FieldPatch<usize>,
     585              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     586              :     pub gc_horizon: FieldPatch<u64>,
     587              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     588              :     pub gc_period: FieldPatch<String>,
     589              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     590              :     pub image_creation_threshold: FieldPatch<usize>,
     591              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     592              :     pub pitr_interval: FieldPatch<String>,
     593              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     594              :     pub walreceiver_connect_timeout: FieldPatch<String>,
     595              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     596              :     pub lagging_wal_timeout: FieldPatch<String>,
     597              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     598              :     pub max_lsn_wal_lag: FieldPatch<NonZeroU64>,
     599              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     600              :     pub eviction_policy: FieldPatch<EvictionPolicy>,
     601              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     602              :     pub min_resident_size_override: FieldPatch<u64>,
     603              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     604              :     pub evictions_low_residence_duration_metric_threshold: FieldPatch<String>,
     605              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     606              :     pub heatmap_period: FieldPatch<String>,
     607              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     608              :     pub lazy_slru_download: FieldPatch<bool>,
     609              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     610              :     pub timeline_get_throttle: FieldPatch<ThrottleConfig>,
     611              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     612              :     pub image_layer_creation_check_threshold: FieldPatch<u8>,
     613              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     614              :     pub image_creation_preempt_threshold: FieldPatch<usize>,
     615              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     616              :     pub lsn_lease_length: FieldPatch<String>,
     617              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     618              :     pub lsn_lease_length_for_ts: FieldPatch<String>,
     619              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     620              :     pub timeline_offloading: FieldPatch<bool>,
     621              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     622              :     pub wal_receiver_protocol_override: FieldPatch<PostgresClientProtocol>,
     623              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     624              :     pub rel_size_v2_enabled: FieldPatch<bool>,
     625              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     626              :     pub gc_compaction_enabled: FieldPatch<bool>,
     627              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     628              :     pub gc_compaction_verification: FieldPatch<bool>,
     629              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     630              :     pub gc_compaction_initial_threshold_kb: FieldPatch<u64>,
     631              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     632              :     pub gc_compaction_ratio_percent: FieldPatch<u64>,
     633              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     634              :     pub sampling_ratio: FieldPatch<Option<Ratio>>,
     635              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     636              :     pub relsize_snapshot_cache_capacity: FieldPatch<usize>,
     637              :     #[serde(skip_serializing_if = "FieldPatch::is_noop")]
     638              :     pub basebackup_cache_enabled: FieldPatch<bool>,
     639              : }
     640              : 
     641              : /// Like [`crate::config::TenantConfigToml`], but preserves the information
     642              : /// about which parameters are set and which are not.
     643              : ///
     644              : /// Used in many places, including durably stored ones.
     645            2 : #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
     646              : #[serde(default)] // this maps omitted fields in deserialization to None
     647              : pub struct TenantConfig {
     648              :     #[serde(skip_serializing_if = "Option::is_none")]
     649              :     pub checkpoint_distance: Option<u64>,
     650              : 
     651              :     #[serde(skip_serializing_if = "Option::is_none")]
     652              :     #[serde(with = "humantime_serde")]
     653              :     pub checkpoint_timeout: Option<Duration>,
     654              : 
     655              :     #[serde(skip_serializing_if = "Option::is_none")]
     656              :     pub compaction_target_size: Option<u64>,
     657              : 
     658              :     #[serde(skip_serializing_if = "Option::is_none")]
     659              :     #[serde(with = "humantime_serde")]
     660              :     pub compaction_period: Option<Duration>,
     661              : 
     662              :     #[serde(skip_serializing_if = "Option::is_none")]
     663              :     pub compaction_threshold: Option<usize>,
     664              : 
     665              :     #[serde(skip_serializing_if = "Option::is_none")]
     666              :     pub compaction_upper_limit: Option<usize>,
     667              : 
     668              :     #[serde(skip_serializing_if = "Option::is_none")]
     669              :     pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
     670              : 
     671              :     #[serde(skip_serializing_if = "Option::is_none")]
     672              :     pub compaction_shard_ancestor: Option<bool>,
     673              : 
     674              :     #[serde(skip_serializing_if = "Option::is_none")]
     675              :     pub compaction_l0_first: Option<bool>,
     676              : 
     677              :     #[serde(skip_serializing_if = "Option::is_none")]
     678              :     pub compaction_l0_semaphore: Option<bool>,
     679              : 
     680              :     #[serde(skip_serializing_if = "Option::is_none")]
     681              :     pub l0_flush_delay_threshold: Option<usize>,
     682              : 
     683              :     #[serde(skip_serializing_if = "Option::is_none")]
     684              :     pub l0_flush_stall_threshold: Option<usize>,
     685              : 
     686              :     #[serde(skip_serializing_if = "Option::is_none")]
     687              :     pub gc_horizon: Option<u64>,
     688              : 
     689              :     #[serde(skip_serializing_if = "Option::is_none")]
     690              :     #[serde(with = "humantime_serde")]
     691              :     pub gc_period: Option<Duration>,
     692              : 
     693              :     #[serde(skip_serializing_if = "Option::is_none")]
     694              :     pub image_creation_threshold: Option<usize>,
     695              : 
     696              :     #[serde(skip_serializing_if = "Option::is_none")]
     697              :     #[serde(with = "humantime_serde")]
     698              :     pub pitr_interval: Option<Duration>,
     699              : 
     700              :     #[serde(skip_serializing_if = "Option::is_none")]
     701              :     #[serde(with = "humantime_serde")]
     702              :     pub walreceiver_connect_timeout: Option<Duration>,
     703              : 
     704              :     #[serde(skip_serializing_if = "Option::is_none")]
     705              :     #[serde(with = "humantime_serde")]
     706              :     pub lagging_wal_timeout: Option<Duration>,
     707              : 
     708              :     #[serde(skip_serializing_if = "Option::is_none")]
     709              :     pub max_lsn_wal_lag: Option<NonZeroU64>,
     710              : 
     711              :     #[serde(skip_serializing_if = "Option::is_none")]
     712              :     pub eviction_policy: Option<EvictionPolicy>,
     713              : 
     714              :     #[serde(skip_serializing_if = "Option::is_none")]
     715              :     pub min_resident_size_override: Option<u64>,
     716              : 
     717              :     #[serde(skip_serializing_if = "Option::is_none")]
     718              :     #[serde(with = "humantime_serde")]
     719              :     pub evictions_low_residence_duration_metric_threshold: Option<Duration>,
     720              : 
     721              :     #[serde(skip_serializing_if = "Option::is_none")]
     722              :     #[serde(with = "humantime_serde")]
     723              :     pub heatmap_period: Option<Duration>,
     724              : 
     725              :     #[serde(skip_serializing_if = "Option::is_none")]
     726              :     pub lazy_slru_download: Option<bool>,
     727              : 
     728              :     #[serde(skip_serializing_if = "Option::is_none")]
     729              :     pub timeline_get_throttle: Option<ThrottleConfig>,
     730              : 
     731              :     #[serde(skip_serializing_if = "Option::is_none")]
     732              :     pub image_layer_creation_check_threshold: Option<u8>,
     733              : 
     734              :     #[serde(skip_serializing_if = "Option::is_none")]
     735              :     pub image_creation_preempt_threshold: Option<usize>,
     736              : 
     737              :     #[serde(skip_serializing_if = "Option::is_none")]
     738              :     #[serde(with = "humantime_serde")]
     739              :     pub lsn_lease_length: Option<Duration>,
     740              : 
     741              :     #[serde(skip_serializing_if = "Option::is_none")]
     742              :     #[serde(with = "humantime_serde")]
     743              :     pub lsn_lease_length_for_ts: Option<Duration>,
     744              : 
     745              :     #[serde(skip_serializing_if = "Option::is_none")]
     746              :     pub timeline_offloading: Option<bool>,
     747              : 
     748              :     #[serde(skip_serializing_if = "Option::is_none")]
     749              :     pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
     750              : 
     751              :     #[serde(skip_serializing_if = "Option::is_none")]
     752              :     pub rel_size_v2_enabled: Option<bool>,
     753              : 
     754              :     #[serde(skip_serializing_if = "Option::is_none")]
     755              :     pub gc_compaction_enabled: Option<bool>,
     756              : 
     757              :     #[serde(skip_serializing_if = "Option::is_none")]
     758              :     pub gc_compaction_verification: Option<bool>,
     759              : 
     760              :     #[serde(skip_serializing_if = "Option::is_none")]
     761              :     pub gc_compaction_initial_threshold_kb: Option<u64>,
     762              : 
     763              :     #[serde(skip_serializing_if = "Option::is_none")]
     764              :     pub gc_compaction_ratio_percent: Option<u64>,
     765              : 
     766              :     #[serde(skip_serializing_if = "Option::is_none")]
     767              :     pub sampling_ratio: Option<Option<Ratio>>,
     768              : 
     769              :     #[serde(skip_serializing_if = "Option::is_none")]
     770              :     pub relsize_snapshot_cache_capacity: Option<usize>,
     771              : 
     772              :     #[serde(skip_serializing_if = "Option::is_none")]
     773              :     pub basebackup_cache_enabled: Option<bool>,
     774              : }
     775              : 
     776              : impl TenantConfig {
     777            1 :     pub fn apply_patch(
     778            1 :         self,
     779            1 :         patch: TenantConfigPatch,
     780            1 :     ) -> Result<TenantConfig, humantime::DurationError> {
     781            1 :         let Self {
     782            1 :             mut checkpoint_distance,
     783            1 :             mut checkpoint_timeout,
     784            1 :             mut compaction_target_size,
     785            1 :             mut compaction_period,
     786            1 :             mut compaction_threshold,
     787            1 :             mut compaction_upper_limit,
     788            1 :             mut compaction_algorithm,
     789            1 :             mut compaction_shard_ancestor,
     790            1 :             mut compaction_l0_first,
     791            1 :             mut compaction_l0_semaphore,
     792            1 :             mut l0_flush_delay_threshold,
     793            1 :             mut l0_flush_stall_threshold,
     794            1 :             mut gc_horizon,
     795            1 :             mut gc_period,
     796            1 :             mut image_creation_threshold,
     797            1 :             mut pitr_interval,
     798            1 :             mut walreceiver_connect_timeout,
     799            1 :             mut lagging_wal_timeout,
     800            1 :             mut max_lsn_wal_lag,
     801            1 :             mut eviction_policy,
     802            1 :             mut min_resident_size_override,
     803            1 :             mut evictions_low_residence_duration_metric_threshold,
     804            1 :             mut heatmap_period,
     805            1 :             mut lazy_slru_download,
     806            1 :             mut timeline_get_throttle,
     807            1 :             mut image_layer_creation_check_threshold,
     808            1 :             mut image_creation_preempt_threshold,
     809            1 :             mut lsn_lease_length,
     810            1 :             mut lsn_lease_length_for_ts,
     811            1 :             mut timeline_offloading,
     812            1 :             mut wal_receiver_protocol_override,
     813            1 :             mut rel_size_v2_enabled,
     814            1 :             mut gc_compaction_enabled,
     815            1 :             mut gc_compaction_verification,
     816            1 :             mut gc_compaction_initial_threshold_kb,
     817            1 :             mut gc_compaction_ratio_percent,
     818            1 :             mut sampling_ratio,
     819            1 :             mut relsize_snapshot_cache_capacity,
     820            1 :             mut basebackup_cache_enabled,
     821            1 :         } = self;
     822            1 : 
     823            1 :         patch.checkpoint_distance.apply(&mut checkpoint_distance);
     824            1 :         patch
     825            1 :             .checkpoint_timeout
     826            1 :             .map(|v| humantime::parse_duration(&v))?
     827            1 :             .apply(&mut checkpoint_timeout);
     828            1 :         patch
     829            1 :             .compaction_target_size
     830            1 :             .apply(&mut compaction_target_size);
     831            1 :         patch
     832            1 :             .compaction_period
     833            1 :             .map(|v| humantime::parse_duration(&v))?
     834            1 :             .apply(&mut compaction_period);
     835            1 :         patch.compaction_threshold.apply(&mut compaction_threshold);
     836            1 :         patch
     837            1 :             .compaction_upper_limit
     838            1 :             .apply(&mut compaction_upper_limit);
     839            1 :         patch.compaction_algorithm.apply(&mut compaction_algorithm);
     840            1 :         patch
     841            1 :             .compaction_shard_ancestor
     842            1 :             .apply(&mut compaction_shard_ancestor);
     843            1 :         patch.compaction_l0_first.apply(&mut compaction_l0_first);
     844            1 :         patch
     845            1 :             .compaction_l0_semaphore
     846            1 :             .apply(&mut compaction_l0_semaphore);
     847            1 :         patch
     848            1 :             .l0_flush_delay_threshold
     849            1 :             .apply(&mut l0_flush_delay_threshold);
     850            1 :         patch
     851            1 :             .l0_flush_stall_threshold
     852            1 :             .apply(&mut l0_flush_stall_threshold);
     853            1 :         patch.gc_horizon.apply(&mut gc_horizon);
     854            1 :         patch
     855            1 :             .gc_period
     856            1 :             .map(|v| humantime::parse_duration(&v))?
     857            1 :             .apply(&mut gc_period);
     858            1 :         patch
     859            1 :             .image_creation_threshold
     860            1 :             .apply(&mut image_creation_threshold);
     861            1 :         patch
     862            1 :             .pitr_interval
     863            1 :             .map(|v| humantime::parse_duration(&v))?
     864            1 :             .apply(&mut pitr_interval);
     865            1 :         patch
     866            1 :             .walreceiver_connect_timeout
     867            1 :             .map(|v| humantime::parse_duration(&v))?
     868            1 :             .apply(&mut walreceiver_connect_timeout);
     869            1 :         patch
     870            1 :             .lagging_wal_timeout
     871            1 :             .map(|v| humantime::parse_duration(&v))?
     872            1 :             .apply(&mut lagging_wal_timeout);
     873            1 :         patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);
     874            1 :         patch.eviction_policy.apply(&mut eviction_policy);
     875            1 :         patch
     876            1 :             .min_resident_size_override
     877            1 :             .apply(&mut min_resident_size_override);
     878            1 :         patch
     879            1 :             .evictions_low_residence_duration_metric_threshold
     880            1 :             .map(|v| humantime::parse_duration(&v))?
     881            1 :             .apply(&mut evictions_low_residence_duration_metric_threshold);
     882            1 :         patch
     883            1 :             .heatmap_period
     884            1 :             .map(|v| humantime::parse_duration(&v))?
     885            1 :             .apply(&mut heatmap_period);
     886            1 :         patch.lazy_slru_download.apply(&mut lazy_slru_download);
     887            1 :         patch
     888            1 :             .timeline_get_throttle
     889            1 :             .apply(&mut timeline_get_throttle);
     890            1 :         patch
     891            1 :             .image_layer_creation_check_threshold
     892            1 :             .apply(&mut image_layer_creation_check_threshold);
     893            1 :         patch
     894            1 :             .image_creation_preempt_threshold
     895            1 :             .apply(&mut image_creation_preempt_threshold);
     896            1 :         patch
     897            1 :             .lsn_lease_length
     898            1 :             .map(|v| humantime::parse_duration(&v))?
     899            1 :             .apply(&mut lsn_lease_length);
     900            1 :         patch
     901            1 :             .lsn_lease_length_for_ts
     902            1 :             .map(|v| humantime::parse_duration(&v))?
     903            1 :             .apply(&mut lsn_lease_length_for_ts);
     904            1 :         patch.timeline_offloading.apply(&mut timeline_offloading);
     905            1 :         patch
     906            1 :             .wal_receiver_protocol_override
     907            1 :             .apply(&mut wal_receiver_protocol_override);
     908            1 :         patch.rel_size_v2_enabled.apply(&mut rel_size_v2_enabled);
     909            1 :         patch
     910            1 :             .gc_compaction_enabled
     911            1 :             .apply(&mut gc_compaction_enabled);
     912            1 :         patch
     913            1 :             .gc_compaction_verification
     914            1 :             .apply(&mut gc_compaction_verification);
     915            1 :         patch
     916            1 :             .gc_compaction_initial_threshold_kb
     917            1 :             .apply(&mut gc_compaction_initial_threshold_kb);
     918            1 :         patch
     919            1 :             .gc_compaction_ratio_percent
     920            1 :             .apply(&mut gc_compaction_ratio_percent);
     921            1 :         patch.sampling_ratio.apply(&mut sampling_ratio);
     922            1 :         patch
     923            1 :             .relsize_snapshot_cache_capacity
     924            1 :             .apply(&mut relsize_snapshot_cache_capacity);
     925            1 :         patch
     926            1 :             .basebackup_cache_enabled
     927            1 :             .apply(&mut basebackup_cache_enabled);
     928            1 : 
     929            1 :         Ok(Self {
     930            1 :             checkpoint_distance,
     931            1 :             checkpoint_timeout,
     932            1 :             compaction_target_size,
     933            1 :             compaction_period,
     934            1 :             compaction_threshold,
     935            1 :             compaction_upper_limit,
     936            1 :             compaction_algorithm,
     937            1 :             compaction_shard_ancestor,
     938            1 :             compaction_l0_first,
     939            1 :             compaction_l0_semaphore,
     940            1 :             l0_flush_delay_threshold,
     941            1 :             l0_flush_stall_threshold,
     942            1 :             gc_horizon,
     943            1 :             gc_period,
     944            1 :             image_creation_threshold,
     945            1 :             pitr_interval,
     946            1 :             walreceiver_connect_timeout,
     947            1 :             lagging_wal_timeout,
     948            1 :             max_lsn_wal_lag,
     949            1 :             eviction_policy,
     950            1 :             min_resident_size_override,
     951            1 :             evictions_low_residence_duration_metric_threshold,
     952            1 :             heatmap_period,
     953            1 :             lazy_slru_download,
     954            1 :             timeline_get_throttle,
     955            1 :             image_layer_creation_check_threshold,
     956            1 :             image_creation_preempt_threshold,
     957            1 :             lsn_lease_length,
     958            1 :             lsn_lease_length_for_ts,
     959            1 :             timeline_offloading,
     960            1 :             wal_receiver_protocol_override,
     961            1 :             rel_size_v2_enabled,
     962            1 :             gc_compaction_enabled,
     963            1 :             gc_compaction_verification,
     964            1 :             gc_compaction_initial_threshold_kb,
     965            1 :             gc_compaction_ratio_percent,
     966            1 :             sampling_ratio,
     967            1 :             relsize_snapshot_cache_capacity,
     968            1 :             basebackup_cache_enabled,
     969            1 :         })
     970            1 :     }
     971              : 
     972            0 :     pub fn merge(
     973            0 :         &self,
     974            0 :         global_conf: crate::config::TenantConfigToml,
     975            0 :     ) -> crate::config::TenantConfigToml {
     976            0 :         crate::config::TenantConfigToml {
     977            0 :             checkpoint_distance: self
     978            0 :                 .checkpoint_distance
     979            0 :                 .unwrap_or(global_conf.checkpoint_distance),
     980            0 :             checkpoint_timeout: self
     981            0 :                 .checkpoint_timeout
     982            0 :                 .unwrap_or(global_conf.checkpoint_timeout),
     983            0 :             compaction_target_size: self
     984            0 :                 .compaction_target_size
     985            0 :                 .unwrap_or(global_conf.compaction_target_size),
     986            0 :             compaction_period: self
     987            0 :                 .compaction_period
     988            0 :                 .unwrap_or(global_conf.compaction_period),
     989            0 :             compaction_threshold: self
     990            0 :                 .compaction_threshold
     991            0 :                 .unwrap_or(global_conf.compaction_threshold),
     992            0 :             compaction_upper_limit: self
     993            0 :                 .compaction_upper_limit
     994            0 :                 .unwrap_or(global_conf.compaction_upper_limit),
     995            0 :             compaction_algorithm: self
     996            0 :                 .compaction_algorithm
     997            0 :                 .as_ref()
     998            0 :                 .unwrap_or(&global_conf.compaction_algorithm)
     999            0 :                 .clone(),
    1000            0 :             compaction_shard_ancestor: self
    1001            0 :                 .compaction_shard_ancestor
    1002            0 :                 .unwrap_or(global_conf.compaction_shard_ancestor),
    1003            0 :             compaction_l0_first: self
    1004            0 :                 .compaction_l0_first
    1005            0 :                 .unwrap_or(global_conf.compaction_l0_first),
    1006            0 :             compaction_l0_semaphore: self
    1007            0 :                 .compaction_l0_semaphore
    1008            0 :                 .unwrap_or(global_conf.compaction_l0_semaphore),
    1009            0 :             l0_flush_delay_threshold: self
    1010            0 :                 .l0_flush_delay_threshold
    1011            0 :                 .or(global_conf.l0_flush_delay_threshold),
    1012            0 :             l0_flush_stall_threshold: self
    1013            0 :                 .l0_flush_stall_threshold
    1014            0 :                 .or(global_conf.l0_flush_stall_threshold),
    1015            0 :             gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
    1016            0 :             gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
    1017            0 :             image_creation_threshold: self
    1018            0 :                 .image_creation_threshold
    1019            0 :                 .unwrap_or(global_conf.image_creation_threshold),
    1020            0 :             pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),
    1021            0 :             walreceiver_connect_timeout: self
    1022            0 :                 .walreceiver_connect_timeout
    1023            0 :                 .unwrap_or(global_conf.walreceiver_connect_timeout),
    1024            0 :             lagging_wal_timeout: self
    1025            0 :                 .lagging_wal_timeout
    1026            0 :                 .unwrap_or(global_conf.lagging_wal_timeout),
    1027            0 :             max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),
    1028            0 :             eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
    1029            0 :             min_resident_size_override: self
    1030            0 :                 .min_resident_size_override
    1031            0 :                 .or(global_conf.min_resident_size_override),
    1032            0 :             evictions_low_residence_duration_metric_threshold: self
    1033            0 :                 .evictions_low_residence_duration_metric_threshold
    1034            0 :                 .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
    1035            0 :             heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
    1036            0 :             lazy_slru_download: self
    1037            0 :                 .lazy_slru_download
    1038            0 :                 .unwrap_or(global_conf.lazy_slru_download),
    1039            0 :             timeline_get_throttle: self
    1040            0 :                 .timeline_get_throttle
    1041            0 :                 .clone()
    1042            0 :                 .unwrap_or(global_conf.timeline_get_throttle),
    1043            0 :             image_layer_creation_check_threshold: self
    1044            0 :                 .image_layer_creation_check_threshold
    1045            0 :                 .unwrap_or(global_conf.image_layer_creation_check_threshold),
    1046            0 :             image_creation_preempt_threshold: self
    1047            0 :                 .image_creation_preempt_threshold
    1048            0 :                 .unwrap_or(global_conf.image_creation_preempt_threshold),
    1049            0 :             lsn_lease_length: self
    1050            0 :                 .lsn_lease_length
    1051            0 :                 .unwrap_or(global_conf.lsn_lease_length),
    1052            0 :             lsn_lease_length_for_ts: self
    1053            0 :                 .lsn_lease_length_for_ts
    1054            0 :                 .unwrap_or(global_conf.lsn_lease_length_for_ts),
    1055            0 :             timeline_offloading: self
    1056            0 :                 .timeline_offloading
    1057            0 :                 .unwrap_or(global_conf.timeline_offloading),
    1058            0 :             wal_receiver_protocol_override: self
    1059            0 :                 .wal_receiver_protocol_override
    1060            0 :                 .or(global_conf.wal_receiver_protocol_override),
    1061            0 :             rel_size_v2_enabled: self
    1062            0 :                 .rel_size_v2_enabled
    1063            0 :                 .unwrap_or(global_conf.rel_size_v2_enabled),
    1064            0 :             gc_compaction_enabled: self
    1065            0 :                 .gc_compaction_enabled
    1066            0 :                 .unwrap_or(global_conf.gc_compaction_enabled),
    1067            0 :             gc_compaction_verification: self
    1068            0 :                 .gc_compaction_verification
    1069            0 :                 .unwrap_or(global_conf.gc_compaction_verification),
    1070            0 :             gc_compaction_initial_threshold_kb: self
    1071            0 :                 .gc_compaction_initial_threshold_kb
    1072            0 :                 .unwrap_or(global_conf.gc_compaction_initial_threshold_kb),
    1073            0 :             gc_compaction_ratio_percent: self
    1074            0 :                 .gc_compaction_ratio_percent
    1075            0 :                 .unwrap_or(global_conf.gc_compaction_ratio_percent),
    1076            0 :             sampling_ratio: self.sampling_ratio.unwrap_or(global_conf.sampling_ratio),
    1077            0 :             relsize_snapshot_cache_capacity: self
    1078            0 :                 .relsize_snapshot_cache_capacity
    1079            0 :                 .unwrap_or(global_conf.relsize_snapshot_cache_capacity),
    1080            0 :             basebackup_cache_enabled: self
    1081            0 :                 .basebackup_cache_enabled
    1082            0 :                 .unwrap_or(global_conf.basebackup_cache_enabled),
    1083            0 :         }
    1084            0 :     }
    1085              : }
    1086              : 
    1087              : /// The policy for the aux file storage.
    1088              : ///
    1089              : /// It can be switched through `switch_aux_file_policy` tenant config.
    1090              : /// When the first aux file is written, the policy will be persisted in the
    1091              : /// `index_part.json` file and has a limited migration path.
    1092              : ///
    1093              : /// Currently, we only allow the following migration path:
    1094              : ///
    1095              : /// Unset -> V1
    1096              : ///       -> V2
    1097              : ///       -> CrossValidation -> V2
                        ///
                        /// The string form is produced by the strum derives (`Display` / `EnumString`)
                        /// using kebab-case variant names, and serde reuses that string form through
                        /// `SerializeDisplay` / `DeserializeFromStr`. Every variant is marked
                        /// `ascii_case_insensitive`, so parsing ignores ASCII case.
    1098              : #[derive(
    1099              :     Eq,
    1100              :     PartialEq,
    1101              :     Debug,
    1102              :     Copy,
    1103              :     Clone,
    1104            0 :     strum_macros::EnumString,
    1105              :     strum_macros::Display,
    1106            1 :     serde_with::DeserializeFromStr,
    1107              :     serde_with::SerializeDisplay,
    1108              : )]
    1109              : #[strum(serialize_all = "kebab-case")]
    1110              : pub enum AuxFilePolicy {
    1111              :     /// V1 aux file policy: store everything in AUX_FILE_KEY
    1112              :     #[strum(ascii_case_insensitive)]
    1113              :     V1,
    1114              :     /// V2 aux file policy: store in the AUX_FILE keyspace
    1115              :     #[strum(ascii_case_insensitive)]
    1116              :     V2,
    1117              :     /// Cross validation runs both formats on the write path and does validation
    1118              :     /// on the read path.
    1119              :     #[strum(ascii_case_insensitive)]
    1120              :     CrossValidation,
    1121              : }
    1122              : 
    1123            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
    1124              : #[serde(tag = "kind")]
                        /// Selects an eviction policy.
                        ///
                        /// Serde uses the internally tagged representation: the variant name is stored
                        /// under the `kind` key, next to the fields of the variant's payload (if any).
                        //
                        // NOTE(review): `OnlyImitiate` looks like a misspelling of "imitate", but the
                        // variant name is what gets serialized (and what `discriminant_str` returns),
                        // so renaming it would change the wire format.
    1125              : pub enum EvictionPolicy {
    1126              :     NoEviction,
    1127              :     LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
    1128              :     OnlyImitiate(EvictionPolicyLayerAccessThreshold),
    1129              : }
    1130              : 
    1131              : impl EvictionPolicy {
                            /// Returns the name of the active variant as a static string.
                            ///
                            /// The payload of `LayerAccessThreshold` / `OnlyImitiate` is ignored; the
                            /// returned text matches the Rust variant identifier exactly.
    1132            0 :     pub fn discriminant_str(&self) -> &'static str {
    1133            0 :         match self {
    1134            0 :             EvictionPolicy::NoEviction => "NoEviction",
    1135            0 :             EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
    1136            0 :             EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate",
    1137              :         }
    1138            0 :     }
    1139              : }
    1140              : 
    1141              : #[derive(
    1142              :     Eq,
    1143              :     PartialEq,
    1144              :     Debug,
    1145              :     Copy,
    1146              :     Clone,
    1147            0 :     strum_macros::EnumString,
    1148              :     strum_macros::Display,
    1149            0 :     serde_with::DeserializeFromStr,
    1150              :     serde_with::SerializeDisplay,
    1151              : )]
    1152              : #[strum(serialize_all = "kebab-case")]
                        /// Identifies a compaction algorithm.
                        ///
                        /// The string form is the kebab-case variant name generated by strum; serde
                        /// goes through that same string form (`SerializeDisplay` /
                        /// `DeserializeFromStr`). No variant is marked case-insensitive here.
    1153              : pub enum CompactionAlgorithm {
    1154              :     Legacy,
    1155              :     Tiered,
    1156              : }
    1157              : 
    1158              : #[derive(
    1159            4 :     Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,
    1160              : )]
                        /// Compression setting for image data.
                        ///
                        /// Serde (de)serializes this type through its `FromStr` / `Display` impls, so
                        /// the textual forms are `disabled`, `zstd` and `zstd(<level>)`.
    1161              : pub enum ImageCompressionAlgorithm {
    1162              :     // Disabled for writes, support decompressing during read path
    1163              :     Disabled,
    1164              :     /// Zstandard compression. Level 0 and `None` mean the same (default level). Levels can be negative as well.
    1165              :     /// For details, see the [manual](http://facebook.github.io/zstd/zstd_manual.html).
    1166              :     Zstd {
    1167              :         level: Option<i8>,
    1168              :     },
    1169              : }
    1170              : 
    1171              : impl FromStr for ImageCompressionAlgorithm {
    1172              :     type Err = anyhow::Error;
                            /// Parses `disabled`, `zstd` or `zstd(<level>)`.
                            ///
                            /// The input is split on `(` and `)`. The first piece selects the algorithm;
                            /// for `zstd`, the second piece (if there is one) is parsed as an `i8` level.
                            ///
                            /// # Errors
                            ///
                            /// Fails when the first piece is neither `disabled` nor `zstd`, or when the
                            /// level does not parse as `i8` (this includes `zstd()`, whose level piece
                            /// is the empty string).
                            //
                            // NOTE(review): parsing is lenient. Pieces after the level are never looked
                            // at, and nothing after `disabled` is checked, so inputs such as `zstd(1)x`
                            // or `disabled(1)` are accepted.
    1173            8 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
    1174            8 :         let mut components = s.split(['(', ')']);
                                // `str::split` yields at least one (possibly empty) piece, so an empty
                                // input reaches the `invalid specifier` arm below rather than this error.
    1175            8 :         let first = components
    1176            8 :             .next()
    1177            8 :             .ok_or_else(|| anyhow::anyhow!("empty string"))?;
    1178            8 :         match first {
    1179            8 :             "disabled" => Ok(ImageCompressionAlgorithm::Disabled),
    1180            6 :             "zstd" => {
                                        // No second piece means no parentheses were given: level stays `None`.
    1181            6 :                 let level = if let Some(v) = components.next() {
    1182            4 :                     let v: i8 = v.parse()?;
    1183            4 :                     Some(v)
    1184              :                 } else {
    1185            2 :                     None
    1186              :                 };
    1187              : 
    1188            6 :                 Ok(ImageCompressionAlgorithm::Zstd { level })
    1189              :             }
    1190            0 :             _ => anyhow::bail!("invalid specifier '{first}'"),
    1191              :         }
    1192            8 :     }
    1193              : }
    1194              : 
    1195              : impl Display for ImageCompressionAlgorithm {
                            /// Writes `disabled`, `zstd` (no explicit level) or `zstd(<level>)`.
                            ///
                            /// These are the same textual forms that the `FromStr` impl of this type
                            /// accepts.
    1196           12 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    1197           12 :         match self {
    1198            3 :             ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
    1199            9 :             ImageCompressionAlgorithm::Zstd { level } => {
    1200            9 :                 if let Some(level) = level {
    1201            6 :                     write!(f, "zstd({})", level)
    1202              :                 } else {
    1203            3 :                     write!(f, "zstd")
    1204              :                 }
    1205              :             }
    1206              :         }
    1207           12 :     }
    1208              : }
    1209              : 
    1210            0 : #[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
                        /// Settings wrapper whose only field, `kind`, selects the
                        /// [`CompactionAlgorithm`].
    1211              : pub struct CompactionAlgorithmSettings {
    1212              :     pub kind: CompactionAlgorithm,
    1213              : }
    1214              : 
    1215            0 : #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
    1216              : #[serde(tag = "mode", rename_all = "kebab-case")]
                        /// L0 flush configuration.
                        ///
                        /// Internally tagged: the kebab-case variant name is stored under the `mode`
                        /// key, alongside the variant's fields.
    1217              : pub enum L0FlushConfig {
                            // Field names of this variant are written in snake_case.
    1218              :     #[serde(rename_all = "snake_case")]
    1219              :     Direct { max_concurrency: NonZeroUsize },
    1220              : }
    1221              : 
    1222            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
                        /// Payload of the `LayerAccessThreshold` and `OnlyImitiate` variants of
                        /// [`EvictionPolicy`].
                        ///
                        /// Both durations are (de)serialized through `humantime_serde`, i.e. as
                        /// human-readable duration strings.
    1223              : pub struct EvictionPolicyLayerAccessThreshold {
    1224              :     #[serde(with = "humantime_serde")]
    1225              :     pub period: Duration,
    1226              :     #[serde(with = "humantime_serde")]
    1227              :     pub threshold: Duration,
    1228              : }
    1229              : 
    1230            6 : #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
                        /// Throttle settings.
                        ///
                        /// The throttle counts as enabled when `enabled` (serialized as `task_kinds`)
                        /// is non-empty; see [`ThrottleConfigTaskKinds::is_enabled`].
    1231              : pub struct ThrottleConfig {
    1232              :     /// See [`ThrottleConfigTaskKinds`] for why we do the serde `rename`.
    1233              :     #[serde(rename = "task_kinds")]
    1234              :     pub enabled: ThrottleConfigTaskKinds,
    1235              :     pub initial: u32,
                            /// (De)serialized as a human-readable duration string via `humantime_serde`.
    1236              :     #[serde(with = "humantime_serde")]
    1237              :     pub refill_interval: Duration,
    1238              :     pub refill_amount: NonZeroU32,
    1239              :     pub max: u32,
    1240              : }
    1241              : 
    1242              : /// Before <https://github.com/neondatabase/neon/pull/9962>
    1243              : /// the throttle was a per `Timeline::get`/`Timeline::get_vectored` call.
    1244              : /// The `task_kinds` field controlled which Pageserver "Task Kind"s
    1245              : /// were subject to the throttle.
    1246              : ///
    1247              : /// After that PR, the throttle is applied at pagestream request level
    1248              : /// and the `task_kinds` field does not apply since the only task kind
    1249              : /// that is subject to the throttle is that of the page service.
    1250              : ///
    1251              : /// However, we don't want to make a breaking config change right now
    1252              : /// because it means we have to migrate all the tenant configs.
    1253              : /// This will be done in a future PR.
    1254              : ///
    1255              : /// In the meantime, we use emptiness / non-emptiness of the `task_kinds`
    1256              : /// field to determine if the throttle is enabled or not.
                        ///
                        /// `#[serde(transparent)]` makes this serialize as the bare list of strings.
    1257            1 : #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
    1258              : #[serde(transparent)]
    1259              : pub struct ThrottleConfigTaskKinds(Vec<String>);
    1260              : 
    1261              : impl ThrottleConfigTaskKinds {
                            /// Returns the value that means "throttle disabled": an empty list.
    1262          128 :     pub fn disabled() -> Self {
    1263          128 :         Self(vec![])
    1264          128 :     }
                            /// Returns `true` when the list is non-empty. The entries themselves are
                            /// not inspected.
    1265          119 :     pub fn is_enabled(&self) -> bool {
    1266          119 :         !self.0.is_empty()
    1267          119 :     }
    1268              : }
    1269              : 
    1270              : impl ThrottleConfig {
                            /// Returns a config whose `enabled` list is empty, i.e. for which
                            /// `enabled.is_enabled()` is `false`.
    1271          128 :     pub fn disabled() -> Self {
    1272          128 :         Self {
    1273          128 :             enabled: ThrottleConfigTaskKinds::disabled(),
    1274          128 :             // other values don't matter with empty `task_kinds`.
    1275          128 :             initial: 0,
    1276          128 :             refill_interval: Duration::from_millis(1),
    1277          128 :             refill_amount: NonZeroU32::new(1).unwrap(),
    1278          128 :             max: 1,
    1279          128 :         }
    1280          128 :     }
    1281              :     /// The requests per second allowed by the given config.
                            ///
                            /// Computed as `refill_amount` divided by `refill_interval` in seconds.
                            /// A zero `refill_interval` makes the floating-point division yield infinity
                            /// (`refill_amount` is non-zero by type).
    1282            0 :     pub fn steady_rps(&self) -> f64 {
    1283            0 :         (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64())
    1284            0 :     }
    1285              : }
    1286              : 
    1287              : #[cfg(test)]
    1288              : mod throttle_config_tests {
    1289              :     use super::*;
    1290              : 
                            // `ThrottleConfig::disabled()` must report the throttle as not enabled.
    1291              :     #[test]
    1292            1 :     fn test_disabled_is_disabled() {
    1293            1 :         let config = ThrottleConfig::disabled();
    1294            1 :         assert!(!config.enabled.is_enabled());
    1295            1 :     }
                            // A JSON document with a non-empty `task_kinds` list must deserialize into
                            // an enabled config. The input also carries a `fair` key that is not a
                            // field of `ThrottleConfig`; the `unwrap` asserts that it is tolerated.
    1296              :     #[test]
    1297            1 :     fn test_enabled_backwards_compat() {
    1298            1 :         let input = serde_json::json!({
    1299            1 :             "task_kinds": ["PageRequestHandler"],
    1300            1 :             "initial": 40000,
    1301            1 :             "refill_interval": "50ms",
    1302            1 :             "refill_amount": 1000,
    1303            1 :             "max": 40000,
    1304            1 :             "fair": true
    1305            1 :         });
    1306            1 :         let config: ThrottleConfig = serde_json::from_value(input).unwrap();
    1307            1 :         assert!(config.enabled.is_enabled());
    1308            1 :     }
    1309              : }
    1310              : 
    1311              : /// A flattened analog of a `pageserver::tenant::LocationMode`, which
    1312              : /// lists out all possible states (and the virtual "Detached" state)
    1313              : /// in a flat form rather than using rust-style enums.
    1314            0 : #[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
    1315              : pub enum LocationConfigMode {
    1316              :     AttachedSingle,
    1317              :     AttachedMulti,
    1318              :     AttachedStale,
    1319              :     Secondary,
    1320              :     Detached,
    1321              : }
    1322              : 
    1323            0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
                        /// Secondary-mode settings, carried in [`LocationConfig::secondary_conf`].
    1324              : pub struct LocationConfigSecondary {
    1325              :     pub warm: bool,
    1326              : }
    1327              : 
    1328              : /// An alternative representation of `pageserver::tenant::LocationConf`,
    1329              : /// for use in external-facing APIs.
                        ///
                        /// Fields marked `#[serde(default)]` may be omitted from the input; they then
                        /// take their type's default value (`None` for the options, `0` for the
                        /// shard parameters). `mode` and `tenant_conf` carry no such attribute.
    1330            0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
    1331              : pub struct LocationConfig {
    1332              :     pub mode: LocationConfigMode,
    1333              :     /// If attaching, in what generation?
    1334              :     #[serde(default)]
    1335              :     pub generation: Option<u32>,
    1336              : 
    1337              :     // If requesting mode `Secondary`, configuration for that.
    1338              :     #[serde(default)]
    1339              :     pub secondary_conf: Option<LocationConfigSecondary>,
    1340              : 
    1341              :     // Shard parameters: if shard_count is nonzero, then other shard_* fields
    1342              :     // must be set accurately.
    1343              :     #[serde(default)]
    1344              :     pub shard_number: u8,
    1345              :     #[serde(default)]
    1346              :     pub shard_count: u8,
    1347              :     #[serde(default)]
    1348              :     pub shard_stripe_size: u32,
    1349              : 
    1350              :     // This configuration only affects attached mode, but should be provided irrespective
    1351              :     // of the mode, as a secondary location might transition on startup if the response
    1352              :     // to the `/re-attach` control plane API requests it.
    1353              :     pub tenant_conf: TenantConfig,
    1354              : }
    1355              : 
    1356            0 : #[derive(Serialize, Deserialize)]
                        /// A list of tenant shard ids, each paired with an optional
                        /// [`LocationConfig`].
    1357              : pub struct LocationConfigListResponse {
    1358              :     pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,
    1359              : }
    1360              : 
    1361              : #[derive(Serialize)]
                        /// Status payload carrying a [`NodeId`]. Only `Serialize` is derived, so this
                        /// type can be written but not parsed back.
    1362              : pub struct StatusResponse {
    1363              :     pub id: NodeId,
    1364              : }
    1365              : 
    1366            0 : #[derive(Serialize, Deserialize, Debug)]
    1367              : #[serde(deny_unknown_fields)]
                        /// Request wrapper around a [`LocationConfig`]. The config is flattened, so
                        /// its fields appear at the top level of the serialized object.
                        //
                        // NOTE(review): the serde documentation states that `deny_unknown_fields` is
                        // not supported in combination with `flatten`; confirm that unknown keys are
                        // really rejected here.
    1368              : pub struct TenantLocationConfigRequest {
    1369              :     #[serde(flatten)]
    1370              :     pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
    1371              : }
    1372              : 
    1373            0 : #[derive(Serialize, Deserialize, Debug)]
    1374              : #[serde(deny_unknown_fields)]
                        /// Request carrying a list of [`ShardCount`] values. Unknown keys are rejected
                        /// on deserialization (`deny_unknown_fields`).
    1375              : pub struct TenantTimeTravelRequest {
    1376              :     pub shard_counts: Vec<ShardCount>,
    1377              : }
    1378              : 
    1379            0 : #[derive(Serialize, Deserialize, Debug)]
    1380              : #[serde(deny_unknown_fields)]
                        /// Pairs a tenant shard id with a node id. Unknown keys are rejected on
                        /// deserialization (`deny_unknown_fields`).
    1381              : pub struct TenantShardLocation {
    1382              :     pub shard_id: TenantShardId,
    1383              :     pub node_id: NodeId,
    1384              : }
    1385              : 
    1386            0 : #[derive(Serialize, Deserialize, Debug)]
    1387              : #[serde(deny_unknown_fields)]
                        /// Response listing one [`TenantShardLocation`] per shard, plus an optional
                        /// stripe size. Unknown keys are rejected on deserialization.
    1388              : pub struct TenantLocationConfigResponse {
    1389              :     pub shards: Vec<TenantShardLocation>,
    1390              :     // If the shards' ShardCount count is >1, stripe_size will be set.
    1391              :     pub stripe_size: Option<ShardStripeSize>,
    1392              : }
    1393              : 
    1394            2 : #[derive(Serialize, Deserialize, Debug)]
    1395              : #[serde(deny_unknown_fields)]
                        /// A tenant id together with a full [`TenantConfig`]. The config is flattened,
                        /// so its fields sit next to `tenant_id` in the serialized object.
                        //
                        // NOTE(review): the serde documentation states that `deny_unknown_fields` is
                        // not supported in combination with `flatten`; confirm that unknown keys are
                        // really rejected here.
    1396              : pub struct TenantConfigRequest {
    1397              :     pub tenant_id: TenantId,
    1398              :     #[serde(flatten)]
    1399              :     pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
    1400              : }
    1401              : 
    1402              : impl std::ops::Deref for TenantConfigRequest {
    1403              :     type Target = TenantConfig;
    1404              : 
                            /// Borrows the inner `config`, so a `TenantConfigRequest` can be used
                            /// wherever a `&TenantConfig` is expected.
    1405            0 :     fn deref(&self) -> &Self::Target {
    1406            0 :         &self.config
    1407            0 :     }
    1408              : }
    1409              : 
    1410              : impl TenantConfigRequest {
                            /// Builds a request for `tenant_id` whose `config` is
                            /// `TenantConfig::default()`.
    1411            0 :     pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
    1412            0 :         let config = TenantConfig::default();
    1413            0 :         TenantConfigRequest { tenant_id, config }
    1414            0 :     }
    1415              : }
    1416              : 
    1417            3 : #[derive(Serialize, Deserialize, Debug)]
    1418              : #[serde(deny_unknown_fields)]
                        /// A tenant id together with a [`TenantConfigPatch`]. The patch is flattened,
                        /// so its fields sit next to `tenant_id` in the serialized object.
                        //
                        // NOTE(review): the serde documentation states that `deny_unknown_fields` is
                        // not supported in combination with `flatten`; confirm that unknown keys are
                        // really rejected here.
    1419              : pub struct TenantConfigPatchRequest {
    1420              :     pub tenant_id: TenantId,
    1421              :     #[serde(flatten)]
    1422              :     pub config: TenantConfigPatch, // as we have a flattened field, we should reject all unknown fields in it
    1423              : }
    1424              : 
    1425            0 : #[derive(Serialize, Deserialize, Debug)]
                        /// Maps timeline ids to LSNs, together with a timeout.
                        ///
                        /// `timelines` is flattened: each map entry appears as a top-level key of the
                        /// serialized object, next to `timeout`.
    1426              : pub struct TenantWaitLsnRequest {
    1427              :     #[serde(flatten)]
    1428              :     pub timelines: HashMap<TimelineId, Lsn>,
                            // No `humantime_serde` here, unlike other durations in this file, so
                            // serde's built-in `Duration` representation applies.
    1429              :     pub timeout: Duration,
    1430              : }
    1431              : 
    1432              : /// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
                        ///
                        /// Adjacently tagged in serde: the snake_case variant name is stored under
                        /// `slug` and the variant's fields (only `Failed` has any) under `data`.
    1433            0 : #[derive(Serialize, Deserialize, Clone)]
    1434              : #[serde(tag = "slug", content = "data", rename_all = "snake_case")]
    1435              : pub enum TenantAttachmentStatus {
    1436              :     Maybe,
    1437              :     Attached,
    1438              :     Failed { reason: String },
    1439              : }
    1440              : 
    1441            0 : #[derive(Serialize, Deserialize, Clone)]
                        /// Information about one tenant shard (`id` is a [`TenantShardId`]).
    1442              : pub struct TenantInfo {
    1443              :     pub id: TenantShardId,
    1444              :     // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
    1445              :     pub state: TenantState,
    1446              :     /// Sum of the size of all layer files.
    1447              :     /// If a layer is present in both local FS and S3, it counts only once.
    1448              :     pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
    1449              :     pub attachment_status: TenantAttachmentStatus,
    1450              :     pub generation: u32,
    1451              : 
    1452              :     /// Opaque explanation if gc is being blocked.
    1453              :     ///
    1454              :     /// Only looked up for the individual tenant detail, not the listing.
                            /// Left out of the serialized output entirely when it is `None`.
    1455              :     #[serde(skip_serializing_if = "Option::is_none")]
    1456              :     pub gc_blocking: Option<String>,
    1457              : }
    1458              : 
    1459            0 : #[derive(Serialize, Deserialize, Clone)]
                        /// [`TenantInfo`] extended with an optional walredo status and a list of
                        /// timeline ids.
                        ///
                        /// `tenant_info` is flattened, so its fields appear at the same level as
                        /// `walredo` and `timelines` in the serialized object.
    1460              : pub struct TenantDetails {
    1461              :     #[serde(flatten)]
    1462              :     pub tenant_info: TenantInfo,
    1463              : 
    1464              :     pub walredo: Option<WalRedoManagerStatus>,
    1465              : 
    1466              :     pub timelines: Vec<TimelineId>,
    1467              : }
    1468              : 
    1469            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)]
                        /// Archival state of a timeline. There are no serde renames, so the variants
                        /// serialize under their Rust names.
    1470              : pub enum TimelineArchivalState {
    1471              :     Archived,
    1472              :     Unarchived,
    1473              : }
    1474              : 
    1475            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
                        /// Visibility state of a timeline. There are no serde renames, so the variants
                        /// serialize under their Rust names.
    1476              : pub enum TimelineVisibilityState {
    1477              :     Visible,
    1478              :     Invisible,
    1479              : }
    1480              : 
    1481            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
                        /// Request body carrying a [`TimelineArchivalState`].
    1482              : pub struct TimelineArchivalConfigRequest {
    1483              :     pub state: TimelineArchivalState,
    1484              : }
    1485              : 
    1486            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
                        /// Request whose fields are all optional on input: the `Option` fields
                        /// deserialize to `None` when absent, and `force_index_update` falls back to
                        /// `false` through `#[serde(default)]`.
    1487              : pub struct TimelinePatchIndexPartRequest {
    1488              :     pub rel_size_migration: Option<RelSizeMigration>,
    1489              :     pub gc_compaction_last_completed_lsn: Option<Lsn>,
    1490              :     pub applied_gc_cutoff_lsn: Option<Lsn>,
    1491              :     #[serde(default)]
    1492              :     pub force_index_update: bool,
    1493              : }
    1494              : 
    1495            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// Two lists side by side: [`TimelineInfo`] entries in `timelines` and
                        /// [`OffloadedTimelineInfo`] entries in `offloaded`.
    1496              : pub struct TimelinesInfoAndOffloaded {
    1497              :     pub timelines: Vec<TimelineInfo>,
    1498              :     pub offloaded: Vec<OffloadedTimelineInfo>,
    1499              : }
    1500              : 
    1501              : /// Analog of [`TimelineInfo`] for offloaded timelines.
    1502            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
    1503              : pub struct OffloadedTimelineInfo {
                            // NB: despite the field name, this is a `TenantShardId`, not a `TenantId`.
    1504              :     pub tenant_id: TenantShardId,
    1505              :     pub timeline_id: TimelineId,
    1506              :     /// Whether the timeline has a parent it has been branched off from or not
    1507              :     pub ancestor_timeline_id: Option<TimelineId>,
    1508              :     /// Whether to retain the branch lsn at the ancestor or not
    1509              :     pub ancestor_retain_lsn: Option<Lsn>,
    1510              :     /// The time point when the timeline was archived
    1511              :     pub archived_at: chrono::DateTime<chrono::Utc>,
    1512              : }
    1513              : 
    1514            4 : #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
    1515              : #[serde(rename_all = "camelCase")]
                        /// Progress of the migration to the new rel_size format.
                        ///
                        /// Variant names are serialized in camelCase (`rename_all = "camelCase"`).
    1516              : pub enum RelSizeMigration {
    1517              :     /// The tenant is using the old rel_size format.
    1518              :     /// Note that this enum is persisted as `Option<RelSizeMigration>` in the index part, so
    1519              :     /// `None` is the same as `Some(RelSizeMigration::Legacy)`.
    1520              :     Legacy,
    1521              :     /// The tenant is migrating to the new rel_size format. Both old and new rel_size format are
    1522              :     /// persisted in the index part. The read path will read both formats and merge them.
    1523              :     Migrating,
    1524              :     /// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted
    1525              :     /// in the index part, and the read path will not read the old format.
    1526              :     Migrated,
    1527              : }
    1528              : 
    1529              : /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
    1530            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
    1531              : pub struct TimelineInfo {
    1532              :     pub tenant_id: TenantShardId,
    1533              :     pub timeline_id: TimelineId,
    1534              : 
    1535              :     pub ancestor_timeline_id: Option<TimelineId>,
    1536              :     pub ancestor_lsn: Option<Lsn>,
    1537              :     pub last_record_lsn: Lsn,
    1538              :     pub prev_record_lsn: Option<Lsn>,
    1539              : 
    1540              :     /// The LSN up to which GC has advanced: older data may still exist but it is not available for clients.
    1541              :     /// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead,
    1542              :     /// as it is easier to reason about.
    1543              :     #[serde(default)]
    1544              :     pub applied_gc_cutoff_lsn: Lsn,
    1545              : 
    1546              :     /// The upper bound of data which is either already GC'ed, or eligible to be GC'ed at any time based on PITR interval.
    1547              :     /// This LSN represents the "end of history" for this timeline, and callers should use it to figure out the oldest
    1548              :     /// LSN at which it is legal to create a branch or ephemeral endpoint.
    1549              :     ///
    1550              :     /// Note that holders of valid LSN leases may be able to create branches and read pages earlier
    1551              :     /// than this LSN, but new leases may not be taken out earlier than this LSN.
    1552              :     #[serde(default)]
    1553              :     pub min_readable_lsn: Lsn,
    1554              : 
    1555              :     pub disk_consistent_lsn: Lsn,
    1556              : 
    1557              :     /// The LSN that we have successfully uploaded to remote storage
    1558              :     pub remote_consistent_lsn: Lsn,
    1559              : 
    1560              :     /// The LSN that we are advertising to safekeepers
    1561              :     pub remote_consistent_lsn_visible: Lsn,
    1562              : 
    1563              :     /// The LSN from the start of the root timeline (never changes)
    1564              :     pub initdb_lsn: Lsn,
    1565              : 
    1566              :     pub current_logical_size: u64,
    1567              :     pub current_logical_size_is_accurate: bool,
    1568              : 
    1569              :     pub directory_entries_counts: Vec<u64>,
    1570              : 
    1571              :     /// Sum of the size of all layer files.
    1572              :     /// If a layer is present in both local FS and S3, it counts only once.
    1573              :     pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
    1574              :     pub current_logical_size_non_incremental: Option<u64>,
    1575              : 
    1576              :     /// How many bytes of WAL are within this branch's pitr_interval.  If the pitr_interval goes
    1577              :     /// beyond the branch's branch point, we only count up to the branch point.
    1578              :     pub pitr_history_size: u64,
    1579              : 
    1580              :     /// Whether this branch's branch point is within its ancestor's PITR interval (i.e. any
    1581              :     /// ancestor data used by this branch would have been retained anyway).  If this is false, then
    1582              :     /// this branch may be imposing a cost on the ancestor by causing it to retain layers that it would
    1583              :     /// otherwise be able to GC.
    1584              :     pub within_ancestor_pitr: bool,
    1585              : 
    1586              :     pub timeline_dir_layer_file_size_sum: Option<u64>,
    1587              : 
    1588              :     pub wal_source_connstr: Option<String>,
    1589              :     pub last_received_msg_lsn: Option<Lsn>,
    1590              :     /// the timestamp (in microseconds) of the last received message
    1591              :     pub last_received_msg_ts: Option<u128>,
    1592              :     pub pg_version: u32,
    1593              : 
    1594              :     pub state: TimelineState,
    1595              : 
    1596              :     pub walreceiver_status: String,
    1597              : 
    1598              :     // ALWAYS add new fields at the end of the struct with `Option` to ensure forward/backward compatibility.
    1599              :     // Backward compatibility: you will get a JSON not containing the newly-added field.
    1600              :     // Forward compatibility: a previous version of the pageserver will receive a JSON. serde::Deserialize does
    1601              :     // not deny unknown fields by default so it's safe to set the field to some value, though it won't be
    1602              :     // read.
    1603              :     /// Whether the timeline is archived.
    1604              :     pub is_archived: Option<bool>,
    1605              : 
    1606              :     /// The status of the rel_size migration.
    1607              :     pub rel_size_migration: Option<RelSizeMigration>,
    1608              : 
    1609              :     /// Whether the timeline is invisible in synthetic size calculations.
    1610              :     pub is_invisible: Option<bool>,
    1611              : }
    1612              : 
    1613            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Serializable listing of layers, split into in-memory and historic entries.
    1614              : pub struct LayerMapInfo {
                            /// One entry per in-memory layer (`Open` or `Frozen`).
    1615              :     pub in_memory_layers: Vec<InMemoryLayerInfo>,
                            /// One entry per historic layer (`Delta` or `Image`).
    1616              :     pub historic_layers: Vec<HistoricLayerInfo>,
    1617              : }
    1618              : 
    1619              : /// The residence status of a layer, i.e. whether its file is present locally.
    1620            0 : #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
    1621              : pub enum LayerResidenceStatus {
    1622              :     /// Residence status for a layer file that exists locally.
    1623              :     /// It may also exist on the remote; this value does not distinguish that case.
    1624              :     Resident,
    1625              :     /// Residence status for a layer file that only exists on the remote.
    1626              :     Evicted,
    1627              : }
    1628              : 
    1629              : #[serde_as]
    1630            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Per-layer access statistics.
                        ///
                        /// Both timestamps are (de)serialized through `serde_with::TimestampMilliSeconds`.
    1631              : pub struct LayerAccessStats {
                            /// NOTE(review): presumably the time of the most recent access — confirm with the producer.
    1632              :     #[serde_as(as = "serde_with::TimestampMilliSeconds")]
    1633              :     pub access_time: SystemTime,
    1634              : 
                            /// NOTE(review): presumably when the residence status last changed — confirm with the producer.
    1635              :     #[serde_as(as = "serde_with::TimestampMilliSeconds")]
    1636              :     pub residence_time: SystemTime,
    1637              : 
                            /// NOTE(review): the exact meaning of "visible" is not evident from this file — confirm.
    1638              :     pub visible: bool,
    1639              : }
    1640              : 
    1641            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
    1642              : #[serde(tag = "kind")]
                        /// Description of an in-memory layer.
                        ///
                        /// Serialized with the variant name stored in a `kind` field (`#[serde(tag = "kind")]`).
    1643              : pub enum InMemoryLayerInfo {
                            /// Layer described by a start LSN only.
    1644              :     Open { lsn_start: Lsn },
                            /// Layer described by both a start and an end LSN.
    1645              :     Frozen { lsn_start: Lsn, lsn_end: Lsn },
    1646              : }
    1647              : 
    1648            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
    1649              : #[serde(tag = "kind")]
                        /// Description of a historic layer file.
                        ///
                        /// Serialized with the variant name stored in a `kind` field (`#[serde(tag = "kind")]`).
                        /// Both variants share `layer_file_name`, `layer_file_size`, `lsn_start`, `remote` and
                        /// `access_stats`; only `Delta` additionally carries `lsn_end` and `l0`.
    1650              : pub enum HistoricLayerInfo {
                            /// A delta layer, described by an LSN range (`lsn_start`, `lsn_end`).
    1651              :     Delta {
    1652              :         layer_file_name: String,
    1653              :         layer_file_size: u64,
    1654              : 
    1655              :         lsn_start: Lsn,
    1656              :         lsn_end: Lsn,
                                // NOTE(review): presumably true when the file is not resident locally — confirm.
    1657              :         remote: bool,
    1658              :         access_stats: LayerAccessStats,
    1659              : 
                                // NOTE(review): presumably marks a level-0 delta layer — confirm.
    1660              :         l0: bool,
    1661              :     },
                            /// An image layer, described by a single LSN (`lsn_start`).
    1662              :     Image {
    1663              :         layer_file_name: String,
    1664              :         layer_file_size: u64,
    1665              : 
    1666              :         lsn_start: Lsn,
    1667              :         remote: bool,
    1668              :         access_stats: LayerAccessStats,
    1669              :     },
    1670              : }
    1671              : 
    1672              : impl HistoricLayerInfo {
                            /// Returns the `layer_file_name` field, regardless of variant.
    1673            0 :     pub fn layer_file_name(&self) -> &str {
    1674            0 :         match self {
    1675              :             HistoricLayerInfo::Delta {
    1676            0 :                 layer_file_name, ..
    1677            0 :             } => layer_file_name,
    1678              :             HistoricLayerInfo::Image {
    1679            0 :                 layer_file_name, ..
    1680            0 :             } => layer_file_name,
    1681              :         }
    1682            0 :     }
                            /// Returns the `remote` flag, regardless of variant.
    1683            0 :     pub fn is_remote(&self) -> bool {
    1684            0 :         match self {
    1685            0 :             HistoricLayerInfo::Delta { remote, .. } => *remote,
    1686            0 :             HistoricLayerInfo::Image { remote, .. } => *remote,
    1687              :         }
    1688            0 :     }
                            /// Overwrites the `remote` flag with `value`, regardless of variant.
    1689            0 :     pub fn set_remote(&mut self, value: bool) {
                                // Borrow the flag mutably from whichever variant is active, then assign once.
    1690            0 :         let field = match self {
    1691            0 :             HistoricLayerInfo::Delta { remote, .. } => remote,
    1692            0 :             HistoricLayerInfo::Image { remote, .. } => remote,
    1693              :         };
    1694            0 :         *field = value;
    1695            0 :     }
                            /// Returns the `layer_file_size` field, regardless of variant.
    1696            0 :     pub fn layer_file_size(&self) -> u64 {
    1697            0 :         match self {
    1698              :             HistoricLayerInfo::Delta {
    1699            0 :                 layer_file_size, ..
    1700            0 :             } => *layer_file_size,
    1701              :             HistoricLayerInfo::Image {
    1702            0 :                 layer_file_size, ..
    1703            0 :             } => *layer_file_size,
    1704              :         }
    1705            0 :     }
    1706              : }
    1707              : 
    1708            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request body for spawning a "download remote layers" task.
    1709              : pub struct DownloadRemoteLayersTaskSpawnRequest {
                            /// Concurrency limit for the downloads; the type rules out zero.
    1710              :     pub max_concurrent_downloads: NonZeroUsize,
    1711              : }
    1712              : 
    1713            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request body carrying aux files to ingest.
    1714              : pub struct IngestAuxFilesRequest {
                            /// String-to-string map of aux files.
                            /// NOTE(review): presumably file path -> file content — confirm against the handler.
    1715              :     pub aux_files: HashMap<String, String>,
    1716              : }
    1717              : 
    1718            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request body for listing aux files.
    1719              : pub struct ListAuxFilesRequest {
                            /// NOTE(review): presumably the LSN at which the listing is evaluated — confirm.
    1720              :     pub lsn: Lsn,
    1721              : }
    1722              : 
    1723            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// Status snapshot of a "download remote layers" task: its id, state and counters.
                        ///
                        /// The three counters are marked as stable once the task is `completed`.
    1724              : pub struct DownloadRemoteLayersTaskInfo {
    1725              :     pub task_id: String,
    1726              :     pub state: DownloadRemoteLayersTaskState,
    1727              :     pub total_layer_count: u64,         // stable once `completed`
    1728              :     pub successful_download_count: u64, // stable once `completed`
    1729              :     pub failed_download_count: u64,     // stable once `completed`
    1730              : }
    1731              : 
    1732            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// State reported in `DownloadRemoteLayersTaskInfo::state`.
    1733              : pub enum DownloadRemoteLayersTaskState {
    1734              :     Running,
    1735              :     Completed,
                            // NOTE(review): presumably the task ended because of a shutdown rather than finishing — confirm.
    1736              :     ShutDown,
    1737              : }
    1738              : 
    1739            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request body for a timeline GC call.
    1740              : pub struct TimelineGcRequest {
                            /// Optional GC horizon.
                            /// NOTE(review): presumably overrides the configured horizon when set — confirm.
    1741              :     pub gc_horizon: Option<u64>,
    1742              : }
    1743              : 
    1744            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Status of a WAL redo process; used as `WalRedoManagerStatus::process`.
    1745              : pub struct WalRedoManagerProcessStatus {
                            /// Process id.
    1746              :     pub pid: u32,
    1747              : }
    1748              : 
    1749            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Status of a WAL redo manager.
    1750              : pub struct WalRedoManagerStatus {
                            /// UTC timestamp of the last redo, if any.
    1751              :     pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,
                            /// Process status, if present.
                            /// NOTE(review): presumably `None` when no process is running — confirm.
    1752              :     pub process: Option<WalRedoManagerProcessStatus>,
    1753              : }
    1754              : 
    1755              : /// The progress of a secondary tenant.
    1756              : ///
    1757              : /// It is mostly useful when doing a long running download: e.g. initiating
    1758              : /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
    1759              : /// what's happening.
                        ///
                        /// The derived `Default` yields `None` for `heatmap_mtime` and zero for every counter.
    1760            0 : #[derive(Default, Debug, Serialize, Deserialize, Clone)]
    1761              : pub struct SecondaryProgress {
    1762              :     /// The remote storage LastModified time of the heatmap object we last downloaded.
    1763              :     pub heatmap_mtime: Option<serde_system_time::SystemTime>,
    1764              : 
    1765              :     /// The number of layers currently on-disk
    1766              :     pub layers_downloaded: usize,
    1767              :     /// The number of layers in the most recently seen heatmap
    1768              :     pub layers_total: usize,
    1769              : 
    1770              :     /// The number of layer bytes currently on-disk
    1771              :     pub bytes_downloaded: u64,
    1772              :     /// The number of layer bytes in the most recently seen heatmap
    1773              :     pub bytes_total: u64,
    1774              : }
    1775              : 
    1776            0 : #[derive(Serialize, Deserialize, Debug)]
                        /// One shard entry of a `TenantScanRemoteStorageResponse`.
    1777              : pub struct TenantScanRemoteStorageShard {
    1778              :     pub tenant_shard_id: TenantShardId,
                            // NOTE(review): presumably `None` when no generation could be determined — confirm.
    1779              :     pub generation: Option<u32>,
                            // NOTE(review): presumably `None` when the stripe size is unknown — confirm.
    1780              :     pub stripe_size: Option<ShardStripeSize>,
    1781              : }
    1782              : 
    1783            0 : #[derive(Serialize, Deserialize, Debug, Default)]
                        /// Response listing the shards found by a remote storage scan of a tenant.
                        ///
                        /// The derived `Default` is an empty shard list.
    1784              : pub struct TenantScanRemoteStorageResponse {
    1785              :     pub shards: Vec<TenantScanRemoteStorageShard>,
    1786              : }
    1787              : 
    1788            0 : #[derive(Serialize, Deserialize, Debug, Clone)]
    1789              : #[serde(rename_all = "snake_case")]
                        /// Metric by which tenant shards are ordered; used as `TopTenantShardsRequest::order_by`.
                        ///
                        /// Variant names are (de)serialized in snake_case (e.g. `resident_size`).
    1790              : pub enum TenantSorting {
    1791              :     /// Total size of layers on local disk for all timelines in a shard.
    1792              :     ResidentSize,
    1793              :     /// The logical size of the largest timeline within a _tenant_ (not shard). Only tracked on
    1794              :     /// shard 0, contains the sum across all shards.
    1795              :     MaxLogicalSize,
    1796              :     /// The logical size of the largest timeline within a _tenant_ (not shard), divided by number of
    1797              :     /// shards. Only tracked on shard 0, and estimates the per-shard logical size.
    1798              :     MaxLogicalSizePerShard,
    1799              : }
    1800              : 
    1801              : impl Default for TenantSorting {
                            /// The default ordering metric is `ResidentSize`.
    1802            0 :     fn default() -> Self {
    1803            0 :         Self::ResidentSize
    1804            0 :     }
    1805              : }
    1806              : 
    1807            0 : #[derive(Serialize, Deserialize, Debug, Clone)]
                        /// Request for the top tenant shards ranked by a chosen metric, with optional filters.
    1808              : pub struct TopTenantShardsRequest {
    1809              :     /// How would you like to sort the tenants?
    1810              :     pub order_by: TenantSorting,
    1811              : 
    1812              :     /// How many results?
    1813              :     pub limit: usize,
    1814              : 
    1815              :     /// Omit tenants with more than this many shards (e.g. if this is the max number of shards
    1816              :     /// that the caller would ever split to)
                            ///
                            /// NOTE(review): the `_lt` suffix suggests a strict less-than comparison, which would also
                            /// omit tenants with exactly this many shards — confirm against the handler.
    1817              :     pub where_shards_lt: Option<ShardCount>,
    1818              : 
    1819              :     /// Omit tenants where the ordering metric is less than this (this is an optimization to
    1820              :     /// let us quickly exclude numerous tiny shards)
    1821              :     pub where_gt: Option<u64>,
    1822              : }
    1823              : 
    1824            0 : #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
                        /// One entry of a `TopTenantShardsResponse`: a shard id together with its size metrics.
    1825              : pub struct TopTenantShardItem {
                            /// The shard these metrics belong to.
    1826              :     pub id: TenantShardId,
    1827              : 
    1828              :     /// Total size of layers on local disk for all timelines in this shard.
    1829              :     pub resident_size: u64,
    1830              : 
    1831              :     /// Total size of layers in remote storage for all timelines in this shard.
    1832              :     pub physical_size: u64,
    1833              : 
    1834              :     /// The largest logical size of a timeline within this _tenant_ (not shard). This is only
    1835              :     /// tracked on shard 0, and contains the sum of the logical size across all shards.
    1836              :     pub max_logical_size: u64,
    1837              : 
    1838              :     /// The largest logical size of a timeline within this _tenant_ (not shard) divided by number of
    1839              :     /// shards. This is only tracked on shard 0, and is only an estimate as we divide it evenly by
    1840              :     /// shard count, rounded up.
    1841              :     pub max_logical_size_per_shard: u64,
    1842              : }
    1843              : 
    1844            0 : #[derive(Serialize, Deserialize, Debug, Default)]
                        /// Response to a `TopTenantShardsRequest`; the derived `Default` is an empty list.
    1845              : pub struct TopTenantShardsResponse {
    1846              :     pub shards: Vec<TopTenantShardItem>,
    1847              : }
    1848              : 
    1849              : pub mod virtual_file {
                            //! IO engine and IO mode selectors.
                            //!
                            //! Both enums get their string form from strum (`Display` / `EnumString`, kebab-case) and
                            //! are (de)serialized through that string form (`SerializeDisplay` / `DeserializeFromStr`).
    1850              : 
                            /// Available IO engines; with kebab-case naming the string forms are e.g. `std-fs`.
    1851              :     #[derive(
    1852              :         Copy,
    1853              :         Clone,
    1854              :         PartialEq,
    1855              :         Eq,
    1856              :         Hash,
    1857            0 :         strum_macros::EnumString,
    1858              :         strum_macros::Display,
    1859            0 :         serde_with::DeserializeFromStr,
    1860              :         serde_with::SerializeDisplay,
    1861              :         Debug,
    1862              :     )]
    1863              :     #[strum(serialize_all = "kebab-case")]
    1864              :     pub enum IoEngineKind {
    1865              :         StdFs,
                                /// Only compiled in on Linux targets.
    1866              :         #[cfg(target_os = "linux")]
    1867              :         TokioEpollUring,
    1868              :     }
    1869              : 
    1870              :     /// Direct IO modes for a pageserver.
    1871              :     #[derive(
    1872              :         Copy,
    1873              :         Clone,
    1874              :         PartialEq,
    1875              :         Eq,
    1876              :         Hash,
    1877            0 :         strum_macros::EnumString,
    1878            0 :         strum_macros::EnumIter,
    1879              :         strum_macros::Display,
    1880            0 :         serde_with::DeserializeFromStr,
    1881              :         serde_with::SerializeDisplay,
    1882              :         Debug,
    1883              :     )]
    1884              :     #[strum(serialize_all = "kebab-case")]
    1885              :     #[repr(u8)]
    1886              :     pub enum IoMode {
    1887              :         /// Uses buffered IO.
    1888              :         Buffered,
    1889              :         /// Uses direct IO for reads only.
    1890              :         Direct,
    1891              :         /// Use direct IO for reads and writes.
    1892              :         DirectRw,
    1893              :     }
    1894              : 
    1895              :     impl IoMode {
                                /// Returns the preferred mode, currently `IoMode::DirectRw`.
    1896          247 :         pub fn preferred() -> Self {
    1897          247 :             IoMode::DirectRw
    1898          247 :         }
    1899              :     }
    1900              : 
    1901              :     impl TryFrom<u8> for IoMode {
                                /// The unrecognized byte is handed back as the error value.
    1902              :         type Error = u8;
    1903              : 
                                /// Converts a raw byte back into an `IoMode` by comparing it against each variant's
                                /// `as u8` discriminant, so the mapping cannot drift from the enum definition.
    1904         2576 :         fn try_from(value: u8) -> Result<Self, Self::Error> {
    1905         2576 :             Ok(match value {
    1906         2576 :                 v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
    1907         2576 :                 v if v == (IoMode::Direct as u8) => IoMode::Direct,
    1908         2576 :                 v if v == (IoMode::DirectRw as u8) => IoMode::DirectRw,
    1909            0 :                 x => return Err(x),
    1910              :             })
    1911         2576 :         }
    1912              :     }
    1913              : }
    1914              : 
    1915            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Result of a disposable-keys scan: how many keys were and were not disposable.
    1916              : pub struct ScanDisposableKeysResponse {
    1917              :     pub disposable_count: usize,
    1918              :     pub not_disposable_count: usize,
    1919              : }
    1920              : 
    1921              : /// A compute -> pageserver pagestream request. Wrapped in libpq CopyData.
    1922              : #[derive(PartialEq, Eq, Debug)]
    1923              : pub enum PagestreamFeMessage {
    1924              :     Exists(PagestreamExistsRequest),
    1925              :     Nblocks(PagestreamNblocksRequest),
    1926              :     GetPage(PagestreamGetPageRequest),
    1927              :     DbSize(PagestreamDbSizeRequest),
    1928              :     GetSlruSegment(PagestreamGetSlruSegmentRequest),
                            /// Only compiled with the `testing` feature.
    1929              :     #[cfg(feature = "testing")]
    1930              :     Test(PagestreamTestRequest),
    1931              : }
    1932              : 
    1933              : // Wrapped in libpq CopyData
                        //
                        // One response variant per `PagestreamFeMessage` request variant, plus `Error`.
                        // NOTE(review): presumably the pageserver -> compute direction — confirm.
    1934              : #[derive(strum_macros::EnumProperty)]
    1935              : pub enum PagestreamBeMessage {
    1936              :     Exists(PagestreamExistsResponse),
    1937              :     Nblocks(PagestreamNblocksResponse),
    1938              :     GetPage(PagestreamGetPageResponse),
    1939              :     Error(PagestreamErrorResponse),
    1940              :     DbSize(PagestreamDbSizeResponse),
    1941              :     GetSlruSegment(PagestreamGetSlruSegmentResponse),
                            // Only compiled with the `testing` feature.
    1942              :     #[cfg(feature = "testing")]
    1943              :     Test(PagestreamTestResponse),
    1944              : }
    1945              : 
    1946              : // Keep in sync with `pagestore_client.h`
    1947              : #[repr(u8)]
                        /// One-byte tag for each `PagestreamFeMessage` variant; `PagestreamFeMessage::serialize`
                        /// writes it as the first byte of the message.
    1948              : enum PagestreamFeMessageTag {
    1949              :     Exists = 0,
    1950              :     Nblocks = 1,
    1951              :     GetPage = 2,
    1952              :     DbSize = 3,
    1953              :     GetSlruSegment = 4,
    1954              :     /* future tags above this line */
    1955              :     /// For testing purposes, not available in production.
    1956              :     #[cfg(feature = "testing")]
    1957              :     Test = 99,
    1958              : }
    1959              : 
    1960              : // Keep in sync with `pagestore_client.h`
    1961              : #[repr(u8)]
                        /// One-byte tags named after the `PagestreamBeMessage` variants; values start at 100, so
                        /// they do not overlap with the `PagestreamFeMessageTag` values.
    1962              : enum PagestreamBeMessageTag {
    1963              :     Exists = 100,
    1964              :     Nblocks = 101,
    1965              :     GetPage = 102,
    1966              :     Error = 103,
    1967              :     DbSize = 104,
    1968              :     GetSlruSegment = 105,
    1969              :     /* future tags above this line */
    1970              :     /// For testing purposes, not available in production.
    1971              :     #[cfg(feature = "testing")]
    1972              :     Test = 199,
    1973              : }
    1974              : 
    1975              : impl TryFrom<u8> for PagestreamFeMessageTag {
                            /// The unrecognized byte is handed back as the error value.
    1976              :     type Error = u8;
                            /// Maps a raw tag byte to its variant.
                            ///
                            /// The literals here duplicate the discriminants declared on `PagestreamFeMessageTag`
                            /// and must be kept equal to them.
    1977            4 :     fn try_from(value: u8) -> Result<Self, u8> {
    1978            4 :         match value {
    1979            1 :             0 => Ok(PagestreamFeMessageTag::Exists),
    1980            1 :             1 => Ok(PagestreamFeMessageTag::Nblocks),
    1981            1 :             2 => Ok(PagestreamFeMessageTag::GetPage),
    1982            1 :             3 => Ok(PagestreamFeMessageTag::DbSize),
    1983            0 :             4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
    1984              :             #[cfg(feature = "testing")]
    1985            0 :             99 => Ok(PagestreamFeMessageTag::Test),
    1986            0 :             _ => Err(value),
    1987              :         }
    1988            4 :     }
    1989              : }
    1990              : 
    1991              : impl TryFrom<u8> for PagestreamBeMessageTag {
                            /// The unrecognized byte is handed back as the error value.
    1992              :     type Error = u8;
                            /// Maps a raw tag byte to its variant.
                            ///
                            /// The literals here duplicate the discriminants declared on `PagestreamBeMessageTag`
                            /// and must be kept equal to them.
    1993            0 :     fn try_from(value: u8) -> Result<Self, u8> {
    1994            0 :         match value {
    1995            0 :             100 => Ok(PagestreamBeMessageTag::Exists),
    1996            0 :             101 => Ok(PagestreamBeMessageTag::Nblocks),
    1997            0 :             102 => Ok(PagestreamBeMessageTag::GetPage),
    1998            0 :             103 => Ok(PagestreamBeMessageTag::Error),
    1999            0 :             104 => Ok(PagestreamBeMessageTag::DbSize),
    2000            0 :             105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
    2001              :             #[cfg(feature = "testing")]
    2002            0 :             199 => Ok(PagestreamBeMessageTag::Test),
    2003            0 :             _ => Err(value),
    2004              :         }
    2005            0 :     }
    2006              : }
    2007              : 
    2008              : // A GetPage request contains two LSN values:
    2009              : //
    2010              : // request_lsn: Get the page version at this point in time.  Lsn::Max is a special value that means
    2011              : // "get the latest version present". It's used by the primary server, which knows that no one else
    2012              : // is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
    2013              : // Lsn::Max. Standby servers use the current replay LSN as the request LSN.
    2014              : //
    2015              : // not_modified_since: Hint to the pageserver that the client knows that the page has not been
    2016              : // modified between 'not_modified_since' and the request LSN. It's always correct to set
    2017              : // 'not_modified_since' equal to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
    2018              : // passing an earlier LSN can speed up the request, by allowing the pageserver to process the
    2019              : // request without waiting for 'request_lsn' to arrive.
    2020              : //
    2021              : // The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
    2022              : // sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
    2023              : // 'latest' was set to true. The V2 interface was added because there was no correct way for a
    2024              : // standby to request a page at a particular non-latest LSN, and also include the
    2025              : // 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
    2026              : // request, if the standby knows that the page hasn't been modified since, and risk getting an error
    2027              : // if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
    2028              : // require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
    2029              : // interface allows sending both LSNs, and let the pageserver do the right thing. There was no
    2030              : // difference in the responses between V1 and V2.
    2031              : //
    2032              : // V3 version of protocol adds request ID to all requests. This request ID is also included in response
    2033              : // as well as other fields from requests, which allows to verify that we receive response for our request.
    2034              : // We copy fields from request to response to make checking more reliable: request ID is formed from process ID
    2035              : // and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
    2036              : //
    2037              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Supported versions of the pagestream protocol (V1 is described above as now-defunct).
    2038              : pub enum PagestreamProtocolVersion {
                            /// Requests carry two LSNs: `request_lsn` and `not_modified_since`.
    2039              :     V2,
                            /// Adds a request ID to all requests; the response also includes it.
    2040              :     V3,
    2041              : }
    2042              : 
    2043              : pub type RequestId = u64; // request ID carried by every request (protocol V3)
    2044              : 
    2045              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Header shared by every pagestream request.
                        ///
                        /// `PagestreamFeMessage::serialize` writes these three fields, in this order, right after
                        /// the tag byte.
    2046              : pub struct PagestreamRequest {
                            /// Request ID (protocol V3).
    2047              :     pub reqid: RequestId,
                            /// Get the version at this point in time.
    2048              :     pub request_lsn: Lsn,
                            /// Hint that the client knows of no modification between this LSN and `request_lsn`.
    2049              :     pub not_modified_since: Lsn,
    2050              : }
    2051              : 
    2052              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Payload of `PagestreamFeMessage::Exists`: the common header plus a relation tag.
    2053              : pub struct PagestreamExistsRequest {
    2054              :     pub hdr: PagestreamRequest,
    2055              :     pub rel: RelTag,
    2056              : }
    2057              : 
    2058              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Payload of `PagestreamFeMessage::Nblocks`: the common header plus a relation tag.
    2059              : pub struct PagestreamNblocksRequest {
    2060              :     pub hdr: PagestreamRequest,
    2061              :     pub rel: RelTag,
    2062              : }
    2063              : 
    2064              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Payload of `PagestreamFeMessage::GetPage`: the common header, a relation tag and a
                        /// block number.
    2065              : pub struct PagestreamGetPageRequest {
    2066              :     pub hdr: PagestreamRequest,
    2067              :     pub rel: RelTag,
    2068              :     pub blkno: u32,
    2069              : }
    2070              : 
    2071              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Payload of `PagestreamFeMessage::DbSize`: the common header plus a database node.
    2072              : pub struct PagestreamDbSizeRequest {
    2073              :     pub hdr: PagestreamRequest,
    2074              :     pub dbnode: u32,
    2075              : }
    2076              : 
    2077              : #[derive(Debug, PartialEq, Eq, Clone, Copy)]
                        /// Payload of `PagestreamFeMessage::GetSlruSegment`: the common header plus the SLRU
                        /// `kind` and segment number.
    2078              : pub struct PagestreamGetSlruSegmentRequest {
    2079              :     pub hdr: PagestreamRequest,
                            // NOTE(review): the encoding of `kind` as a raw byte is not visible in this file — confirm.
    2080              :     pub kind: u8,
    2081              :     pub segno: u32,
    2082              : }
    2083              : 
    2084              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::Exists`.
    2085              : pub struct PagestreamExistsResponse {
                            /// The request this response answers.
    2086              :     pub req: PagestreamExistsRequest,
    2087              :     pub exists: bool,
    2088              : }
    2089              : 
    2090              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::Nblocks`.
    2091              : pub struct PagestreamNblocksResponse {
                            /// The request this response answers.
    2092              :     pub req: PagestreamNblocksRequest,
    2093              :     pub n_blocks: u32,
    2094              : }
    2095              : 
    2096              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::GetPage`.
    2097              : pub struct PagestreamGetPageResponse {
                            /// The request this response answers.
    2098              :     pub req: PagestreamGetPageRequest,
                            /// Raw page bytes.
    2099              :     pub page: Bytes,
    2100              : }
    2101              : 
    2102              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::GetSlruSegment`.
    2103              : pub struct PagestreamGetSlruSegmentResponse {
                            /// The request this response answers.
    2104              :     pub req: PagestreamGetSlruSegmentRequest,
                            /// Raw segment bytes.
    2105              :     pub segment: Bytes,
    2106              : }
    2107              : 
    2108              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::Error`.
    2109              : pub struct PagestreamErrorResponse {
                            /// Only the common request header is kept here, not a request-specific payload.
    2110              :     pub req: PagestreamRequest,
                            /// Error text.
    2111              :     pub message: String,
    2112              : }
    2113              : 
    2114              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::DbSize`.
    2115              : pub struct PagestreamDbSizeResponse {
                            /// The request this response answers.
    2116              :     pub req: PagestreamDbSizeRequest,
                            // NOTE(review): signed type; presumably a size in bytes — confirm units with the producer.
    2117              :     pub db_size: i64,
    2118              : }
    2119              : 
    2120              : #[cfg(feature = "testing")]
    2121              : #[derive(Debug, PartialEq, Eq, Clone)]
                        /// Payload of `PagestreamFeMessage::Test`; only compiled with the `testing` feature.
    2122              : pub struct PagestreamTestRequest {
    2123              :     pub hdr: PagestreamRequest,
                            // NOTE(review): presumably used to group test requests into batches — confirm.
    2124              :     pub batch_key: u64,
    2125              :     pub message: String,
    2126              : }
    2127              : 
    2128              : #[cfg(feature = "testing")]
    2129              : #[derive(Debug)]
                        /// Payload of `PagestreamBeMessage::Test`; only compiled with the `testing` feature.
    2130              : pub struct PagestreamTestResponse {
                            /// The request this response answers.
    2131              :     pub req: PagestreamTestRequest,
    2132              : }
    2133              : 
    2134              : /// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
    2135              : /// that require pageserver-internal types.  It is sufficient to get the total size.
    2136            0 : #[derive(Serialize, Deserialize, Debug)]
    2137              : pub struct TenantHistorySize {
                            /// The tenant this size belongs to.
    2138              :     pub id: TenantId,
    2139              :     /// Size is a mixture of WAL and logical size, so the unit is bytes.
    2140              :     ///
    2141              :     /// Will be none if `?inputs_only=true` was given.
    2142              :     pub size: Option<u64>,
    2143              : }
    2144              : 
    2145              : impl PagestreamFeMessage {
    2146              :     /// Serialize a compute -> pageserver message. This is currently only used in testing
    2147              :     /// tools. Always uses protocol version 3.
    2148            4 :     pub fn serialize(&self) -> Bytes {
    2149            4 :         let mut bytes = BytesMut::new();
    2150            4 : 
    2151            4 :         match self {
    2152            1 :             Self::Exists(req) => {
    2153            1 :                 bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
    2154            1 :                 bytes.put_u64(req.hdr.reqid);
    2155            1 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2156            1 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2157            1 :                 bytes.put_u32(req.rel.spcnode);
    2158            1 :                 bytes.put_u32(req.rel.dbnode);
    2159            1 :                 bytes.put_u32(req.rel.relnode);
    2160            1 :                 bytes.put_u8(req.rel.forknum);
    2161            1 :             }
    2162              : 
    2163            1 :             Self::Nblocks(req) => {
    2164            1 :                 bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
    2165            1 :                 bytes.put_u64(req.hdr.reqid);
    2166            1 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2167            1 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2168            1 :                 bytes.put_u32(req.rel.spcnode);
    2169            1 :                 bytes.put_u32(req.rel.dbnode);
    2170            1 :                 bytes.put_u32(req.rel.relnode);
    2171            1 :                 bytes.put_u8(req.rel.forknum);
    2172            1 :             }
    2173              : 
    2174            1 :             Self::GetPage(req) => {
    2175            1 :                 bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
    2176            1 :                 bytes.put_u64(req.hdr.reqid);
    2177            1 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2178            1 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2179            1 :                 bytes.put_u32(req.rel.spcnode);
    2180            1 :                 bytes.put_u32(req.rel.dbnode);
    2181            1 :                 bytes.put_u32(req.rel.relnode);
    2182            1 :                 bytes.put_u8(req.rel.forknum);
    2183            1 :                 bytes.put_u32(req.blkno);
    2184            1 :             }
    2185              : 
    2186            1 :             Self::DbSize(req) => {
    2187            1 :                 bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
    2188            1 :                 bytes.put_u64(req.hdr.reqid);
    2189            1 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2190            1 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2191            1 :                 bytes.put_u32(req.dbnode);
    2192            1 :             }
    2193              : 
    2194            0 :             Self::GetSlruSegment(req) => {
    2195            0 :                 bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
    2196            0 :                 bytes.put_u64(req.hdr.reqid);
    2197            0 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2198            0 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2199            0 :                 bytes.put_u8(req.kind);
    2200            0 :                 bytes.put_u32(req.segno);
    2201            0 :             }
    2202              :             #[cfg(feature = "testing")]
    2203            0 :             Self::Test(req) => {
    2204            0 :                 bytes.put_u8(PagestreamFeMessageTag::Test as u8);
    2205            0 :                 bytes.put_u64(req.hdr.reqid);
    2206            0 :                 bytes.put_u64(req.hdr.request_lsn.0);
    2207            0 :                 bytes.put_u64(req.hdr.not_modified_since.0);
    2208            0 :                 bytes.put_u64(req.batch_key);
    2209            0 :                 let message = req.message.as_bytes();
    2210            0 :                 bytes.put_u64(message.len() as u64);
    2211            0 :                 bytes.put_slice(message);
    2212            0 :             }
    2213              :         }
    2214              : 
    2215            4 :         bytes.into()
    2216            4 :     }
    2217              : 
    2218            4 :     pub fn parse<R: std::io::Read>(
    2219            4 :         body: &mut R,
    2220            4 :         protocol_version: PagestreamProtocolVersion,
    2221            4 :     ) -> anyhow::Result<PagestreamFeMessage> {
    2222              :         // these correspond to the NeonMessageTag enum in pagestore_client.h
    2223              :         //
    2224              :         // TODO: consider using protobuf or serde bincode for less error prone
    2225              :         // serialization.
    2226            4 :         let msg_tag = body.read_u8()?;
    2227            4 :         let (reqid, request_lsn, not_modified_since) = match protocol_version {
    2228              :             PagestreamProtocolVersion::V2 => (
    2229              :                 0,
    2230            0 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    2231            0 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    2232              :             ),
    2233              :             PagestreamProtocolVersion::V3 => (
    2234            4 :                 body.read_u64::<BigEndian>()?,
    2235            4 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    2236            4 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    2237              :             ),
    2238              :         };
    2239              : 
    2240            4 :         match PagestreamFeMessageTag::try_from(msg_tag)
    2241            4 :             .map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
    2242              :         {
    2243              :             PagestreamFeMessageTag::Exists => {
    2244              :                 Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
    2245            1 :                     hdr: PagestreamRequest {
    2246            1 :                         reqid,
    2247            1 :                         request_lsn,
    2248            1 :                         not_modified_since,
    2249            1 :                     },
    2250            1 :                     rel: RelTag {
    2251            1 :                         spcnode: body.read_u32::<BigEndian>()?,
    2252            1 :                         dbnode: body.read_u32::<BigEndian>()?,
    2253            1 :                         relnode: body.read_u32::<BigEndian>()?,
    2254            1 :                         forknum: body.read_u8()?,
    2255              :                     },
    2256              :                 }))
    2257              :             }
    2258              :             PagestreamFeMessageTag::Nblocks => {
    2259              :                 Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
    2260            1 :                     hdr: PagestreamRequest {
    2261            1 :                         reqid,
    2262            1 :                         request_lsn,
    2263            1 :                         not_modified_since,
    2264            1 :                     },
    2265            1 :                     rel: RelTag {
    2266            1 :                         spcnode: body.read_u32::<BigEndian>()?,
    2267            1 :                         dbnode: body.read_u32::<BigEndian>()?,
    2268            1 :                         relnode: body.read_u32::<BigEndian>()?,
    2269            1 :                         forknum: body.read_u8()?,
    2270              :                     },
    2271              :                 }))
    2272              :             }
    2273              :             PagestreamFeMessageTag::GetPage => {
    2274              :                 Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
    2275            1 :                     hdr: PagestreamRequest {
    2276            1 :                         reqid,
    2277            1 :                         request_lsn,
    2278            1 :                         not_modified_since,
    2279            1 :                     },
    2280            1 :                     rel: RelTag {
    2281            1 :                         spcnode: body.read_u32::<BigEndian>()?,
    2282            1 :                         dbnode: body.read_u32::<BigEndian>()?,
    2283            1 :                         relnode: body.read_u32::<BigEndian>()?,
    2284            1 :                         forknum: body.read_u8()?,
    2285              :                     },
    2286            1 :                     blkno: body.read_u32::<BigEndian>()?,
    2287              :                 }))
    2288              :             }
    2289              :             PagestreamFeMessageTag::DbSize => {
    2290              :                 Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
    2291            1 :                     hdr: PagestreamRequest {
    2292            1 :                         reqid,
    2293            1 :                         request_lsn,
    2294            1 :                         not_modified_since,
    2295            1 :                     },
    2296            1 :                     dbnode: body.read_u32::<BigEndian>()?,
    2297              :                 }))
    2298              :             }
    2299              :             PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
    2300              :                 PagestreamGetSlruSegmentRequest {
    2301            0 :                     hdr: PagestreamRequest {
    2302            0 :                         reqid,
    2303            0 :                         request_lsn,
    2304            0 :                         not_modified_since,
    2305            0 :                     },
    2306            0 :                     kind: body.read_u8()?,
    2307            0 :                     segno: body.read_u32::<BigEndian>()?,
    2308              :                 },
    2309              :             )),
    2310              :             #[cfg(feature = "testing")]
    2311              :             PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
    2312            0 :                 hdr: PagestreamRequest {
    2313            0 :                     reqid,
    2314            0 :                     request_lsn,
    2315            0 :                     not_modified_since,
    2316            0 :                 },
    2317            0 :                 batch_key: body.read_u64::<BigEndian>()?,
    2318              :                 message: {
    2319            0 :                     let len = body.read_u64::<BigEndian>()?;
    2320            0 :                     let mut buf = vec![0; len as usize];
    2321            0 :                     body.read_exact(&mut buf)?;
    2322            0 :                     String::from_utf8(buf)?
    2323              :                 },
    2324              :             })),
    2325              :         }
    2326            4 :     }
    2327              : }
    2328              : 
    2329              : impl PagestreamBeMessage {
    2330            0 :     pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
    2331            0 :         let mut bytes = BytesMut::new();
    2332              : 
    2333              :         use PagestreamBeMessageTag as Tag;
    2334            0 :         match protocol_version {
    2335              :             PagestreamProtocolVersion::V2 => {
    2336            0 :                 match self {
    2337            0 :                     Self::Exists(resp) => {
    2338            0 :                         bytes.put_u8(Tag::Exists as u8);
    2339            0 :                         bytes.put_u8(resp.exists as u8);
    2340            0 :                     }
    2341              : 
    2342            0 :                     Self::Nblocks(resp) => {
    2343            0 :                         bytes.put_u8(Tag::Nblocks as u8);
    2344            0 :                         bytes.put_u32(resp.n_blocks);
    2345            0 :                     }
    2346              : 
    2347            0 :                     Self::GetPage(resp) => {
    2348            0 :                         bytes.put_u8(Tag::GetPage as u8);
    2349            0 :                         bytes.put(&resp.page[..])
    2350              :                     }
    2351              : 
    2352            0 :                     Self::Error(resp) => {
    2353            0 :                         bytes.put_u8(Tag::Error as u8);
    2354            0 :                         bytes.put(resp.message.as_bytes());
    2355            0 :                         bytes.put_u8(0); // null terminator
    2356            0 :                     }
    2357            0 :                     Self::DbSize(resp) => {
    2358            0 :                         bytes.put_u8(Tag::DbSize as u8);
    2359            0 :                         bytes.put_i64(resp.db_size);
    2360            0 :                     }
    2361              : 
    2362            0 :                     Self::GetSlruSegment(resp) => {
    2363            0 :                         bytes.put_u8(Tag::GetSlruSegment as u8);
    2364            0 :                         bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
    2365            0 :                         bytes.put(&resp.segment[..]);
    2366            0 :                     }
    2367              : 
    2368              :                     #[cfg(feature = "testing")]
    2369            0 :                     Self::Test(resp) => {
    2370            0 :                         bytes.put_u8(Tag::Test as u8);
    2371            0 :                         bytes.put_u64(resp.req.batch_key);
    2372            0 :                         let message = resp.req.message.as_bytes();
    2373            0 :                         bytes.put_u64(message.len() as u64);
    2374            0 :                         bytes.put_slice(message);
    2375            0 :                     }
    2376              :                 }
    2377              :             }
    2378              :             PagestreamProtocolVersion::V3 => {
    2379            0 :                 match self {
    2380            0 :                     Self::Exists(resp) => {
    2381            0 :                         bytes.put_u8(Tag::Exists as u8);
    2382            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2383            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2384            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2385            0 :                         bytes.put_u32(resp.req.rel.spcnode);
    2386            0 :                         bytes.put_u32(resp.req.rel.dbnode);
    2387            0 :                         bytes.put_u32(resp.req.rel.relnode);
    2388            0 :                         bytes.put_u8(resp.req.rel.forknum);
    2389            0 :                         bytes.put_u8(resp.exists as u8);
    2390            0 :                     }
    2391              : 
    2392            0 :                     Self::Nblocks(resp) => {
    2393            0 :                         bytes.put_u8(Tag::Nblocks as u8);
    2394            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2395            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2396            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2397            0 :                         bytes.put_u32(resp.req.rel.spcnode);
    2398            0 :                         bytes.put_u32(resp.req.rel.dbnode);
    2399            0 :                         bytes.put_u32(resp.req.rel.relnode);
    2400            0 :                         bytes.put_u8(resp.req.rel.forknum);
    2401            0 :                         bytes.put_u32(resp.n_blocks);
    2402            0 :                     }
    2403              : 
    2404            0 :                     Self::GetPage(resp) => {
    2405            0 :                         bytes.put_u8(Tag::GetPage as u8);
    2406            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2407            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2408            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2409            0 :                         bytes.put_u32(resp.req.rel.spcnode);
    2410            0 :                         bytes.put_u32(resp.req.rel.dbnode);
    2411            0 :                         bytes.put_u32(resp.req.rel.relnode);
    2412            0 :                         bytes.put_u8(resp.req.rel.forknum);
    2413            0 :                         bytes.put_u32(resp.req.blkno);
    2414            0 :                         bytes.put(&resp.page[..])
    2415              :                     }
    2416              : 
    2417            0 :                     Self::Error(resp) => {
    2418            0 :                         bytes.put_u8(Tag::Error as u8);
    2419            0 :                         bytes.put_u64(resp.req.reqid);
    2420            0 :                         bytes.put_u64(resp.req.request_lsn.0);
    2421            0 :                         bytes.put_u64(resp.req.not_modified_since.0);
    2422            0 :                         bytes.put(resp.message.as_bytes());
    2423            0 :                         bytes.put_u8(0); // null terminator
    2424            0 :                     }
    2425            0 :                     Self::DbSize(resp) => {
    2426            0 :                         bytes.put_u8(Tag::DbSize as u8);
    2427            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2428            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2429            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2430            0 :                         bytes.put_u32(resp.req.dbnode);
    2431            0 :                         bytes.put_i64(resp.db_size);
    2432            0 :                     }
    2433              : 
    2434            0 :                     Self::GetSlruSegment(resp) => {
    2435            0 :                         bytes.put_u8(Tag::GetSlruSegment as u8);
    2436            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2437            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2438            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2439            0 :                         bytes.put_u8(resp.req.kind);
    2440            0 :                         bytes.put_u32(resp.req.segno);
    2441            0 :                         bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
    2442            0 :                         bytes.put(&resp.segment[..]);
    2443            0 :                     }
    2444              : 
    2445              :                     #[cfg(feature = "testing")]
    2446            0 :                     Self::Test(resp) => {
    2447            0 :                         bytes.put_u8(Tag::Test as u8);
    2448            0 :                         bytes.put_u64(resp.req.hdr.reqid);
    2449            0 :                         bytes.put_u64(resp.req.hdr.request_lsn.0);
    2450            0 :                         bytes.put_u64(resp.req.hdr.not_modified_since.0);
    2451            0 :                         bytes.put_u64(resp.req.batch_key);
    2452            0 :                         let message = resp.req.message.as_bytes();
    2453            0 :                         bytes.put_u64(message.len() as u64);
    2454            0 :                         bytes.put_slice(message);
    2455            0 :                     }
    2456              :                 }
    2457              :             }
    2458              :         }
    2459            0 :         bytes.into()
    2460            0 :     }
    2461              : 
    2462            0 :     pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
    2463            0 :         let mut buf = buf.reader();
    2464            0 :         let msg_tag = buf.read_u8()?;
    2465              : 
    2466              :         use PagestreamBeMessageTag as Tag;
    2467            0 :         let ok =
    2468            0 :             match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
    2469              :                 Tag::Exists => {
    2470            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2471            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2472            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2473            0 :                     let rel = RelTag {
    2474            0 :                         spcnode: buf.read_u32::<BigEndian>()?,
    2475            0 :                         dbnode: buf.read_u32::<BigEndian>()?,
    2476            0 :                         relnode: buf.read_u32::<BigEndian>()?,
    2477            0 :                         forknum: buf.read_u8()?,
    2478              :                     };
    2479            0 :                     let exists = buf.read_u8()? != 0;
    2480            0 :                     Self::Exists(PagestreamExistsResponse {
    2481            0 :                         req: PagestreamExistsRequest {
    2482            0 :                             hdr: PagestreamRequest {
    2483            0 :                                 reqid,
    2484            0 :                                 request_lsn,
    2485            0 :                                 not_modified_since,
    2486            0 :                             },
    2487            0 :                             rel,
    2488            0 :                         },
    2489            0 :                         exists,
    2490            0 :                     })
    2491              :                 }
    2492              :                 Tag::Nblocks => {
    2493            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2494            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2495            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2496            0 :                     let rel = RelTag {
    2497            0 :                         spcnode: buf.read_u32::<BigEndian>()?,
    2498            0 :                         dbnode: buf.read_u32::<BigEndian>()?,
    2499            0 :                         relnode: buf.read_u32::<BigEndian>()?,
    2500            0 :                         forknum: buf.read_u8()?,
    2501              :                     };
    2502            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    2503            0 :                     Self::Nblocks(PagestreamNblocksResponse {
    2504            0 :                         req: PagestreamNblocksRequest {
    2505            0 :                             hdr: PagestreamRequest {
    2506            0 :                                 reqid,
    2507            0 :                                 request_lsn,
    2508            0 :                                 not_modified_since,
    2509            0 :                             },
    2510            0 :                             rel,
    2511            0 :                         },
    2512            0 :                         n_blocks,
    2513            0 :                     })
    2514              :                 }
    2515              :                 Tag::GetPage => {
    2516            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2517            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2518            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2519            0 :                     let rel = RelTag {
    2520            0 :                         spcnode: buf.read_u32::<BigEndian>()?,
    2521            0 :                         dbnode: buf.read_u32::<BigEndian>()?,
    2522            0 :                         relnode: buf.read_u32::<BigEndian>()?,
    2523            0 :                         forknum: buf.read_u8()?,
    2524              :                     };
    2525            0 :                     let blkno = buf.read_u32::<BigEndian>()?;
    2526            0 :                     let mut page = vec![0; 8192]; // TODO: use MaybeUninit
    2527            0 :                     buf.read_exact(&mut page)?;
    2528            0 :                     Self::GetPage(PagestreamGetPageResponse {
    2529            0 :                         req: PagestreamGetPageRequest {
    2530            0 :                             hdr: PagestreamRequest {
    2531            0 :                                 reqid,
    2532            0 :                                 request_lsn,
    2533            0 :                                 not_modified_since,
    2534            0 :                             },
    2535            0 :                             rel,
    2536            0 :                             blkno,
    2537            0 :                         },
    2538            0 :                         page: page.into(),
    2539            0 :                     })
    2540              :                 }
    2541              :                 Tag::Error => {
    2542            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2543            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2544            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2545            0 :                     let mut msg = Vec::new();
    2546            0 :                     buf.read_until(0, &mut msg)?;
    2547            0 :                     let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
    2548            0 :                     let rust_str = cstring.to_str()?;
    2549            0 :                     Self::Error(PagestreamErrorResponse {
    2550            0 :                         req: PagestreamRequest {
    2551            0 :                             reqid,
    2552            0 :                             request_lsn,
    2553            0 :                             not_modified_since,
    2554            0 :                         },
    2555            0 :                         message: rust_str.to_owned(),
    2556            0 :                     })
    2557              :                 }
    2558              :                 Tag::DbSize => {
    2559            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2560            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2561            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2562            0 :                     let dbnode = buf.read_u32::<BigEndian>()?;
    2563            0 :                     let db_size = buf.read_i64::<BigEndian>()?;
    2564            0 :                     Self::DbSize(PagestreamDbSizeResponse {
    2565            0 :                         req: PagestreamDbSizeRequest {
    2566            0 :                             hdr: PagestreamRequest {
    2567            0 :                                 reqid,
    2568            0 :                                 request_lsn,
    2569            0 :                                 not_modified_since,
    2570            0 :                             },
    2571            0 :                             dbnode,
    2572            0 :                         },
    2573            0 :                         db_size,
    2574            0 :                     })
    2575              :                 }
    2576              :                 Tag::GetSlruSegment => {
    2577            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2578            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2579            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2580            0 :                     let kind = buf.read_u8()?;
    2581            0 :                     let segno = buf.read_u32::<BigEndian>()?;
    2582            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    2583            0 :                     let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
    2584            0 :                     buf.read_exact(&mut segment)?;
    2585            0 :                     Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
    2586            0 :                         req: PagestreamGetSlruSegmentRequest {
    2587            0 :                             hdr: PagestreamRequest {
    2588            0 :                                 reqid,
    2589            0 :                                 request_lsn,
    2590            0 :                                 not_modified_since,
    2591            0 :                             },
    2592            0 :                             kind,
    2593            0 :                             segno,
    2594            0 :                         },
    2595            0 :                         segment: segment.into(),
    2596            0 :                     })
    2597              :                 }
    2598              :                 #[cfg(feature = "testing")]
    2599              :                 Tag::Test => {
    2600            0 :                     let reqid = buf.read_u64::<BigEndian>()?;
    2601            0 :                     let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
    2602            0 :                     let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
    2603            0 :                     let batch_key = buf.read_u64::<BigEndian>()?;
    2604            0 :                     let len = buf.read_u64::<BigEndian>()?;
    2605            0 :                     let mut msg = vec![0; len as usize];
    2606            0 :                     buf.read_exact(&mut msg)?;
    2607            0 :                     let message = String::from_utf8(msg)?;
    2608            0 :                     Self::Test(PagestreamTestResponse {
    2609            0 :                         req: PagestreamTestRequest {
    2610            0 :                             hdr: PagestreamRequest {
    2611            0 :                                 reqid,
    2612            0 :                                 request_lsn,
    2613            0 :                                 not_modified_since,
    2614            0 :                             },
    2615            0 :                             batch_key,
    2616            0 :                             message,
    2617            0 :                         },
    2618            0 :                     })
    2619              :                 }
    2620              :             };
    2621            0 :         let remaining = buf.into_inner();
    2622            0 :         if !remaining.is_empty() {
    2623            0 :             anyhow::bail!(
    2624            0 :                 "remaining bytes in msg with tag={msg_tag}: {}",
    2625            0 :                 remaining.len()
    2626            0 :             );
    2627            0 :         }
    2628            0 :         Ok(ok)
    2629            0 :     }
    2630              : 
    2631            0 :     pub fn kind(&self) -> &'static str {
                                // Returns a fixed `'static` name for the variant held by `self`; every arm
                                // yields the variant's own identifier as a string literal.
    2632            0 :         match self {
    2633            0 :             Self::Exists(_) => "Exists",
    2634            0 :             Self::Nblocks(_) => "Nblocks",
    2635            0 :             Self::GetPage(_) => "GetPage",
    2636            0 :             Self::Error(_) => "Error",
    2637            0 :             Self::DbSize(_) => "DbSize",
    2638            0 :             Self::GetSlruSegment(_) => "GetSlruSegment",
                                    // This arm is compiled only when the `testing` feature is enabled.
    2639              :             #[cfg(feature = "testing")]
    2640            0 :             Self::Test(_) => "Test",
    2641              :         }
    2642            0 :     }
    2643              : }
    2644              : 
    2645            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// A single page-trace record: a compact key, an LSN (`effective_lsn`) and a
                        /// `SystemTime` timestamp. The type is serde-serializable and deserializable.
                        ///
                        /// NOTE(review): presumably `time` is when the event was recorded and
                        /// `effective_lsn` is the LSN the request was served at; confirm with the producer.
    2646              : pub struct PageTraceEvent {
    2647              :     pub key: CompactKey,
    2648              :     pub effective_lsn: Lsn,
    2649              :     pub time: SystemTime,
    2650              : }
    2651              : 
    2652              : impl Default for PageTraceEvent {
                            // `key` and `effective_lsn` take their own types' defaults, while `time` is
                            // set explicitly to the Unix epoch.
                            // NOTE(review): presumably hand-written rather than derived because
                            // `SystemTime` does not implement `Default`; confirm.
    2653            0 :     fn default() -> Self {
    2654            0 :         Self {
    2655            0 :             key: Default::default(),
    2656            0 :             effective_lsn: Default::default(),
    2657            0 :             time: std::time::UNIX_EPOCH,
    2658            0 :         }
    2659            0 :     }
    2660              : }
    2661              : 
    2662              : #[cfg(test)]
    2663              : mod tests {
    2664              :     use std::str::FromStr;
    2665              : 
    2666              :     use serde_json::json;
    2667              : 
    2668              :     use super::*;
    2669              : 
    2670              :     #[test]
                            // Round-trips four frontend message variants (Exists, Nblocks, GetPage, DbSize)
                            // through `serialize()` and `parse()` and requires each to come back equal.
    2671            1 :     fn test_pagestream() {
    2672            1 :         // Test serialization/deserialization of PagestreamFeMessage
    2673            1 :         let messages = vec![
    2674            1 :             PagestreamFeMessage::Exists(PagestreamExistsRequest {
    2675            1 :                 hdr: PagestreamRequest {
    2676            1 :                     reqid: 0,
    2677            1 :                     request_lsn: Lsn(4),
    2678            1 :                     not_modified_since: Lsn(3),
    2679            1 :                 },
    2680            1 :                 rel: RelTag {
    2681            1 :                     forknum: 1,
    2682            1 :                     spcnode: 2,
    2683            1 :                     dbnode: 3,
    2684            1 :                     relnode: 4,
    2685            1 :                 },
    2686            1 :             }),
    2687            1 :             PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
    2688            1 :                 hdr: PagestreamRequest {
    2689            1 :                     reqid: 0,
    2690            1 :                     request_lsn: Lsn(4),
    2691            1 :                     not_modified_since: Lsn(4),
    2692            1 :                 },
    2693            1 :                 rel: RelTag {
    2694            1 :                     forknum: 1,
    2695            1 :                     spcnode: 2,
    2696            1 :                     dbnode: 3,
    2697            1 :                     relnode: 4,
    2698            1 :                 },
    2699            1 :             }),
    2700            1 :             PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
    2701            1 :                 hdr: PagestreamRequest {
    2702            1 :                     reqid: 0,
    2703            1 :                     request_lsn: Lsn(4),
    2704            1 :                     not_modified_since: Lsn(3),
    2705            1 :                 },
    2706            1 :                 rel: RelTag {
    2707            1 :                     forknum: 1,
    2708            1 :                     spcnode: 2,
    2709            1 :                     dbnode: 3,
    2710            1 :                     relnode: 4,
    2711            1 :                 },
    2712            1 :                 blkno: 7,
    2713            1 :             }),
    2714            1 :             PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
    2715            1 :                 hdr: PagestreamRequest {
    2716            1 :                     reqid: 0,
    2717            1 :                     request_lsn: Lsn(4),
    2718            1 :                     not_modified_since: Lsn(3),
    2719            1 :                 },
    2720            1 :                 dbnode: 7,
    2721            1 :             }),
    2722            1 :         ];
                                // Each message is parsed back using protocol version V3; compared with `==`
                                // inside `assert!`, so a mismatch fails without printing the two values.
    2723            5 :         for msg in messages {
    2724            4 :             let bytes = msg.serialize();
    2725            4 :             let reconstructed =
    2726            4 :                 PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
    2727            4 :                     .unwrap();
    2728            4 :             assert!(msg == reconstructed);
    2729              :         }
    2730            1 :     }
    2731              : 
    2732              :     #[test]
                            // Pins the JSON shape of `TenantInfo` for an Active and a Broken tenant.
                            // The expected JSON has no `gc_blocking` key, so a `None` value must be
                            // left out of the serialized output.
    2733            1 :     fn test_tenantinfo_serde() {
    2734            1 :         // Test serialization/deserialization of TenantInfo
    2735            1 :         let original_active = TenantInfo {
    2736            1 :             id: TenantShardId::unsharded(TenantId::generate()),
    2737            1 :             state: TenantState::Active,
    2738            1 :             current_physical_size: Some(42),
    2739            1 :             attachment_status: TenantAttachmentStatus::Attached,
    2740            1 :             generation: 1,
    2741            1 :             gc_blocking: None,
    2742            1 :         };
    2743            1 :         let expected_active = json!({
    2744            1 :             "id": original_active.id.to_string(),
    2745            1 :             "state": {
    2746            1 :                 "slug": "Active",
    2747            1 :             },
    2748            1 :             "current_physical_size": 42,
    2749            1 :             "attachment_status": {
    2750            1 :                 "slug":"attached",
    2751            1 :             },
    2752            1 :             "generation" : 1
    2753            1 :         });
    2754            1 : 
    2755            1 :         let original_broken = TenantInfo {
    2756            1 :             id: TenantShardId::unsharded(TenantId::generate()),
    2757            1 :             state: TenantState::Broken {
    2758            1 :                 reason: "reason".into(),
    2759            1 :                 backtrace: "backtrace info".into(),
    2760            1 :             },
    2761            1 :             current_physical_size: Some(42),
    2762            1 :             attachment_status: TenantAttachmentStatus::Attached,
    2763            1 :             generation: 1,
    2764            1 :             gc_blocking: None,
    2765            1 :         };
                                // A state with payload is expected as `slug` plus a `data` object.
    2766            1 :         let expected_broken = json!({
    2767            1 :             "id": original_broken.id.to_string(),
    2768            1 :             "state": {
    2769            1 :                 "slug": "Broken",
    2770            1 :                 "data": {
    2771            1 :                     "backtrace": "backtrace info",
    2772            1 :                     "reason": "reason",
    2773            1 :                 }
    2774            1 :             },
    2775            1 :             "current_physical_size": 42,
    2776            1 :             "attachment_status": {
    2777            1 :                 "slug":"attached",
    2778            1 :             },
    2779            1 :             "generation" : 1
    2780            1 :         });
    2781            1 : 
    2782            1 :         assert_eq!(
    2783            1 :             serde_json::to_value(&original_active).unwrap(),
    2784            1 :             expected_active
    2785            1 :         );
    2786              : 
    2787            1 :         assert_eq!(
    2788            1 :             serde_json::to_value(&original_broken).unwrap(),
    2789            1 :             expected_broken
    2790            1 :         );
                                // The `Debug` rendering of a Broken state must include both payload strings.
    2791            1 :         assert!(format!("{:?}", &original_broken.state).contains("reason"));
    2792            1 :         assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
    2793            1 :     }
    2794              : 
    2795              :     #[test]
                            // Deserializing a `TenantConfigRequest` from JSON that contains an unrecognized
                            // key must fail, and the error text must name the offending field.
    2796            1 :     fn test_reject_unknown_field() {
    2797            1 :         let id = TenantId::generate();
    2798            1 :         let config_request = json!({
    2799            1 :             "tenant_id": id.to_string(),
    2800            1 :             "unknown_field": "unknown_value".to_string(),
    2801            1 :         });
    2802            1 :         let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
    2803            1 :         assert!(
    2804            1 :             err.to_string().contains("unknown field `unknown_field`"),
    2805            0 :             "expect unknown field `unknown_field` error, got: {}",
    2806              :             err
    2807              :         );
    2808            1 :     }
    2809              : 
    2810              :     #[test]
                            // Pins the exact JSON string produced for `TenantState::Activating` (a `slug`
                            // plus a `data` value) and checks that it parses back to the same state.
    2811            1 :     fn tenantstatus_activating_serde() {
    2812            1 :         let states = [TenantState::Activating(ActivatingFrom::Attaching)];
    2813            1 :         let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
    2814            1 : 
    2815            1 :         let actual = serde_json::to_string(&states).unwrap();
    2816            1 : 
    2817            1 :         assert_eq!(actual, expected);
    2818              : 
    2819            1 :         let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();
    2820            1 : 
    2821            1 :         assert_eq!(states.as_slice(), &parsed);
    2822            1 :     }
    2823              : 
    2824              :     #[test]
                            // Converts each example `TenantState` into a `&'static str` and compares it with
                            // the expected name. Both `Stopping` examples expect the same string, so the
                            // variant payload must not affect the name.
    2825            1 :     fn tenantstatus_activating_strum() {
    2826            1 :         // tests added, because we use these for metrics
                                // `line!()` is stored per example so a failing assertion can point at it.
    2827            1 :         let examples = [
    2828            1 :             (line!(), TenantState::Attaching, "Attaching"),
    2829            1 :             (
    2830            1 :                 line!(),
    2831            1 :                 TenantState::Activating(ActivatingFrom::Attaching),
    2832            1 :                 "Activating",
    2833            1 :             ),
    2834            1 :             (line!(), TenantState::Active, "Active"),
    2835            1 :             (
    2836            1 :                 line!(),
    2837            1 :                 TenantState::Stopping { progress: None },
    2838            1 :                 "Stopping",
    2839            1 :             ),
    2840            1 :             (
    2841            1 :                 line!(),
    2842            1 :                 TenantState::Stopping {
    2843            1 :                     progress: Some(completion::Barrier::default()),
    2844            1 :                 },
    2845            1 :                 "Stopping",
    2846            1 :             ),
    2847            1 :             (
    2848            1 :                 line!(),
    2849            1 :                 TenantState::Broken {
    2850            1 :                     reason: "Example".into(),
    2851            1 :                     backtrace: "Looooong backtrace".into(),
    2852            1 :                 },
    2853            1 :                 "Broken",
    2854            1 :             ),
    2855            1 :         ];
    2856              : 
    2857            7 :         for (line, rendered, expected) in examples {
    2858            6 :             let actual: &'static str = rendered.into();
    2859            6 :             assert_eq!(actual, expected, "example on {line}");
    2860              :         }
    2861            1 :     }
    2862              : 
    2863              :     #[test]
                            // For every (text, value) pair this checks four things: `FromStr` yields the
                            // value, `Display` yields the text, a serde JSON round trip preserves the value,
                            // and the serde encoding is exactly the `Display` text as a JSON string.
    2864            1 :     fn test_image_compression_algorithm_parsing() {
    2865              :         use ImageCompressionAlgorithm::*;
    2866            1 :         let cases = [
    2867            1 :             ("disabled", Disabled),
    2868            1 :             ("zstd", Zstd { level: None }),
    2869            1 :             ("zstd(18)", Zstd { level: Some(18) }),
    2870            1 :             ("zstd(-3)", Zstd { level: Some(-3) }),
    2871            1 :         ];
    2872              : 
    2873            5 :         for (display, expected) in cases {
    2874            4 :             assert_eq!(
    2875            4 :                 ImageCompressionAlgorithm::from_str(display).unwrap(),
    2876              :                 expected,
    2877            0 :                 "parsing works"
    2878              :             );
    2879            4 :             assert_eq!(format!("{expected}"), display, "Display FromStr roundtrip");
    2880              : 
    2881            4 :             let ser = serde_json::to_string(&expected).expect("serialization");
    2882            4 :             assert_eq!(
    2883            4 :                 serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap(),
    2884              :                 expected,
    2885            0 :                 "serde roundtrip"
    2886              :             );
    2887              : 
    2888            4 :             assert_eq!(
    2889            4 :                 serde_json::Value::String(display.to_string()),
    2890            4 :                 serde_json::to_value(expected).unwrap(),
    2891            0 :                 "Display is the serde serialization"
    2892              :             );
    2893              :         }
    2894            1 :     }
    2895              : 
    2896              :     #[test]
                            // Pins the wire format of `TenantConfigPatchRequest` and then demonstrates how
                            // a decoded patch is applied to a `TenantConfig`.
    2897            1 :     fn test_tenant_config_patch_request_serde() {
    2898            1 :         let patch_request = TenantConfigPatchRequest {
    2899            1 :             tenant_id: TenantId::from_str("17c6d121946a61e5ab0fe5a2fd4d8215").unwrap(),
    2900            1 :             config: TenantConfigPatch {
    2901            1 :                 checkpoint_distance: FieldPatch::Upsert(42),
    2902            1 :                 gc_horizon: FieldPatch::Remove,
    2903            1 :                 compaction_threshold: FieldPatch::Noop,
    2904            1 :                 ..TenantConfigPatch::default()
    2905            1 :             },
    2906            1 :         };
    2907            1 : 
    2908            1 :         let json = serde_json::to_string(&patch_request).unwrap();
    2909            1 : 
                                // Expected encoding: `Upsert` becomes the value, `Remove` becomes `null`, and
                                // `Noop` fields are absent; patch fields sit next to `tenant_id` at top level.
    2910            1 :         let expected = r#"{"tenant_id":"17c6d121946a61e5ab0fe5a2fd4d8215","checkpoint_distance":42,"gc_horizon":null}"#;
    2911            1 :         assert_eq!(json, expected);
    2912              : 
    2913            1 :         let decoded: TenantConfigPatchRequest = serde_json::from_str(&json).unwrap();
    2914            1 :         assert_eq!(decoded.tenant_id, patch_request.tenant_id);
    2915            1 :         assert_eq!(decoded.config, patch_request.config);
    2916              : 
    2917              :         // Now apply the patch to a config to demonstrate semantics
    2918              : 
    2919            1 :         let base = TenantConfig {
    2920            1 :             checkpoint_distance: Some(28),
    2921            1 :             gc_horizon: Some(100),
    2922            1 :             compaction_target_size: Some(1024),
    2923            1 :             ..Default::default()
    2924            1 :         };
    2925            1 : 
                                // The upserted field is overwritten, the removed field becomes `None`, and
                                // every field the patch does not mention keeps its value from `base`.
    2926            1 :         let expected = TenantConfig {
    2927            1 :             checkpoint_distance: Some(42),
    2928            1 :             gc_horizon: None,
    2929            1 :             ..base.clone()
    2930            1 :         };
    2931            1 : 
    2932            1 :         let patched = base.apply_patch(decoded.config).unwrap();
    2933            1 : 
    2934            1 :         assert_eq!(patched, expected);
    2935            1 :     }
    2936              : }
        

Generated by: LCOV version 2.1-beta