LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - models.rs (source / functions) Coverage Total Hit
Test: 3eba1babe267649f8cebefc91c236589db030548.info Lines: 56.4 % 544 307
Test Date: 2024-11-22 12:36:12 Functions: 3.7 % 1039 38

            Line data    Source code
       1              : pub mod detach_ancestor;
       2              : pub mod partitioning;
       3              : pub mod utilization;
       4              : 
       5              : pub use utilization::PageserverUtilization;
       6              : 
       7              : use std::{
       8              :     collections::HashMap,
       9              :     fmt::Display,
      10              :     io::{BufRead, Read},
      11              :     num::{NonZeroU32, NonZeroU64, NonZeroUsize},
      12              :     str::FromStr,
      13              :     time::{Duration, SystemTime},
      14              : };
      15              : 
      16              : use byteorder::{BigEndian, ReadBytesExt};
      17              : use postgres_ffi::BLCKSZ;
      18              : use serde::{Deserialize, Serialize};
      19              : use serde_with::serde_as;
      20              : use utils::{
      21              :     completion,
      22              :     id::{NodeId, TenantId, TimelineId},
      23              :     lsn::Lsn,
      24              :     serde_system_time,
      25              : };
      26              : 
      27              : use crate::{
      28              :     reltag::RelTag,
      29              :     shard::{ShardCount, ShardStripeSize, TenantShardId},
      30              : };
      31              : use anyhow::bail;
      32              : use bytes::{Buf, BufMut, Bytes, BytesMut};
      33              : 
/// The state of a tenant in this pageserver.
///
/// ```mermaid
/// stateDiagram-v2
///
///     [*] --> Attaching: spawn_attach()
///
///     Attaching --> Activating: activate()
///     Activating --> Active: infallible
///
///     Attaching --> Broken: attach() failure
///
///     Active --> Stopping: set_stopping(), part of shutdown & detach
///     Stopping --> Broken: late error in remove_tenant_from_memory
///
///     Broken --> [*]: ignore / detach / shutdown
///     Stopping --> [*]: remove_from_memory complete
///
///     Active --> Broken: cfg(testing)-only tenant break point
/// ```
///
/// On the wire this uses serde's adjacently tagged representation: the variant
/// name is stored under `"slug"` and the variant payload (if any) under `"data"`.
#[derive(
    Clone,
    PartialEq,
    Eq,
    serde::Serialize,
    serde::Deserialize,
    strum_macros::Display,
    strum_macros::VariantNames,
    strum_macros::AsRefStr,
    strum_macros::IntoStaticStr,
)]
#[serde(tag = "slug", content = "data")]
pub enum TenantState {
    /// This tenant is being attached to the pageserver.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Attaching,
    /// The tenant is transitioning from Attaching to Active.
    ///
    /// The payload records the state we came from; [`ActivatingFrom`] currently
    /// has a single variant, `Attaching`.
    ///
    /// While in this state, the individual timelines are being activated.
    ///
    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Activating(ActivatingFrom),
    /// The tenant has finished activating and is open for business.
    ///
    /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
    Active,
    /// The tenant is recognized by pageserver, but it is being detached or the
    /// system is being shut down.
    ///
    /// Transitions out of this state are possible through `set_broken()`.
    Stopping {
        // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
        // otherwise it will not be skipped during deserialization
        //
        // In-memory only: `#[serde(skip)]` keeps the barrier out of the serialized form.
        #[serde(skip)]
        progress: completion::Barrier,
    },
    /// The tenant is recognized by the pageserver, but can no longer be used for
    /// any operations.
    ///
    /// If the tenant fails to load or attach, it will transition to this state
    /// and it is guaranteed that no background tasks are running in its name.
    ///
    /// The other way to transition into this state is from `Stopping` state
    /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
    /// if the cleanup future executed by `remove_tenant_from_memory()` fails.
    ///
    /// `reason` is the explanation reported by `attachment_status()`; `backtrace`
    /// is the textual backtrace that `broken_from_reason()` captures on construction.
    Broken { reason: String, backtrace: String },
}
     102              : 
     103              : impl TenantState {
     104            0 :     pub fn attachment_status(&self) -> TenantAttachmentStatus {
     105              :         use TenantAttachmentStatus::*;
     106              : 
     107              :         // Below TenantState::Activating is used as "transient" or "transparent" state for
     108              :         // attachment_status determining.
     109            0 :         match self {
     110              :             // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
     111              :             // So, technically, we can return Attached here.
     112              :             // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
     113              :             // But, our attach task might still be fetching the remote timelines, etc.
     114              :             // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
     115            0 :             Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
     116              :             // We only reach Active after successful load / attach.
     117              :             // So, call atttachment status Attached.
     118            0 :             Self::Active => Attached,
     119              :             // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
     120              :             // However, it also becomes Broken if the regular load fails.
     121              :             // From Console's perspective there's no practical difference
     122              :             // because attachment_status is polled by console only during attach operation execution.
     123            0 :             Self::Broken { reason, .. } => Failed {
     124            0 :                 reason: reason.to_owned(),
     125            0 :             },
     126              :             // Why is Stopping a Maybe case? Because, during pageserver shutdown,
     127              :             // we set the Stopping state irrespective of whether the tenant
     128              :             // has finished attaching or not.
     129            0 :             Self::Stopping { .. } => Maybe,
     130              :         }
     131            0 :     }
     132              : 
     133            0 :     pub fn broken_from_reason(reason: String) -> Self {
     134            0 :         let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture());
     135            0 :         Self::Broken {
     136            0 :             reason,
     137            0 :             backtrace: backtrace_str,
     138            0 :         }
     139            0 :     }
     140              : }
     141              : 
     142              : impl std::fmt::Debug for TenantState {
     143            2 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     144            2 :         match self {
     145            2 :             Self::Broken { reason, backtrace } if !reason.is_empty() => {
     146            2 :                 write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}")
     147              :             }
     148            0 :             _ => write!(f, "{self}"),
     149              :         }
     150            2 :     }
     151              : }
     152              : 
/// A temporary lease to a specific lsn inside a timeline.
/// Access to the lsn is guaranteed by the pageserver until the expiration indicated by `valid_until`.
#[serde_as]
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct LsnLease {
    /// Point in time after which the lease counts as expired (see `is_expired`).
    /// (De)serialized through the `SystemTimeAsRfc3339Millis` converter.
    #[serde_as(as = "SystemTimeAsRfc3339Millis")]
    pub valid_until: SystemTime,
}
     161              : 
// `serde_as` adapter that represents a `SystemTime` as a string:
// - serialization formats it with `humantime::format_rfc3339_millis`;
// - deserialization parses it back with `humantime::parse_rfc3339`, surfacing
//   a `humantime::TimestampError` for input that is not a valid timestamp.
serde_with::serde_conv!(
    SystemTimeAsRfc3339Millis,
    SystemTime,
    |time: &SystemTime| humantime::format_rfc3339_millis(*time).to_string(),
    |value: String| -> Result<_, humantime::TimestampError> { humantime::parse_rfc3339(&value) }
);
     168              : 
     169              : impl LsnLease {
     170              :     /// The default length for an explicit LSN lease request (10 minutes).
     171              :     pub const DEFAULT_LENGTH: Duration = Duration::from_secs(10 * 60);
     172              : 
     173              :     /// The default length for an implicit LSN lease granted during
     174              :     /// `get_lsn_by_timestamp` request (1 minutes).
     175              :     pub const DEFAULT_LENGTH_FOR_TS: Duration = Duration::from_secs(60);
     176              : 
     177              :     /// Checks whether the lease is expired.
     178            6 :     pub fn is_expired(&self, now: &SystemTime) -> bool {
     179            6 :         now > &self.valid_until
     180            6 :     }
     181              : }
     182              : 
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
///
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
/// useless. Remove, once we've checked that there's no client code left that looks at this.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ActivatingFrom {
    /// Arrived at [`TenantState::Activating`] from [`TenantState::Attaching`]
    Attaching,
}
     192              : 
/// A state of a timeline in pageserver's memory.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TimelineState {
    /// The timeline is recognized by the pageserver but is not yet operational.
    /// In particular, the walreceiver connection loop is not running for this timeline.
    /// It will eventually transition to state Active or Broken.
    Loading,
    /// The timeline is fully operational.
    /// It can be queried, and the walreceiver connection loop is running.
    Active,
    /// The timeline was previously Loading or Active but is shutting down.
    /// It cannot transition back into any other state.
    Stopping,
    /// The timeline is broken and not operational (previous states: Loading or Active).
    ///
    /// Carries the same `reason` / `backtrace` pair of strings as
    /// `TenantState::Broken`.
    Broken { reason: String, backtrace: String },
}
     209              : 
/// Request body for creating a timeline.
///
/// `mode` is flattened, so the fields of the chosen
/// [`TimelineCreateRequestMode`] variant appear next to `new_timeline_id`
/// in the serialized object rather than in a nested one.
#[derive(Serialize, Deserialize, Clone)]
pub struct TimelineCreateRequest {
    /// Id to assign to the timeline being created.
    pub new_timeline_id: TimelineId,
    /// How the new timeline gets its initial contents.
    #[serde(flatten)]
    pub mode: TimelineCreateRequestMode,
}
     216              : 
/// The ways a timeline can be created.
///
/// The enum is `#[serde(untagged)]`: there is no discriminator field, and on
/// deserialization serde tries the variants in declaration order and keeps the
/// first one that matches. The order of the variants below is therefore
/// significant.
#[derive(Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum TimelineCreateRequestMode {
    /// Branch off an existing timeline. Selected whenever
    /// `ancestor_timeline_id` is present, since that is the only required field.
    Branch {
        ancestor_timeline_id: TimelineId,
        #[serde(default)]
        ancestor_start_lsn: Option<Lsn>,
        // TODO: cplane sets this, but, the branching code always
        // inherits the ancestor's pg_version. Earlier code wasn't
        // using a flattened enum, so, it was an accepted field, and
        // we continue to accept it by having it here.
        pg_version: Option<u32>,
    },
    // NB: Bootstrap is all-optional, so with serde(untagged) any input that did not
    // match an earlier variant will match here; it must stay the LAST variant
    // (serde picks the first matching enum variant, in declaration order).
    /// Create a timeline that does not branch from an ancestor.
    Bootstrap {
        #[serde(default)]
        existing_initdb_timeline_id: Option<TimelineId>,
        pg_version: Option<u32>,
    },
}
     238              : 
/// Request body naming the LSN for which a lease is requested.
#[derive(Serialize, Deserialize, Clone)]
pub struct LsnLeaseRequest {
    /// The LSN the lease applies to.
    pub lsn: Lsn,
}
     243              : 
/// Request body for splitting a tenant into more shards.
#[derive(Serialize, Deserialize)]
pub struct TenantShardSplitRequest {
    /// Shard count the tenant should have after the split.
    pub new_shard_count: u8,

    // A tenant's stripe size is only meaningful the first time their shard count goes
    // above 1: therefore during a split from 1->N shards, we may modify the stripe size.
    //
    // If this is set while the shard count is being increased from an already >1 value,
    // then the request will fail with 400.
    pub new_stripe_size: Option<ShardStripeSize>,
}
     255              : 
/// Response to a [`TenantShardSplitRequest`].
#[derive(Serialize, Deserialize)]
pub struct TenantShardSplitResponse {
    /// Ids of the shards reported back by the split.
    pub new_shards: Vec<TenantShardId>,
}
     260              : 
/// Parameters that apply to all shards in a tenant.  Used during tenant creation.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct ShardParameters {
    /// Number of shards of the tenant.
    pub count: ShardCount,
    /// Stripe size used to distribute data across the shards.
    pub stripe_size: ShardStripeSize,
}
     268              : 
impl ShardParameters {
    /// Stripe size used by `ShardParameters::default()`: 32768 (= 256 * 1024 / 8).
    // NOTE(review): presumably 256 MiB expressed in 8 KiB pages; confirm the unit
    // against `ShardStripeSize`.
    pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);

    /// Whether these parameters describe an unsharded tenant; delegates to
    /// `ShardCount::is_unsharded` on `count`.
    pub fn is_unsharded(&self) -> bool {
        self.count.is_unsharded()
    }
}
     276              : 
     277              : impl Default for ShardParameters {
     278          193 :     fn default() -> Self {
     279          193 :         Self {
     280          193 :             count: ShardCount::new(0),
     281          193 :             stripe_size: Self::DEFAULT_STRIPE_SIZE,
     282          193 :         }
     283          193 :     }
     284              : }
     285              : 
/// An alternative representation of `pageserver::tenant::TenantConf` with
/// simpler types.
///
/// Every field is an `Option`, and `Default` yields a config with all fields
/// `None`.
// NOTE(review): the `Option<String>` fields look like durations that are kept as
// text here and parsed elsewhere (presumably humantime syntax); confirm with the
// code that converts this into `TenantConf`.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
pub struct TenantConfig {
    pub checkpoint_distance: Option<u64>,
    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
    pub compaction_period: Option<String>,
    pub compaction_threshold: Option<usize>,
    // defer parsing compaction_algorithm, like eviction_policy
    pub compaction_algorithm: Option<CompactionAlgorithmSettings>,
    pub gc_horizon: Option<u64>,
    pub gc_period: Option<String>,
    pub image_creation_threshold: Option<usize>,
    pub pitr_interval: Option<String>,
    pub walreceiver_connect_timeout: Option<String>,
    pub lagging_wal_timeout: Option<String>,
    pub max_lsn_wal_lag: Option<NonZeroU64>,
    pub eviction_policy: Option<EvictionPolicy>,
    pub min_resident_size_override: Option<u64>,
    pub evictions_low_residence_duration_metric_threshold: Option<String>,
    pub heatmap_period: Option<String>,
    pub lazy_slru_download: Option<bool>,
    pub timeline_get_throttle: Option<ThrottleConfig>,
    pub image_layer_creation_check_threshold: Option<u8>,
    pub lsn_lease_length: Option<String>,
    pub lsn_lease_length_for_ts: Option<String>,
    pub timeline_offloading: Option<bool>,
}
     315              : 
/// The policy for the aux file storage.
///
/// It can be switched through `switch_aux_file_policy` tenant config.
/// When the first aux file is written, the policy will be persisted in the
/// `index_part.json` file and has a limited migration path.
///
/// Currently, we only allow the following migration path:
///
/// Unset -> V1
///       -> V2
///       -> CrossValidation -> V2
///
/// The textual form (used by `Display`/`FromStr`, and through them by serde)
/// is the kebab-case variant name, e.g. `cross-validation`; parsing ignores
/// ASCII case.
#[derive(
    Eq,
    PartialEq,
    Debug,
    Copy,
    Clone,
    strum_macros::EnumString,
    strum_macros::Display,
    serde_with::DeserializeFromStr,
    serde_with::SerializeDisplay,
)]
#[strum(serialize_all = "kebab-case")]
pub enum AuxFilePolicy {
    /// V1 aux file policy: store everything in AUX_FILE_KEY
    #[strum(ascii_case_insensitive)]
    V1,
    /// V2 aux file policy: store in the AUX_FILE keyspace
    #[strum(ascii_case_insensitive)]
    V2,
    /// Cross validation runs both formats on the write path and does validation
    /// on the read path.
    #[strum(ascii_case_insensitive)]
    CrossValidation,
}
     351              : 
/// Layer eviction policy of a tenant.
///
/// Serialized in serde's internally tagged form: the variant name is stored
/// under `"kind"`, alongside the fields of the variant's payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum EvictionPolicy {
    NoEviction,
    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
    // NB: the spelling "OnlyImitiate" is part of the serialized format (the
    // `kind` tag) and of `discriminant_str()`; do not "fix" it without a
    // compatibility plan.
    OnlyImitiate(EvictionPolicyLayerAccessThreshold),
}
     359              : 
     360              : impl EvictionPolicy {
     361            0 :     pub fn discriminant_str(&self) -> &'static str {
     362            0 :         match self {
     363            0 :             EvictionPolicy::NoEviction => "NoEviction",
     364            0 :             EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
     365            0 :             EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate",
     366              :         }
     367            0 :     }
     368              : }
     369              : 
/// Selects the compaction algorithm.
///
/// The textual form (used by `Display`/`FromStr`, and through them by serde)
/// is the kebab-case variant name: `legacy` or `tiered`.
#[derive(
    Eq,
    PartialEq,
    Debug,
    Copy,
    Clone,
    strum_macros::EnumString,
    strum_macros::Display,
    serde_with::DeserializeFromStr,
    serde_with::SerializeDisplay,
)]
#[strum(serialize_all = "kebab-case")]
pub enum CompactionAlgorithm {
    Legacy,
    Tiered,
}
     386              : 
/// Compression algorithm setting for image data.
///
/// (De)serialized as a string through the `Display` / `FromStr`
/// implementations: `disabled`, `zstd`, or `zstd(<level>)`.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, serde_with::DeserializeFromStr, serde_with::SerializeDisplay,
)]
pub enum ImageCompressionAlgorithm {
    /// Disabled for writes, support decompressing during read path
    Disabled,
    /// Zstandard compression. Level 0 and `None` mean the same (default level). Levels can be negative as well.
    /// For details, see the [manual](http://facebook.github.io/zstd/zstd_manual.html).
    Zstd {
        level: Option<i8>,
    },
}
     399              : 
     400              : impl FromStr for ImageCompressionAlgorithm {
     401              :     type Err = anyhow::Error;
     402            8 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     403            8 :         let mut components = s.split(['(', ')']);
     404            8 :         let first = components
     405            8 :             .next()
     406            8 :             .ok_or_else(|| anyhow::anyhow!("empty string"))?;
     407            8 :         match first {
     408            8 :             "disabled" => Ok(ImageCompressionAlgorithm::Disabled),
     409            6 :             "zstd" => {
     410            6 :                 let level = if let Some(v) = components.next() {
     411            4 :                     let v: i8 = v.parse()?;
     412            4 :                     Some(v)
     413              :                 } else {
     414            2 :                     None
     415              :                 };
     416              : 
     417            6 :                 Ok(ImageCompressionAlgorithm::Zstd { level })
     418              :             }
     419            0 :             _ => anyhow::bail!("invalid specifier '{first}'"),
     420              :         }
     421            8 :     }
     422              : }
     423              : 
     424              : impl Display for ImageCompressionAlgorithm {
     425           12 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     426           12 :         match self {
     427            3 :             ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
     428            9 :             ImageCompressionAlgorithm::Zstd { level } => {
     429            9 :                 if let Some(level) = level {
     430            6 :                     write!(f, "zstd({})", level)
     431              :                 } else {
     432            3 :                     write!(f, "zstd")
     433              :                 }
     434              :             }
     435              :         }
     436           12 :     }
     437              : }
     438              : 
/// Settings object wrapping the choice of [`CompactionAlgorithm`]; this is
/// the type stored in `TenantConfig::compaction_algorithm`.
#[derive(Eq, PartialEq, Debug, Clone, Serialize, Deserialize)]
pub struct CompactionAlgorithmSettings {
    /// Which compaction algorithm to use.
    pub kind: CompactionAlgorithm,
}
     443              : 
/// Configuration of the L0 flush mode.
///
/// Internally tagged: the kebab-case variant name is stored under `"mode"`
/// (`"direct"`), next to the variant's snake_case fields. Unknown fields are
/// rejected on deserialization.
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)]
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
pub enum L0FlushConfig {
    /// The only mode currently defined; `max_concurrency` must be non-zero.
    #[serde(rename_all = "snake_case")]
    Direct { max_concurrency: NonZeroUsize },
}
     450              : 
/// Payload of the threshold-based [`EvictionPolicy`] variants.
///
/// Both durations are (de)serialized via `humantime_serde`, i.e. as
/// human-readable strings rather than as structured values.
// NOTE(review): `period` presumably is how often the eviction task runs and
// `threshold` how long a layer must go unaccessed; confirm with the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvictionPolicyLayerAccessThreshold {
    #[serde(with = "humantime_serde")]
    pub period: Duration,
    #[serde(with = "humantime_serde")]
    pub threshold: Duration,
}
     458              : 
/// Throttle configuration.
///
/// `refill_amount` per `refill_interval` defines the steady-state rate (see
/// `steady_rps`). An empty `task_kinds` list effectively disables the
/// throttle (see `disabled`).
// NOTE(review): `initial` and `max` look like the starting and maximum fill
// level of a token/leaky bucket; confirm with the throttle implementation.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub struct ThrottleConfig {
    /// Names of the task kinds the throttle applies to.
    pub task_kinds: Vec<String>, // TaskKind
    pub initial: u32,
    /// (De)serialized via `humantime_serde` as a human-readable string.
    #[serde(with = "humantime_serde")]
    pub refill_interval: Duration,
    pub refill_amount: NonZeroU32,
    pub max: u32,
}
     468              : 
     469              : impl ThrottleConfig {
     470          394 :     pub fn disabled() -> Self {
     471          394 :         Self {
     472          394 :             task_kinds: vec![], // effectively disables the throttle
     473          394 :             // other values don't matter with emtpy `task_kinds`.
     474          394 :             initial: 0,
     475          394 :             refill_interval: Duration::from_millis(1),
     476          394 :             refill_amount: NonZeroU32::new(1).unwrap(),
     477          394 :             max: 1,
     478          394 :         }
     479          394 :     }
     480              :     /// The requests per second allowed  by the given config.
     481            0 :     pub fn steady_rps(&self) -> f64 {
     482            0 :         (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64())
     483            0 :     }
     484              : }
     485              : 
/// A flattened analog of a `pageserver::tenant::LocationMode`, which
/// lists out all possible states (and the virtual "Detached" state)
/// in a flat form rather than using rust-style enums.
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
pub enum LocationConfigMode {
    AttachedSingle,
    AttachedMulti,
    AttachedStale,
    Secondary,
    Detached,
}
     497              : 
/// Extra configuration for a location in `Secondary` mode
/// (see `LocationConfig::secondary_conf`).
// NOTE(review): `warm` presumably asks the secondary to keep data downloaded
// and ready; confirm with the pageserver's secondary-mode code.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct LocationConfigSecondary {
    pub warm: bool,
}
     502              : 
/// An alternative representation of `pageserver::tenant::LocationConf`,
/// for use in external-facing APIs.
///
/// Only `mode` and `tenant_conf` are required when deserializing; every other
/// field falls back to its `Default` (`None` / `0`) when absent.
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct LocationConfig {
    /// The mode this location should be in.
    pub mode: LocationConfigMode,
    /// If attaching, in what generation?
    #[serde(default)]
    pub generation: Option<u32>,

    // If requesting mode `Secondary`, configuration for that.
    #[serde(default)]
    pub secondary_conf: Option<LocationConfigSecondary>,

    // Shard parameters: if shard_count is nonzero, then other shard_* fields
    // must be set accurately.
    #[serde(default)]
    pub shard_number: u8,
    #[serde(default)]
    pub shard_count: u8,
    #[serde(default)]
    pub shard_stripe_size: u32,

    // This configuration only affects attached mode, but should be provided irrespective
    // of the mode, as a secondary location might transition on startup if the response
    // to the `/re-attach` control plane API requests it.
    pub tenant_conf: TenantConfig,
}
     530              : 
/// Response listing tenant shards together with their location config.
#[derive(Serialize, Deserialize)]
pub struct LocationConfigListResponse {
    /// One entry per tenant shard; the config is optional.
    // NOTE(review): confirm what a `None` config means for a listed shard.
    pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,
}
     535              : 
/// Serialize-only status response carrying a node id.
#[derive(Serialize)]
pub struct StatusResponse {
    /// Id of the node.
    pub id: NodeId,
}
     540              : 
/// Request body for setting a tenant's location config; the fields of
/// [`LocationConfig`] are flattened into the top-level object.
// NOTE(review): serde documents `deny_unknown_fields` as unsupported in
// combination with `flatten`; keep a test that proves unknown fields are
// actually rejected for this type.
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
pub struct TenantLocationConfigRequest {
    #[serde(flatten)]
    pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
}
     547              : 
     548            0 : #[derive(Serialize, Deserialize, Debug)]
     549              : #[serde(deny_unknown_fields)]
                        /// Request payload holding a list of [`ShardCount`] values. Unknown fields are
                        /// rejected when deserializing.
     550              : pub struct TenantTimeTravelRequest {
     551              :     pub shard_counts: Vec<ShardCount>,
     552              : }
     553              : 
     554            0 : #[derive(Serialize, Deserialize, Debug)]
     555              : #[serde(deny_unknown_fields)]
                        /// Pairs a tenant shard id with a node id. Unknown fields are rejected when
                        /// deserializing.
     556              : pub struct TenantShardLocation {
     557              :     pub shard_id: TenantShardId,
     558              :     pub node_id: NodeId,
     559              : }
     560              : 
     561            0 : #[derive(Serialize, Deserialize, Debug)]
     562              : #[serde(deny_unknown_fields)]
                        /// A list of [`TenantShardLocation`] entries plus an optional stripe size.
                        /// Unknown fields are rejected when deserializing.
     563              : pub struct TenantLocationConfigResponse {
     564              :     pub shards: Vec<TenantShardLocation>,
     565              :     // If the shards' ShardCount count is >1, stripe_size will be set.
     566              :     pub stripe_size: Option<ShardStripeSize>,
     567              : }
     568              : 
     569            3 : #[derive(Serialize, Deserialize, Debug)]
     570              : #[serde(deny_unknown_fields)]
                        /// A tenant id together with a [`TenantConfig`] whose fields are flattened into
                        /// the same serialized object as `tenant_id`.
                        ///
                        /// NOTE(review): serde's documentation lists `deny_unknown_fields` as unsupported
                        /// in combination with `flatten` -- confirm with a test that unknown fields are
                        /// actually rejected for this type.
     571              : pub struct TenantConfigRequest {
     572              :     pub tenant_id: TenantId,
     573              :     #[serde(flatten)]
     574              :     pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
     575              : }
     576              : 
     577              : impl std::ops::Deref for TenantConfigRequest {
                            // Dereferencing a request yields its embedded `TenantConfig`, so the
                            // config's fields can be read directly through the request.
     578              :     type Target = TenantConfig;
     579              :
                            /// Returns a shared reference to the `config` field.
     580            0 :     fn deref(&self) -> &Self::Target {
     581            0 :         &self.config
     582            0 :     }
     583              : }
     584              : 
     585              : impl TenantConfigRequest {
                            /// Builds a request for `tenant_id` whose `config` is `TenantConfig::default()`.
     586            0 :     pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
     587            0 :         let config = TenantConfig::default();
     588            0 :         TenantConfigRequest { tenant_id, config }
     589            0 :     }
     590              : }
     591              : 
     592              : /// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
                        ///
                        /// Serialized in serde's adjacently tagged form: the snake_case variant name is
                        /// stored under `slug` and any variant payload under `data`.
     593            0 : #[derive(Serialize, Deserialize, Clone)]
     594              : #[serde(tag = "slug", content = "data", rename_all = "snake_case")]
     595              : pub enum TenantAttachmentStatus {
     596              :     Maybe,
     597              :     Attached,
                            /// Carries a free-form `reason` string.
     598              :     Failed { reason: String },
     599              : }
     600              : 
     601            0 : #[derive(Serialize, Deserialize, Clone)]
                        /// Per-tenant-shard summary: id, state, optional physical size, attachment
                        /// status, generation number and an optional gc-blocking explanation.
     602              : pub struct TenantInfo {
     603              :     pub id: TenantShardId,
     604              :     // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
     605              :     pub state: TenantState,
     606              :     /// Sum of the size of all layer files.
     607              :     /// If a layer is present in both local FS and S3, it counts only once.
     608              :     pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
     609              :     pub attachment_status: TenantAttachmentStatus,
     610              :     pub generation: u32,
     611              :
     612              :     /// Opaque explanation if gc is being blocked.
     613              :     ///
     614              :     /// Only looked up for the individual tenant detail, not the listing. This is purely for
     615              :     /// debugging, not included in openapi.
                            // Left out of the serialized output entirely when it is `None`.
     616              :     #[serde(skip_serializing_if = "Option::is_none")]
     617              :     pub gc_blocking: Option<String>,
     618              : }
     619              : 
     620            0 : #[derive(Serialize, Deserialize, Clone)]
                        /// A [`TenantInfo`] (flattened into this struct's serialized form) extended with
                        /// an optional walredo status and the list of timeline ids.
     621              : pub struct TenantDetails {
     622              :     #[serde(flatten)]
     623              :     pub tenant_info: TenantInfo,
     624              :
     625              :     pub walredo: Option<WalRedoManagerStatus>,
     626              :
     627              :     pub timelines: Vec<TimelineId>,
     628              : }
     629              : 
     630            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug)]
                        /// Two-valued archival state of a timeline. No serde renaming is applied, so the
                        /// variants keep their Rust names when serialized.
     631              : pub enum TimelineArchivalState {
     632              :     Archived,
     633              :     Unarchived,
     634              : }
     635              : 
     636            0 : #[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
                        /// Request payload carrying a single [`TimelineArchivalState`].
     637              : pub struct TimelineArchivalConfigRequest {
     638              :     pub state: TimelineArchivalState,
     639              : }
     640              : 
     641            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// Two parallel listings: regular timelines as [`TimelineInfo`] and offloaded
                        /// ones as [`OffloadedTimelineInfo`].
     642              : pub struct TimelinesInfoAndOffloaded {
     643              :     pub timelines: Vec<TimelineInfo>,
     644              :     pub offloaded: Vec<OffloadedTimelineInfo>,
     645              : }
     646              : 
     647              : /// Analog of [`TimelineInfo`] for offloaded timelines.
     648            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
     649              : pub struct OffloadedTimelineInfo {
                            // NB: named `tenant_id`, but the type is a `TenantShardId`.
     650              :     pub tenant_id: TenantShardId,
     651              :     pub timeline_id: TimelineId,
     652              :     /// Whether the timeline has a parent it has been branched off from or not
     653              :     pub ancestor_timeline_id: Option<TimelineId>,
     654              :     /// Whether to retain the branch lsn at the ancestor or not
     655              :     pub ancestor_retain_lsn: Option<Lsn>,
     656              :     /// The time point when the timeline was archived
     657              :     pub archived_at: chrono::DateTime<chrono::Utc>,
     658              : }
     659              : 
     660              : /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
     661            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
     662              : pub struct TimelineInfo {
                            // NB: named `tenant_id`, but the type is a `TenantShardId`.
     663              :     pub tenant_id: TenantShardId,
     664              :     pub timeline_id: TimelineId,
     665              :
     666              :     pub ancestor_timeline_id: Option<TimelineId>,
     667              :     pub ancestor_lsn: Option<Lsn>,
     668              :     pub last_record_lsn: Lsn,
     669              :     pub prev_record_lsn: Option<Lsn>,
     670              :     pub latest_gc_cutoff_lsn: Lsn,
     671              :     pub disk_consistent_lsn: Lsn,
     672              :
     673              :     /// The LSN that we have successfully uploaded to remote storage
     674              :     pub remote_consistent_lsn: Lsn,
     675              :
     676              :     /// The LSN that we are advertising to safekeepers
     677              :     pub remote_consistent_lsn_visible: Lsn,
     678              :
     679              :     /// The LSN from the start of the root timeline (never changes)
     680              :     pub initdb_lsn: Lsn,
     681              :
     682              :     pub current_logical_size: u64,
     683              :     pub current_logical_size_is_accurate: bool,
     684              :
     685              :     pub directory_entries_counts: Vec<u64>,
     686              :
     687              :     /// Sum of the size of all layer files.
     688              :     /// If a layer is present in both local FS and S3, it counts only once.
     689              :     pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
     690              :     pub current_logical_size_non_incremental: Option<u64>,
     691              :
     692              :     /// How many bytes of WAL are within this branch's pitr_interval.  If the pitr_interval goes
     693              :     /// beyond the branch's branch point, we only count up to the branch point.
     694              :     pub pitr_history_size: u64,
     695              :
     696              :     /// Whether this branch's branch point is within its ancestor's PITR interval (i.e. any
     697              :     /// ancestor data used by this branch would have been retained anyway).  If this is false, then
     698              :     /// this branch may be imposing a cost on the ancestor by causing it to retain layers that it would
     699              :     /// otherwise be able to GC.
     700              :     pub within_ancestor_pitr: bool,
     701              :
     702              :     pub timeline_dir_layer_file_size_sum: Option<u64>,
     703              :
     704              :     pub wal_source_connstr: Option<String>,
     705              :     pub last_received_msg_lsn: Option<Lsn>,
     706              :     /// the timestamp (in microseconds) of the last received message
     707              :     pub last_received_msg_ts: Option<u128>,
     708              :     pub pg_version: u32,
     709              :
     710              :     pub state: TimelineState,
     711              :
     712              :     pub walreceiver_status: String,
     713              :
     714              :     // ALWAYS add new fields at the end of the struct with `Option` to ensure forward/backward compatibility.
     715              :     // Backward compatibility: you will get a JSON not containing the newly-added field.
     716              :     // Forward compatibility: a previous version of the pageserver will receive a JSON. serde::Deserialize does
     717              :     // not deny unknown fields by default so it's safe to set the field to some value, though it won't be
     718              :     // read.
     719              :     pub is_archived: Option<bool>,
     720              : }
     721              : 
     722            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Layer listing split into in-memory layers and historic layers.
     723              : pub struct LayerMapInfo {
     724              :     pub in_memory_layers: Vec<InMemoryLayerInfo>,
     725              :     pub historic_layers: Vec<HistoricLayerInfo>,
     726              : }
     727              : 
     728              : /// The residence status of a layer
                        ///
                        /// No serde `tag`/`rename_all` attribute is set, so the variants use serde's
                        /// default enum representation with their Rust names.
     729            0 : #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
     730              : pub enum LayerResidenceStatus {
     731              :     /// Residence status for a layer file that exists locally.
     732              :     /// It may also exist on the remote, we don't care here.
     733              :     Resident,
     734              :     /// Residence status for a layer file that only exists on the remote.
     735              :     Evicted,
     736              : }
     737              : 
     738              : #[serde_as]
     739            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Access and residence timestamps of a layer plus a `visible` flag. Both
                        /// timestamps are (de)serialized through `serde_with::TimestampMilliSeconds`.
     740              : pub struct LayerAccessStats {
     741              :     #[serde_as(as = "serde_with::TimestampMilliSeconds")]
     742              :     pub access_time: SystemTime,
     743              :
     744              :     #[serde_as(as = "serde_with::TimestampMilliSeconds")]
     745              :     pub residence_time: SystemTime,
     746              :
     747              :     pub visible: bool,
     748              : }
     749              : 
     750            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     751              : #[serde(tag = "kind")]
                        /// Description of an in-memory layer. Internally tagged for serde: the variant
                        /// name is written to a `kind` field next to the variant's own fields.
     752              : pub enum InMemoryLayerInfo {
                            /// Has only a start LSN.
     753              :     Open { lsn_start: Lsn },
                            /// Has both a start and an end LSN.
     754              :     Frozen { lsn_start: Lsn, lsn_end: Lsn },
     755              : }
     756              : 
     757            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     758              : #[serde(tag = "kind")]
                        /// Description of a historic layer. Internally tagged for serde: the variant
                        /// name is written to a `kind` field next to the variant's own fields.
                        ///
                        /// Both variants share `layer_file_name`, `layer_file_size`, `lsn_start`,
                        /// `remote` and `access_stats`; `Delta` additionally has `lsn_end` and `l0`.
     759              : pub enum HistoricLayerInfo {
     760              :     Delta {
     761              :         layer_file_name: String,
     762              :         layer_file_size: u64,
     763              :
     764              :         lsn_start: Lsn,
     765              :         lsn_end: Lsn,
     766              :         remote: bool,
     767              :         access_stats: LayerAccessStats,
     768              :
     769              :         l0: bool,
     770              :     },
     771              :     Image {
     772              :         layer_file_name: String,
     773              :         layer_file_size: u64,
     774              :
     775              :         lsn_start: Lsn,
     776              :         remote: bool,
     777              :         access_stats: LayerAccessStats,
     778              :     },
     779              : }
     780              : 
     781              : impl HistoricLayerInfo {
                            // Accessors for fields that the `Delta` and `Image` variants both carry.

                            /// Returns the `layer_file_name` of whichever variant this is.
     782            0 :     pub fn layer_file_name(&self) -> &str {
     783            0 :         match self {
     784              :             HistoricLayerInfo::Delta {
     785            0 :                 layer_file_name, ..
     786            0 :             } => layer_file_name,
     787              :             HistoricLayerInfo::Image {
     788            0 :                 layer_file_name, ..
     789            0 :             } => layer_file_name,
     790              :         }
     791            0 :     }
                            /// Returns the `remote` flag of whichever variant this is.
     792            0 :     pub fn is_remote(&self) -> bool {
     793            0 :         match self {
     794            0 :             HistoricLayerInfo::Delta { remote, .. } => *remote,
     795            0 :             HistoricLayerInfo::Image { remote, .. } => *remote,
     796              :         }
     797            0 :     }
                            /// Overwrites the `remote` flag of whichever variant this is with `value`.
     798            0 :     pub fn set_remote(&mut self, value: bool) {
                                // Borrow the flag mutably out of the matching variant, then assign once.
     799            0 :         let field = match self {
     800            0 :             HistoricLayerInfo::Delta { remote, .. } => remote,
     801            0 :             HistoricLayerInfo::Image { remote, .. } => remote,
     802              :         };
     803            0 :         *field = value;
     804            0 :     }
                            /// Returns the `layer_file_size` of whichever variant this is.
     805            0 :     pub fn layer_file_size(&self) -> u64 {
     806            0 :         match self {
     807              :             HistoricLayerInfo::Delta {
     808            0 :                 layer_file_size, ..
     809            0 :             } => *layer_file_size,
     810              :             HistoricLayerInfo::Image {
     811            0 :                 layer_file_size, ..
     812            0 :             } => *layer_file_size,
     813              :         }
     814            0 :     }
     815              : }
     816              : 
     817            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request payload with a download concurrency limit. The limit is a
                        /// `NonZeroUsize`, so zero cannot be represented.
     818              : pub struct DownloadRemoteLayersTaskSpawnRequest {
     819              :     pub max_concurrent_downloads: NonZeroUsize,
     820              : }
     821              : 
     822            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request payload holding a string-to-string map of aux files.
                        // NOTE(review): presumably keys are file paths and values file contents -- confirm
                        // against the handler that consumes this request.
     823              : pub struct IngestAuxFilesRequest {
     824              :     pub aux_files: HashMap<String, String>,
     825              : }
     826              : 
     827            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request payload carrying a single [`Lsn`].
     828              : pub struct ListAuxFilesRequest {
     829              :     pub lsn: Lsn,
     830              : }
     831              : 
     832            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// Snapshot of a download task: its id, its [`DownloadRemoteLayersTaskState`] and
                        /// three layer counters.
     833              : pub struct DownloadRemoteLayersTaskInfo {
     834              :     pub task_id: String,
     835              :     pub state: DownloadRemoteLayersTaskState,
     836              :     pub total_layer_count: u64,         // stable once `completed`
     837              :     pub successful_download_count: u64, // stable once `completed`
     838              :     pub failed_download_count: u64,     // stable once `completed`
     839              : }
     840              : 
     841            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
                        /// State value stored in [`DownloadRemoteLayersTaskInfo::state`]. No serde
                        /// renaming is applied, so variants serialize under their Rust names.
     842              : pub enum DownloadRemoteLayersTaskState {
     843              :     Running,
     844              :     Completed,
     845              :     ShutDown,
     846              : }
     847              : 
     848            0 : #[derive(Debug, Serialize, Deserialize)]
                        /// Request payload with an optional `gc_horizon` value.
     849              : pub struct TimelineGcRequest {
     850              :     pub gc_horizon: Option<u64>,
     851              : }
     852              : 
     853            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Process information embedded in [`WalRedoManagerStatus`]: a single pid.
     854              : pub struct WalRedoManagerProcessStatus {
     855              :     pub pid: u32,
     856              : }
     857              : 
     858            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Walredo status: an optional UTC timestamp of the last redo and optional
                        /// process information. Either part may be absent.
     859              : pub struct WalRedoManagerStatus {
     860              :     pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,
     861              :     pub process: Option<WalRedoManagerProcessStatus>,
     862              : }
     863              : 
     864              : /// The progress of a secondary tenant.
     865              : ///
     866              : /// It is mostly useful when doing a long running download: e.g. initiating
     867              : /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
     868              : /// what's happening.
                        ///
                        /// `Default` is derived, so a default value has no heatmap mtime and all
                        /// counters at zero.
     869            0 : #[derive(Default, Debug, Serialize, Deserialize, Clone)]
     870              : pub struct SecondaryProgress {
     871              :     /// The remote storage LastModified time of the heatmap object we last downloaded.
     872              :     pub heatmap_mtime: Option<serde_system_time::SystemTime>,
     873              :
     874              :     /// The number of layers currently on-disk
     875              :     pub layers_downloaded: usize,
     876              :     /// The number of layers in the most recently seen heatmap
     877              :     pub layers_total: usize,
     878              :
     879              :     /// The number of layer bytes currently on-disk
     880              :     pub bytes_downloaded: u64,
     881              :     /// The number of layer bytes in the most recently seen heatmap
     882              :     pub bytes_total: u64,
     883              : }
     884              : 
     885            0 : #[derive(Serialize, Deserialize, Debug)]
                        /// One entry of [`TenantScanRemoteStorageResponse`]: a tenant shard id and an
                        /// optional generation number.
     886              : pub struct TenantScanRemoteStorageShard {
     887              :     pub tenant_shard_id: TenantShardId,
     888              :     pub generation: Option<u32>,
     889              : }
     890              : 
     891            0 : #[derive(Serialize, Deserialize, Debug, Default)]
                        /// List of [`TenantScanRemoteStorageShard`] entries. `Default` is derived, giving
                        /// an empty list.
     892              : pub struct TenantScanRemoteStorageResponse {
     893              :     pub shards: Vec<TenantScanRemoteStorageShard>,
     894              : }
     895              : 
     896            0 : #[derive(Serialize, Deserialize, Debug, Clone)]
     897              : #[serde(rename_all = "snake_case")]
                        /// Sort key used by [`TopTenantShardsRequest::order_by`]. Variants are
                        /// (de)serialized in snake_case, i.e. `resident_size` and `max_logical_size`.
     898              : pub enum TenantSorting {
     899              :     ResidentSize,
     900              :     MaxLogicalSize,
     901              : }
     902              : 
     903              : impl Default for TenantSorting {
                            /// The default sort key is [`TenantSorting::ResidentSize`].
     904            0 :     fn default() -> Self {
     905            0 :         Self::ResidentSize
     906            0 :     }
     907              : }
     908              : 
     909            0 : #[derive(Serialize, Deserialize, Debug, Clone)]
                        /// Query parameters for a "top tenant shards" listing: a sort key, a result
                        /// limit and two optional filters.
     910              : pub struct TopTenantShardsRequest {
     911              :     // How would you like to sort the tenants?
     912              :     pub order_by: TenantSorting,
     913              :
     914              :     // How many results?
     915              :     pub limit: usize,
     916              :
     917              :     // Omit tenants with more than this many shards (e.g. if this is the max number of shards
     918              :     // that the caller would ever split to)
                            // NOTE(review): the field name says "lt" (strictly less than) while the comment
                            // above says "more than" -- confirm whether a tenant with exactly this many
                            // shards is kept or omitted.
     919              :     pub where_shards_lt: Option<ShardCount>,
     920              :
     921              :     // Omit tenants where the ordering metric is less than this (this is an optimization to
     922              :     // let us quickly exclude numerous tiny shards)
     923              :     pub where_gt: Option<u64>,
     924              : }
     925              : 
     926            0 : #[derive(Serialize, Deserialize, Debug, PartialEq, Eq)]
                        /// One entry of [`TopTenantShardsResponse`]: a tenant shard id with three size
                        /// figures.
     927              : pub struct TopTenantShardItem {
     928              :     pub id: TenantShardId,
     929              :
     930              :     /// Total size of layers on local disk for all timelines in this tenant
     931              :     pub resident_size: u64,
     932              :
     933              :     /// Total size of layers in remote storage for all timelines in this tenant
     934              :     pub physical_size: u64,
     935              :
     936              :     /// The largest logical size of a timeline within this tenant
     937              :     pub max_logical_size: u64,
     938              : }
     939              : 
     940            0 : #[derive(Serialize, Deserialize, Debug, Default)]
                        /// List of [`TopTenantShardItem`] entries. `Default` is derived, giving an empty
                        /// list.
     941              : pub struct TopTenantShardsResponse {
     942              :     pub shards: Vec<TopTenantShardItem>,
     943              : }
     944              : 
     945              : pub mod virtual_file {
                            // Both enums in this module get their string form from strum
                            // (`EnumString`/`Display`, kebab-case) and route serde through that string
                            // form via `serde_with::DeserializeFromStr` / `SerializeDisplay`.

                            /// Selectable IO engine. With the kebab-case setting the variants are
                            /// spelled `std-fs` and `tokio-epoll-uring`.
     946              :     #[derive(
     947              :         Copy,
     948              :         Clone,
     949              :         PartialEq,
     950              :         Eq,
     951              :         Hash,
     952          204 :         strum_macros::EnumString,
     953            0 :         strum_macros::Display,
     954            0 :         serde_with::DeserializeFromStr,
     955              :         serde_with::SerializeDisplay,
     956              :         Debug,
     957              :     )]
     958              :     #[strum(serialize_all = "kebab-case")]
     959              :     pub enum IoEngineKind {
     960              :         StdFs,
                                // Only compiled in when targeting Linux.
     961              :         #[cfg(target_os = "linux")]
     962              :         TokioEpollUring,
     963              :     }
     964              :
     965              :     /// Direct IO modes for a pageserver.
                            ///
                            /// The enum is `#[repr(u8)]`; see the `TryFrom<u8>` impl below for the
                            /// conversion back from a raw byte.
     966              :     #[derive(
     967              :         Copy,
     968              :         Clone,
     969              :         PartialEq,
     970              :         Eq,
     971              :         Hash,
     972            0 :         strum_macros::EnumString,
     973            0 :         strum_macros::Display,
     974            0 :         serde_with::DeserializeFromStr,
     975              :         serde_with::SerializeDisplay,
     976              :         Debug,
     977              :     )]
     978              :     #[strum(serialize_all = "kebab-case")]
     979              :     #[repr(u8)]
     980              :     pub enum IoMode {
     981              :         /// Uses buffered IO.
     982              :         Buffered,
     983              :         /// Uses direct IO, error out if the operation fails.
     984              :         #[cfg(target_os = "linux")]
     985              :         Direct,
     986              :     }
     987              :
     988              :     impl IoMode {
                                /// Returns the preferred mode, which is always [`IoMode::Buffered`].
     989          210 :         pub const fn preferred() -> Self {
     990          210 :             Self::Buffered
     991          210 :         }
     992              :     }
     993              :
     994              :     impl TryFrom<u8> for IoMode {
                                // On failure the unrecognised byte itself is returned as the error.
     995              :         type Error = u8;
     996              :
                                /// Converts a raw `u8` discriminant back into an [`IoMode`].
                                ///
                                /// The byte is compared against each variant's `as u8` value; a byte
                                /// that matches no variant compiled for the current target yields
                                /// `Err(value)`.
     997         1190 :         fn try_from(value: u8) -> Result<Self, Self::Error> {
     998         1190 :             Ok(match value {
     999         1190 :                 v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
    1000              :                 #[cfg(target_os = "linux")]
    1001            0 :                 v if v == (IoMode::Direct as u8) => IoMode::Direct,
    1002            0 :                 x => return Err(x),
    1003              :             })
    1004         1190 :         }
    1005              :     }
    1006              : }
    1007              : 
    1008            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
                        /// Two counters: keys counted as disposable and keys counted as not
                        /// disposable.
    1009              : pub struct ScanDisposableKeysResponse {
    1010              :     pub disposable_count: usize,
    1011              :     pub not_disposable_count: usize,
    1012              : }
    1013              : 
    1014              : // Wrapped in libpq CopyData
    1015              : #[derive(PartialEq, Eq, Debug)]
                        /// Pagestream request message (compute -> pageserver, per the doc comment on
                        /// this type's serializer further down in the file). Each variant wraps the
                        /// matching `Pagestream*Request` struct.
    1016              : pub enum PagestreamFeMessage {
    1017              :     Exists(PagestreamExistsRequest),
    1018              :     Nblocks(PagestreamNblocksRequest),
    1019              :     GetPage(PagestreamGetPageRequest),
    1020              :     DbSize(PagestreamDbSizeRequest),
    1021              :     GetSlruSegment(PagestreamGetSlruSegmentRequest),
    1022              : }
    1023              : 
    1024              : // Wrapped in libpq CopyData
    1025            0 : #[derive(strum_macros::EnumProperty)]
                        /// Pagestream response message. Each variant wraps the matching
                        /// `Pagestream*Response` struct; `Error` has no request-side counterpart in
                        /// [`PagestreamFeMessage`].
    1026              : pub enum PagestreamBeMessage {
    1027              :     Exists(PagestreamExistsResponse),
    1028              :     Nblocks(PagestreamNblocksResponse),
    1029              :     GetPage(PagestreamGetPageResponse),
    1030              :     Error(PagestreamErrorResponse),
    1031              :     DbSize(PagestreamDbSizeResponse),
    1032              :     GetSlruSegment(PagestreamGetSlruSegmentResponse),
    1033              : }
    1034              : 
    1035              : // Keep in sync with `pagestore_client.h`
    1036              : #[repr(u8)]
                        /// One-byte tag for each [`PagestreamBeMessage`] variant, numbered 100 through
                        /// 105. This enum is private to the module.
    1037              : enum PagestreamBeMessageTag {
    1038              :     Exists = 100,
    1039              :     Nblocks = 101,
    1040              :     GetPage = 102,
    1041              :     Error = 103,
    1042              :     DbSize = 104,
    1043              :     GetSlruSegment = 105,
    1044              : }
    1045              : impl TryFrom<u8> for PagestreamBeMessageTag {
                            // On failure the unrecognised byte itself is returned as the error.
    1046              :     type Error = u8;
                            /// Maps a raw tag byte (100 through 105) to its variant; any other value
                            /// is handed back unchanged as `Err(value)`.
    1047            0 :     fn try_from(value: u8) -> Result<Self, u8> {
    1048            0 :         match value {
    1049            0 :             100 => Ok(PagestreamBeMessageTag::Exists),
    1050            0 :             101 => Ok(PagestreamBeMessageTag::Nblocks),
    1051            0 :             102 => Ok(PagestreamBeMessageTag::GetPage),
    1052            0 :             103 => Ok(PagestreamBeMessageTag::Error),
    1053            0 :             104 => Ok(PagestreamBeMessageTag::DbSize),
    1054            0 :             105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
    1055            0 :             _ => Err(value),
    1056              :         }
    1057            0 :     }
    1058              : }
    1059              : 
    1060              : // A GetPage request contains two LSN values:
    1061              : //
    1062              : // request_lsn: Get the page version at this point in time.  Lsn::Max is a special value that means
    1063              : // "get the latest version present". It's used by the primary server, which knows that no one else
    1064              : // is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
    1065              : // Lsn::Max. Standby servers use the current replay LSN as the request LSN.
    1066              : //
    1067              : // not_modified_since: Hint to the pageserver that the client knows that the page has not been
    1068              : // modified between 'not_modified_since' and the request LSN. It's always correct to set
    1069              : // 'not_modified_since' equal to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
    1070              : // passing an earlier LSN can speed up the request, by allowing the pageserver to process the
    1071              : // request without waiting for 'request_lsn' to arrive.
    1072              : //
    1073              : // The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
    1074              : // sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
    1075              : // 'latest' was set to true. The V2 interface was added because there was no correct way for a
    1076              : // standby to request a page at a particular non-latest LSN, and also include the
    1077              : // 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
    1078              : // request, if the standby knows that the page hasn't been modified since, and risk getting an error
    1079              : // if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
    1080              : // require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
    1081              : // interface allows sending both LSNs, and let the pageserver do the right thing. There was no
    1082              : // difference in the responses between V1 and V2.
    1083              : //
    1084              : #[derive(Clone, Copy)]
                        /// Pagestream protocol version. `V2` is the only variant; the V1 interface
                        /// described in the comment above is defunct and has no variant here.
    1085              : pub enum PagestreamProtocolVersion {
    1086              :     V2,
    1087              : }
    1088              : 
    1089              : #[derive(Debug, PartialEq, Eq)]
                        /// Payload of [`PagestreamFeMessage::Exists`]: the two request LSNs plus a
                        /// relation tag. The LSN pair is explained in the comment above
                        /// [`PagestreamProtocolVersion`].
    1090              : pub struct PagestreamExistsRequest {
    1091              :     pub request_lsn: Lsn,
    1092              :     pub not_modified_since: Lsn,
    1093              :     pub rel: RelTag,
    1094              : }
    1095              : 
    1096              : #[derive(Debug, PartialEq, Eq)]
                        /// Payload of [`PagestreamFeMessage::Nblocks`]: the two request LSNs plus a
                        /// relation tag. The LSN pair is explained in the comment above
                        /// [`PagestreamProtocolVersion`].
    1097              : pub struct PagestreamNblocksRequest {
    1098              :     pub request_lsn: Lsn,
    1099              :     pub not_modified_since: Lsn,
    1100              :     pub rel: RelTag,
    1101              : }
    1102              : 
    1103              : #[derive(Debug, PartialEq, Eq)]
                        /// Payload of [`PagestreamFeMessage::GetPage`]: the two request LSNs, a
                        /// relation tag and a block number. The LSN pair is explained in the comment
                        /// above [`PagestreamProtocolVersion`].
    1104              : pub struct PagestreamGetPageRequest {
    1105              :     pub request_lsn: Lsn,
    1106              :     pub not_modified_since: Lsn,
    1107              :     pub rel: RelTag,
    1108              :     pub blkno: u32,
    1109              : }
    1110              : 
    1111              : #[derive(Debug, PartialEq, Eq)]
                        /// Payload of [`PagestreamFeMessage::DbSize`]: the two request LSNs plus a
                        /// `dbnode` number. The LSN pair is explained in the comment above
                        /// [`PagestreamProtocolVersion`].
    1112              : pub struct PagestreamDbSizeRequest {
    1113              :     pub request_lsn: Lsn,
    1114              :     pub not_modified_since: Lsn,
    1115              :     pub dbnode: u32,
    1116              : }
    1117              : 
    1118              : #[derive(Debug, PartialEq, Eq)]
                        /// Payload of [`PagestreamFeMessage::GetSlruSegment`]: the two request LSNs, a
                        /// one-byte `kind` and a segment number. The LSN pair is explained in the
                        /// comment above [`PagestreamProtocolVersion`].
    1119              : pub struct PagestreamGetSlruSegmentRequest {
    1120              :     pub request_lsn: Lsn,
    1121              :     pub not_modified_since: Lsn,
    1122              :     pub kind: u8,
    1123              :     pub segno: u32,
    1124              : }
    1125              : 
    1126              : #[derive(Debug)]
    1127              : pub struct PagestreamExistsResponse {
    1128              :     pub exists: bool,
    1129              : }
    1130              : 
    1131              : #[derive(Debug)]
    1132              : pub struct PagestreamNblocksResponse {
    1133              :     pub n_blocks: u32,
    1134              : }
    1135              : 
    1136              : #[derive(Debug)]
    1137              : pub struct PagestreamGetPageResponse {
    1138              :     pub page: Bytes,
    1139              : }
    1140              : 
    1141              : #[derive(Debug)]
    1142              : pub struct PagestreamGetSlruSegmentResponse {
    1143              :     pub segment: Bytes,
    1144              : }
    1145              : 
    1146              : #[derive(Debug)]
    1147              : pub struct PagestreamErrorResponse {
    1148              :     pub message: String,
    1149              : }
    1150              : 
    1151              : #[derive(Debug)]
    1152              : pub struct PagestreamDbSizeResponse {
    1153              :     pub db_size: i64,
    1154              : }
    1155              : 
    1156              : // This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
    1157              : // that require pageserver-internal types.  It is sufficient to get the total size.
    1158            0 : #[derive(Serialize, Deserialize, Debug)]
    1159              : pub struct TenantHistorySize {
    1160              :     pub id: TenantId,
    1161              :     /// Size is a mixture of WAL and logical size, so the unit is bytes.
    1162              :     ///
    1163              :     /// Will be none if `?inputs_only=true` was given.
    1164              :     pub size: Option<u64>,
    1165              : }
    1166              : 
    1167              : impl PagestreamFeMessage {
    1168              :     /// Serialize a compute -> pageserver message. This is currently only used in testing
    1169              :     /// tools. Always uses protocol version 2.
    1170            4 :     pub fn serialize(&self) -> Bytes {
    1171            4 :         let mut bytes = BytesMut::new();
    1172            4 : 
    1173            4 :         match self {
    1174            1 :             Self::Exists(req) => {
    1175            1 :                 bytes.put_u8(0);
    1176            1 :                 bytes.put_u64(req.request_lsn.0);
    1177            1 :                 bytes.put_u64(req.not_modified_since.0);
    1178            1 :                 bytes.put_u32(req.rel.spcnode);
    1179            1 :                 bytes.put_u32(req.rel.dbnode);
    1180            1 :                 bytes.put_u32(req.rel.relnode);
    1181            1 :                 bytes.put_u8(req.rel.forknum);
    1182            1 :             }
    1183              : 
    1184            1 :             Self::Nblocks(req) => {
    1185            1 :                 bytes.put_u8(1);
    1186            1 :                 bytes.put_u64(req.request_lsn.0);
    1187            1 :                 bytes.put_u64(req.not_modified_since.0);
    1188            1 :                 bytes.put_u32(req.rel.spcnode);
    1189            1 :                 bytes.put_u32(req.rel.dbnode);
    1190            1 :                 bytes.put_u32(req.rel.relnode);
    1191            1 :                 bytes.put_u8(req.rel.forknum);
    1192            1 :             }
    1193              : 
    1194            1 :             Self::GetPage(req) => {
    1195            1 :                 bytes.put_u8(2);
    1196            1 :                 bytes.put_u64(req.request_lsn.0);
    1197            1 :                 bytes.put_u64(req.not_modified_since.0);
    1198            1 :                 bytes.put_u32(req.rel.spcnode);
    1199            1 :                 bytes.put_u32(req.rel.dbnode);
    1200            1 :                 bytes.put_u32(req.rel.relnode);
    1201            1 :                 bytes.put_u8(req.rel.forknum);
    1202            1 :                 bytes.put_u32(req.blkno);
    1203            1 :             }
    1204              : 
    1205            1 :             Self::DbSize(req) => {
    1206            1 :                 bytes.put_u8(3);
    1207            1 :                 bytes.put_u64(req.request_lsn.0);
    1208            1 :                 bytes.put_u64(req.not_modified_since.0);
    1209            1 :                 bytes.put_u32(req.dbnode);
    1210            1 :             }
    1211              : 
    1212            0 :             Self::GetSlruSegment(req) => {
    1213            0 :                 bytes.put_u8(4);
    1214            0 :                 bytes.put_u64(req.request_lsn.0);
    1215            0 :                 bytes.put_u64(req.not_modified_since.0);
    1216            0 :                 bytes.put_u8(req.kind);
    1217            0 :                 bytes.put_u32(req.segno);
    1218            0 :             }
    1219              :         }
    1220              : 
    1221            4 :         bytes.into()
    1222            4 :     }
    1223              : 
    1224            4 :     pub fn parse<R: std::io::Read>(body: &mut R) -> anyhow::Result<PagestreamFeMessage> {
    1225              :         // these correspond to the NeonMessageTag enum in pagestore_client.h
    1226              :         //
    1227              :         // TODO: consider using protobuf or serde bincode for less error prone
    1228              :         // serialization.
    1229            4 :         let msg_tag = body.read_u8()?;
    1230              : 
    1231              :         // these two fields are the same for every request type
    1232            4 :         let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
    1233            4 :         let not_modified_since = Lsn::from(body.read_u64::<BigEndian>()?);
    1234              : 
    1235            4 :         match msg_tag {
    1236              :             0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
    1237            1 :                 request_lsn,
    1238            1 :                 not_modified_since,
    1239            1 :                 rel: RelTag {
    1240            1 :                     spcnode: body.read_u32::<BigEndian>()?,
    1241            1 :                     dbnode: body.read_u32::<BigEndian>()?,
    1242            1 :                     relnode: body.read_u32::<BigEndian>()?,
    1243            1 :                     forknum: body.read_u8()?,
    1244              :                 },
    1245              :             })),
    1246              :             1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
    1247            1 :                 request_lsn,
    1248            1 :                 not_modified_since,
    1249            1 :                 rel: RelTag {
    1250            1 :                     spcnode: body.read_u32::<BigEndian>()?,
    1251            1 :                     dbnode: body.read_u32::<BigEndian>()?,
    1252            1 :                     relnode: body.read_u32::<BigEndian>()?,
    1253            1 :                     forknum: body.read_u8()?,
    1254              :                 },
    1255              :             })),
    1256              :             2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
    1257            1 :                 request_lsn,
    1258            1 :                 not_modified_since,
    1259            1 :                 rel: RelTag {
    1260            1 :                     spcnode: body.read_u32::<BigEndian>()?,
    1261            1 :                     dbnode: body.read_u32::<BigEndian>()?,
    1262            1 :                     relnode: body.read_u32::<BigEndian>()?,
    1263            1 :                     forknum: body.read_u8()?,
    1264              :                 },
    1265            1 :                 blkno: body.read_u32::<BigEndian>()?,
    1266              :             })),
    1267              :             3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
    1268            1 :                 request_lsn,
    1269            1 :                 not_modified_since,
    1270            1 :                 dbnode: body.read_u32::<BigEndian>()?,
    1271              :             })),
    1272              :             4 => Ok(PagestreamFeMessage::GetSlruSegment(
    1273              :                 PagestreamGetSlruSegmentRequest {
    1274            0 :                     request_lsn,
    1275            0 :                     not_modified_since,
    1276            0 :                     kind: body.read_u8()?,
    1277            0 :                     segno: body.read_u32::<BigEndian>()?,
    1278              :                 },
    1279              :             )),
    1280            0 :             _ => bail!("unknown smgr message tag: {:?}", msg_tag),
    1281              :         }
    1282            4 :     }
    1283              : }
    1284              : 
    1285              : impl PagestreamBeMessage {
    1286            0 :     pub fn serialize(&self) -> Bytes {
    1287            0 :         let mut bytes = BytesMut::new();
    1288              : 
    1289              :         use PagestreamBeMessageTag as Tag;
    1290            0 :         match self {
    1291            0 :             Self::Exists(resp) => {
    1292            0 :                 bytes.put_u8(Tag::Exists as u8);
    1293            0 :                 bytes.put_u8(resp.exists as u8);
    1294            0 :             }
    1295              : 
    1296            0 :             Self::Nblocks(resp) => {
    1297            0 :                 bytes.put_u8(Tag::Nblocks as u8);
    1298            0 :                 bytes.put_u32(resp.n_blocks);
    1299            0 :             }
    1300              : 
    1301            0 :             Self::GetPage(resp) => {
    1302            0 :                 bytes.put_u8(Tag::GetPage as u8);
    1303            0 :                 bytes.put(&resp.page[..]);
    1304            0 :             }
    1305              : 
    1306            0 :             Self::Error(resp) => {
    1307            0 :                 bytes.put_u8(Tag::Error as u8);
    1308            0 :                 bytes.put(resp.message.as_bytes());
    1309            0 :                 bytes.put_u8(0); // null terminator
    1310            0 :             }
    1311            0 :             Self::DbSize(resp) => {
    1312            0 :                 bytes.put_u8(Tag::DbSize as u8);
    1313            0 :                 bytes.put_i64(resp.db_size);
    1314            0 :             }
    1315              : 
    1316            0 :             Self::GetSlruSegment(resp) => {
    1317            0 :                 bytes.put_u8(Tag::GetSlruSegment as u8);
    1318            0 :                 bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
    1319            0 :                 bytes.put(&resp.segment[..]);
    1320            0 :             }
    1321              :         }
    1322              : 
    1323            0 :         bytes.into()
    1324            0 :     }
    1325              : 
    1326            0 :     pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
    1327            0 :         let mut buf = buf.reader();
    1328            0 :         let msg_tag = buf.read_u8()?;
    1329              : 
    1330              :         use PagestreamBeMessageTag as Tag;
    1331            0 :         let ok =
    1332            0 :             match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
    1333              :                 Tag::Exists => {
    1334            0 :                     let exists = buf.read_u8()?;
    1335            0 :                     Self::Exists(PagestreamExistsResponse {
    1336            0 :                         exists: exists != 0,
    1337            0 :                     })
    1338              :                 }
    1339              :                 Tag::Nblocks => {
    1340            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    1341            0 :                     Self::Nblocks(PagestreamNblocksResponse { n_blocks })
    1342              :                 }
    1343              :                 Tag::GetPage => {
    1344            0 :                     let mut page = vec![0; 8192]; // TODO: use MaybeUninit
    1345            0 :                     buf.read_exact(&mut page)?;
    1346            0 :                     PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
    1347              :                 }
    1348              :                 Tag::Error => {
    1349            0 :                     let mut msg = Vec::new();
    1350            0 :                     buf.read_until(0, &mut msg)?;
    1351            0 :                     let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
    1352            0 :                     let rust_str = cstring.to_str()?;
    1353            0 :                     PagestreamBeMessage::Error(PagestreamErrorResponse {
    1354            0 :                         message: rust_str.to_owned(),
    1355            0 :                     })
    1356              :                 }
    1357              :                 Tag::DbSize => {
    1358            0 :                     let db_size = buf.read_i64::<BigEndian>()?;
    1359            0 :                     Self::DbSize(PagestreamDbSizeResponse { db_size })
    1360              :                 }
    1361              :                 Tag::GetSlruSegment => {
    1362            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    1363            0 :                     let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
    1364            0 :                     buf.read_exact(&mut segment)?;
    1365            0 :                     Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
    1366            0 :                         segment: segment.into(),
    1367            0 :                     })
    1368              :                 }
    1369              :             };
    1370            0 :         let remaining = buf.into_inner();
    1371            0 :         if !remaining.is_empty() {
    1372            0 :             anyhow::bail!(
    1373            0 :                 "remaining bytes in msg with tag={msg_tag}: {}",
    1374            0 :                 remaining.len()
    1375            0 :             );
    1376            0 :         }
    1377            0 :         Ok(ok)
    1378            0 :     }
    1379              : 
    1380            0 :     pub fn kind(&self) -> &'static str {
    1381            0 :         match self {
    1382            0 :             Self::Exists(_) => "Exists",
    1383            0 :             Self::Nblocks(_) => "Nblocks",
    1384            0 :             Self::GetPage(_) => "GetPage",
    1385            0 :             Self::Error(_) => "Error",
    1386            0 :             Self::DbSize(_) => "DbSize",
    1387            0 :             Self::GetSlruSegment(_) => "GetSlruSegment",
    1388              :         }
    1389            0 :     }
    1390              : }
    1391              : 
    1392              : #[cfg(test)]
    1393              : mod tests {
    1394              :     use serde_json::json;
    1395              :     use std::str::FromStr;
    1396              : 
    1397              :     use super::*;
    1398              : 
    1399              :     #[test]
    1400            1 :     fn test_pagestream() {
    1401            1 :         // Test serialization/deserialization of PagestreamFeMessage
    1402            1 :         let messages = vec![
    1403            1 :             PagestreamFeMessage::Exists(PagestreamExistsRequest {
    1404            1 :                 request_lsn: Lsn(4),
    1405            1 :                 not_modified_since: Lsn(3),
    1406            1 :                 rel: RelTag {
    1407            1 :                     forknum: 1,
    1408            1 :                     spcnode: 2,
    1409            1 :                     dbnode: 3,
    1410            1 :                     relnode: 4,
    1411            1 :                 },
    1412            1 :             }),
    1413            1 :             PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
    1414            1 :                 request_lsn: Lsn(4),
    1415            1 :                 not_modified_since: Lsn(4),
    1416            1 :                 rel: RelTag {
    1417            1 :                     forknum: 1,
    1418            1 :                     spcnode: 2,
    1419            1 :                     dbnode: 3,
    1420            1 :                     relnode: 4,
    1421            1 :                 },
    1422            1 :             }),
    1423            1 :             PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
    1424            1 :                 request_lsn: Lsn(4),
    1425            1 :                 not_modified_since: Lsn(3),
    1426            1 :                 rel: RelTag {
    1427            1 :                     forknum: 1,
    1428            1 :                     spcnode: 2,
    1429            1 :                     dbnode: 3,
    1430            1 :                     relnode: 4,
    1431            1 :                 },
    1432            1 :                 blkno: 7,
    1433            1 :             }),
    1434            1 :             PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
    1435            1 :                 request_lsn: Lsn(4),
    1436            1 :                 not_modified_since: Lsn(3),
    1437            1 :                 dbnode: 7,
    1438            1 :             }),
    1439            1 :         ];
    1440            5 :         for msg in messages {
    1441            4 :             let bytes = msg.serialize();
    1442            4 :             let reconstructed = PagestreamFeMessage::parse(&mut bytes.reader()).unwrap();
    1443            4 :             assert!(msg == reconstructed);
    1444              :         }
    1445            1 :     }
    1446              : 
    1447              :     #[test]
    1448            1 :     fn test_tenantinfo_serde() {
    1449            1 :         // Test serialization/deserialization of TenantInfo
    1450            1 :         let original_active = TenantInfo {
    1451            1 :             id: TenantShardId::unsharded(TenantId::generate()),
    1452            1 :             state: TenantState::Active,
    1453            1 :             current_physical_size: Some(42),
    1454            1 :             attachment_status: TenantAttachmentStatus::Attached,
    1455            1 :             generation: 1,
    1456            1 :             gc_blocking: None,
    1457            1 :         };
    1458            1 :         let expected_active = json!({
    1459            1 :             "id": original_active.id.to_string(),
    1460            1 :             "state": {
    1461            1 :                 "slug": "Active",
    1462            1 :             },
    1463            1 :             "current_physical_size": 42,
    1464            1 :             "attachment_status": {
    1465            1 :                 "slug":"attached",
    1466            1 :             },
    1467            1 :             "generation" : 1
    1468            1 :         });
    1469            1 : 
    1470            1 :         let original_broken = TenantInfo {
    1471            1 :             id: TenantShardId::unsharded(TenantId::generate()),
    1472            1 :             state: TenantState::Broken {
    1473            1 :                 reason: "reason".into(),
    1474            1 :                 backtrace: "backtrace info".into(),
    1475            1 :             },
    1476            1 :             current_physical_size: Some(42),
    1477            1 :             attachment_status: TenantAttachmentStatus::Attached,
    1478            1 :             generation: 1,
    1479            1 :             gc_blocking: None,
    1480            1 :         };
    1481            1 :         let expected_broken = json!({
    1482            1 :             "id": original_broken.id.to_string(),
    1483            1 :             "state": {
    1484            1 :                 "slug": "Broken",
    1485            1 :                 "data": {
    1486            1 :                     "backtrace": "backtrace info",
    1487            1 :                     "reason": "reason",
    1488            1 :                 }
    1489            1 :             },
    1490            1 :             "current_physical_size": 42,
    1491            1 :             "attachment_status": {
    1492            1 :                 "slug":"attached",
    1493            1 :             },
    1494            1 :             "generation" : 1
    1495            1 :         });
    1496            1 : 
    1497            1 :         assert_eq!(
    1498            1 :             serde_json::to_value(&original_active).unwrap(),
    1499            1 :             expected_active
    1500            1 :         );
    1501              : 
    1502            1 :         assert_eq!(
    1503            1 :             serde_json::to_value(&original_broken).unwrap(),
    1504            1 :             expected_broken
    1505            1 :         );
    1506            1 :         assert!(format!("{:?}", &original_broken.state).contains("reason"));
    1507            1 :         assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
    1508            1 :     }
    1509              : 
    1510              :     #[test]
    1511            1 :     fn test_reject_unknown_field() {
    1512            1 :         let id = TenantId::generate();
    1513            1 :         let config_request = json!({
    1514            1 :             "tenant_id": id.to_string(),
    1515            1 :             "unknown_field": "unknown_value".to_string(),
    1516            1 :         });
    1517            1 :         let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
    1518            1 :         assert!(
    1519            1 :             err.to_string().contains("unknown field `unknown_field`"),
    1520            0 :             "expect unknown field `unknown_field` error, got: {}",
    1521              :             err
    1522              :         );
    1523            1 :     }
    1524              : 
    1525              :     #[test]
    1526            1 :     fn tenantstatus_activating_serde() {
    1527            1 :         let states = [TenantState::Activating(ActivatingFrom::Attaching)];
    1528            1 :         let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
    1529            1 : 
    1530            1 :         let actual = serde_json::to_string(&states).unwrap();
    1531            1 : 
    1532            1 :         assert_eq!(actual, expected);
    1533              : 
    1534            1 :         let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();
    1535            1 : 
    1536            1 :         assert_eq!(states.as_slice(), &parsed);
    1537            1 :     }
    1538              : 
    1539              :     #[test]
    1540            1 :     fn tenantstatus_activating_strum() {
    1541            1 :         // tests added, because we use these for metrics
    1542            1 :         let examples = [
    1543            1 :             (line!(), TenantState::Attaching, "Attaching"),
    1544            1 :             (
    1545            1 :                 line!(),
    1546            1 :                 TenantState::Activating(ActivatingFrom::Attaching),
    1547            1 :                 "Activating",
    1548            1 :             ),
    1549            1 :             (line!(), TenantState::Active, "Active"),
    1550            1 :             (
    1551            1 :                 line!(),
    1552            1 :                 TenantState::Stopping {
    1553            1 :                     progress: utils::completion::Barrier::default(),
    1554            1 :                 },
    1555            1 :                 "Stopping",
    1556            1 :             ),
    1557            1 :             (
    1558            1 :                 line!(),
    1559            1 :                 TenantState::Broken {
    1560            1 :                     reason: "Example".into(),
    1561            1 :                     backtrace: "Looooong backtrace".into(),
    1562            1 :                 },
    1563            1 :                 "Broken",
    1564            1 :             ),
    1565            1 :         ];
    1566              : 
    1567            6 :         for (line, rendered, expected) in examples {
    1568            5 :             let actual: &'static str = rendered.into();
    1569            5 :             assert_eq!(actual, expected, "example on {line}");
    1570              :         }
    1571            1 :     }
    1572              : 
    1573              :     #[test]
    1574            1 :     fn test_image_compression_algorithm_parsing() {
    1575              :         use ImageCompressionAlgorithm::*;
    1576            1 :         let cases = [
    1577            1 :             ("disabled", Disabled),
    1578            1 :             ("zstd", Zstd { level: None }),
    1579            1 :             ("zstd(18)", Zstd { level: Some(18) }),
    1580            1 :             ("zstd(-3)", Zstd { level: Some(-3) }),
    1581            1 :         ];
    1582              : 
    1583            5 :         for (display, expected) in cases {
    1584            4 :             assert_eq!(
    1585            4 :                 ImageCompressionAlgorithm::from_str(display).unwrap(),
    1586              :                 expected,
    1587            0 :                 "parsing works"
    1588              :             );
    1589            4 :             assert_eq!(format!("{expected}"), display, "Display FromStr roundtrip");
    1590              : 
    1591            4 :             let ser = serde_json::to_string(&expected).expect("serialization");
    1592            4 :             assert_eq!(
    1593            4 :                 serde_json::from_str::<ImageCompressionAlgorithm>(&ser).unwrap(),
    1594              :                 expected,
    1595            0 :                 "serde roundtrip"
    1596              :             );
    1597              : 
    1598            4 :             assert_eq!(
    1599            4 :                 serde_json::Value::String(display.to_string()),
    1600            4 :                 serde_json::to_value(expected).unwrap(),
    1601            0 :                 "Display is the serde serialization"
    1602              :             );
    1603              :         }
    1604            1 :     }
    1605              : }
        

Generated by: LCOV version 2.1-beta