LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - models.rs (source / functions) Coverage Total Hit
Test: 691a4c28fe7169edd60b367c52d448a0a6605f1f.info Lines: 56.5 % 522 295
Test Date: 2024-05-10 13:18:37 Functions: 4.3 % 949 41

            Line data    Source code
       1              : pub mod detach_ancestor;
       2              : pub mod partitioning;
       3              : pub mod utilization;
       4              : 
       5              : pub use utilization::PageserverUtilization;
       6              : 
       7              : use std::{
       8              :     borrow::Cow,
       9              :     collections::HashMap,
      10              :     io::{BufRead, Read},
      11              :     num::{NonZeroU64, NonZeroUsize},
      12              :     str::FromStr,
      13              :     time::{Duration, SystemTime},
      14              : };
      15              : 
      16              : use byteorder::{BigEndian, ReadBytesExt};
      17              : use postgres_ffi::BLCKSZ;
      18              : use serde::{Deserialize, Serialize};
      19              : use serde_with::serde_as;
      20              : use utils::{
      21              :     completion,
      22              :     history_buffer::HistoryBufferWithDropCounter,
      23              :     id::{NodeId, TenantId, TimelineId},
      24              :     lsn::Lsn,
      25              :     serde_system_time,
      26              : };
      27              : 
      28              : use crate::controller_api::PlacementPolicy;
      29              : use crate::{
      30              :     reltag::RelTag,
      31              :     shard::{ShardCount, ShardStripeSize, TenantShardId},
      32              : };
      33              : use anyhow::bail;
      34              : use bytes::{Buf, BufMut, Bytes, BytesMut};
      35              : 
      36              : /// The state of a tenant in this pageserver.
      37              : ///
      38              : /// ```mermaid
      39              : /// stateDiagram-v2
      40              : ///
      41              : ///     [*] --> Loading: spawn_load()
      42              : ///     [*] --> Attaching: spawn_attach()
      43              : ///
      44              : ///     Loading --> Activating: activate()
      45              : ///     Attaching --> Activating: activate()
      46              : ///     Activating --> Active: infallible
      47              : ///
      48              : ///     Loading --> Broken: load() failure
      49              : ///     Attaching --> Broken: attach() failure
      50              : ///
      51              : ///     Active --> Stopping: set_stopping(), part of shutdown & detach
      52              : ///     Stopping --> Broken: late error in remove_tenant_from_memory
      53              : ///
      54              : ///     Broken --> [*]: ignore / detach / shutdown
      55              : ///     Stopping --> [*]: remove_from_memory complete
      56              : ///
      57              : ///     Active --> Broken: cfg(testing)-only tenant break point
      58              : /// ```
                        // `Debug` is deliberately absent from the derive list: it is implemented by hand
                        // elsewhere in this file so that `Broken` can print its reason and backtrace.
      59              : #[derive(
      60              :     Clone,
      61              :     PartialEq,
      62              :     Eq,
      63            2 :     serde::Serialize,
      64           12 :     serde::Deserialize,
      65            0 :     strum_macros::Display,
      66              :     strum_macros::EnumVariantNames,
      67            0 :     strum_macros::AsRefStr,
      68          248 :     strum_macros::IntoStaticStr,
      69              : )]
                        // Adjacently tagged on the wire: the variant name is stored under "slug" and any
                        // variant payload under "data".
      70              : #[serde(tag = "slug", content = "data")]
      71              : pub enum TenantState {
      72              :     /// This tenant is being loaded from local disk.
      73              :     ///
      74              :     /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
      75              :     Loading,
      76              :     /// This tenant is being attached to the pageserver.
      77              :     ///
      78              :     /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
      79              :     Attaching,
      80              :     /// The tenant is transitioning from Loading/Attaching to Active.
      81              :     ///
      82              :     /// While in this state, the individual timelines are being activated.
      83              :     ///
      84              :     /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
      85              :     Activating(ActivatingFrom),
      86              :     /// The tenant has finished activating and is open for business.
      87              :     ///
      88              :     /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
      89              :     Active,
      90              :     /// The tenant is recognized by pageserver, but it is being detached or the
      91              :     /// system is being shut down.
      92              :     ///
      93              :     /// Transitions out of this state are possible through `set_broken()`.
      94              :     Stopping {
      95              :         // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
      96              :         // otherwise it will not be skipped during deserialization
      97              :         #[serde(skip)]
      98              :         progress: completion::Barrier,
      99              :     },
     100              :     /// The tenant is recognized by the pageserver, but can no longer be used for
     101              :     /// any operations.
     102              :     ///
     103              :     /// If the tenant fails to load or attach, it will transition to this state
     104              :     /// and it is guaranteed that no background tasks are running in its name.
     105              :     ///
     106              :     /// The other way to transition into this state is from `Stopping` state
     107              :     /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
     108              :     /// if the cleanup future executed by `remove_tenant_from_memory()` fails.
                            ///
                            /// `reason` is the cause reported through `attachment_status()`; `backtrace` is the
                            /// rendered backtrace text that `broken_from_reason()` captures at construction.
     109              :     Broken { reason: String, backtrace: String },
     110              : }
     111              : 
     112              : impl TenantState {
     113            0 :     pub fn attachment_status(&self) -> TenantAttachmentStatus {
     114              :         use TenantAttachmentStatus::*;
     115              : 
     116              :         // Below TenantState::Activating is used as "transient" or "transparent" state for
     117              :         // attachment_status determining.
     118            0 :         match self {
     119              :             // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
     120              :             // So, technically, we can return Attached here.
     121              :             // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
     122              :             // But, our attach task might still be fetching the remote timelines, etc.
     123              :             // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
     124            0 :             Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
     125              :             // tenant mgr startup distinguishes attaching from loading via marker file.
     126            0 :             Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
     127              :             // We only reach Active after successful load / attach.
     128              :             // So, call atttachment status Attached.
     129            0 :             Self::Active => Attached,
     130              :             // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
     131              :             // However, it also becomes Broken if the regular load fails.
     132              :             // From Console's perspective there's no practical difference
     133              :             // because attachment_status is polled by console only during attach operation execution.
     134            0 :             Self::Broken { reason, .. } => Failed {
     135            0 :                 reason: reason.to_owned(),
     136            0 :             },
     137              :             // Why is Stopping a Maybe case? Because, during pageserver shutdown,
     138              :             // we set the Stopping state irrespective of whether the tenant
     139              :             // has finished attaching or not.
     140            0 :             Self::Stopping { .. } => Maybe,
     141              :         }
     142            0 :     }
     143              : 
     144            0 :     pub fn broken_from_reason(reason: String) -> Self {
     145            0 :         let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture());
     146            0 :         Self::Broken {
     147            0 :             reason,
     148            0 :             backtrace: backtrace_str,
     149            0 :         }
     150            0 :     }
     151              : }
     152              : 
     153              : impl std::fmt::Debug for TenantState {
     154            4 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     155            4 :         match self {
     156            4 :             Self::Broken { reason, backtrace } if !reason.is_empty() => {
     157            4 :                 write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}")
     158              :             }
     159            0 :             _ => write!(f, "{self}"),
     160              :         }
     161            4 :     }
     162              : }
     163              : 
     164              : /// The only [`TenantState`] variants we could be `TenantState::Activating` from.
                        ///
                        /// [`TenantState::attachment_status`] reports an activating tenant the same way as the
                        /// state recorded here (`Attaching` => `Maybe`, `Loading` => `Attached`).
     165            8 : #[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     166              : pub enum ActivatingFrom {
     167              :     /// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
     168              :     Loading,
     169              :     /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
     170              :     Attaching,
     171              : }
     172              : 
     173              : /// A state of a timeline in pageserver's memory.
                        ///
                        /// No serde tag attribute is set, so serde's default enum representation applies.
     174            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     175              : pub enum TimelineState {
     176              :     /// The timeline is recognized by the pageserver but is not yet operational.
     177              :     /// In particular, the walreceiver connection loop is not running for this timeline.
     178              :     /// It will eventually transition to state Active or Broken.
     179              :     Loading,
     180              :     /// The timeline is fully operational.
     181              :     /// It can be queried, and the walreceiver connection loop is running.
     182              :     Active,
     183              :     /// The timeline was previously Loading or Active but is shutting down.
     184              :     /// It cannot transition back into any other state.
     185              :     Stopping,
     186              :     /// The timeline is broken and not operational (previous states: Loading or Active).
                            // NOTE(review): `reason` / `backtrace` presumably mirror `TenantState::Broken` — confirm.
     187              :     Broken { reason: String, backtrace: String },
     188              : }
     189              : 
                        /// Request payload describing a timeline to create (going by the type name).
                        ///
                        /// Fields marked `#[serde(default)]` may be left out of the input and then become `None`.
     190            0 : #[derive(Serialize, Deserialize, Clone)]
     191              : pub struct TimelineCreateRequest {
                            /// ID of the timeline; the field name suggests it is the one being created.
     192              :     pub new_timeline_id: TimelineId,
     193              :     #[serde(default)]
     194              :     pub ancestor_timeline_id: Option<TimelineId>,
     195              :     #[serde(default)]
     196              :     pub existing_initdb_timeline_id: Option<TimelineId>,
     197              :     #[serde(default)]
     198              :     pub ancestor_start_lsn: Option<Lsn>,
                            // NOTE(review): unlike the options above, this one carries no `#[serde(default)]` —
                            // confirm whether that difference is intentional.
     199              :     pub pg_version: Option<u32>,
     200              : }
     201              : 
                        /// Request to split a tenant into `new_shard_count` shards, optionally choosing the
                        /// stripe size as part of the split.
     202            0 : #[derive(Serialize, Deserialize)]
     203              : pub struct TenantShardSplitRequest {
                            /// Shard count the tenant should have after the split.
     204              :     pub new_shard_count: u8,
     205              : 
     206              :     // A tenant's stripe size is only meaningful the first time their shard count goes
     207              :     // above 1: therefore during a split from 1->N shards, we may modify the stripe size.
     208              :     //
     209              :     // If this is set while the stripe count is being increased from an already >1 value,
     210              :     // then the request will fail with 400.
     211              :     pub new_stripe_size: Option<ShardStripeSize>,
     212              : }
     213              : 
                        /// Response counterpart of [`TenantShardSplitRequest`] (going by the type name).
     214            0 : #[derive(Serialize, Deserialize)]
     215              : pub struct TenantShardSplitResponse {
                            /// Shard IDs reported back — presumably the shards that exist after the split; confirm.
     216              :     pub new_shards: Vec<TenantShardId>,
     217              : }
     218              : 
     219              : /// Parameters that apply to all shards in a tenant.  Used during tenant creation.
                        ///
                        /// Unknown keys are rejected when deserializing (`deny_unknown_fields`).
     220            0 : #[derive(Serialize, Deserialize, Debug)]
     221              : #[serde(deny_unknown_fields)]
     222              : pub struct ShardParameters {
                            /// Number of shards; `is_unsharded()` delegates to this value.
     223              :     pub count: ShardCount,
                            /// Stripe size; `Default` fills in `ShardParameters::DEFAULT_STRIPE_SIZE`.
     224              :     pub stripe_size: ShardStripeSize,
     225              : }
     226              : 
     227              : impl ShardParameters {
     228              :     pub const DEFAULT_STRIPE_SIZE: ShardStripeSize = ShardStripeSize(256 * 1024 / 8);
     229              : 
     230            0 :     pub fn is_unsharded(&self) -> bool {
     231            0 :         self.count.is_unsharded()
     232            0 :     }
     233              : }
     234              : 
     235              : impl Default for ShardParameters {
     236          120 :     fn default() -> Self {
     237          120 :         Self {
     238          120 :             count: ShardCount::new(0),
     239          120 :             stripe_size: Self::DEFAULT_STRIPE_SIZE,
     240          120 :         }
     241          120 :     }
     242              : }
     243              : 
                        /// Request payload for creating a tenant.
                        ///
                        /// The tenant configuration is flattened into the same object as the other fields, and
                        /// the request dereferences to that [`TenantConfig`].
     244            6 : #[derive(Serialize, Deserialize, Debug)]
     245              : #[serde(deny_unknown_fields)]
     246              : pub struct TenantCreateRequest {
     247              :     pub new_tenant_id: TenantShardId,
                            // Optional on input; left out of the output when `None`.
     248              :     #[serde(default)]
     249              :     #[serde(skip_serializing_if = "Option::is_none")]
     250              :     pub generation: Option<u32>,
     251              : 
     252              :     // If omitted, create a single shard with TenantShardId::unsharded()
     253              :     #[serde(default)]
     254              :     #[serde(skip_serializing_if = "ShardParameters::is_unsharded")]
     255              :     pub shard_parameters: ShardParameters,
     256              : 
     257              :     // This parameter is only meaningful in requests sent to the storage controller
     258              :     #[serde(default)]
     259              :     #[serde(skip_serializing_if = "Option::is_none")]
     260              :     pub placement_policy: Option<PlacementPolicy>,
     261              : 
     262              :     #[serde(flatten)]
     263              :     pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
     264              : }
     265              : 
                        /// Request payload for loading a tenant (going by the type name); deserialize-only.
     266            0 : #[derive(Deserialize, Debug)]
     267              : #[serde(deny_unknown_fields)]
     268              : pub struct TenantLoadRequest {
                            // NOTE(review): only `Deserialize` is derived for this struct, so the
                            // `skip_serializing_if` attribute below looks inert — confirm before relying on it.
     269              :     #[serde(default)]
     270              :     #[serde(skip_serializing_if = "Option::is_none")]
     271              :     pub generation: Option<u32>,
     272              : }
     273              : 
     274              : impl std::ops::Deref for TenantCreateRequest {
     275              :     type Target = TenantConfig;
     276              : 
     277            0 :     fn deref(&self) -> &Self::Target {
     278            0 :         &self.config
     279            0 :     }
     280              : }
     281              : 
     282              : /// An alternative representation of `pageserver::tenant::TenantConf` with
     283              : /// simpler types.
                        ///
                        /// Every setting is optional; the derived `Default` leaves all of them `None`.
     284            6 : #[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
     285              : pub struct TenantConfig {
     286              :     pub checkpoint_distance: Option<u64>,
                            // NOTE(review): the `*_timeout`, `*_period` and `*_interval` settings are plain
                            // strings here — presumably parsed into durations by the consumer; confirm.
     287              :     pub checkpoint_timeout: Option<String>,
     288              :     pub compaction_target_size: Option<u64>,
     289              :     pub compaction_period: Option<String>,
     290              :     pub compaction_threshold: Option<usize>,
     291              :     // defer parsing compaction_algorithm, like eviction_policy
     292              :     pub compaction_algorithm: Option<CompactionAlgorithm>,
     293              :     pub gc_horizon: Option<u64>,
     294              :     pub gc_period: Option<String>,
     295              :     pub image_creation_threshold: Option<usize>,
     296              :     pub pitr_interval: Option<String>,
     297              :     pub walreceiver_connect_timeout: Option<String>,
     298              :     pub lagging_wal_timeout: Option<String>,
     299              :     pub max_lsn_wal_lag: Option<NonZeroU64>,
     300              :     pub trace_read_requests: Option<bool>,
     301              :     pub eviction_policy: Option<EvictionPolicy>,
     302              :     pub min_resident_size_override: Option<u64>,
     303              :     pub evictions_low_residence_duration_metric_threshold: Option<String>,
     304              :     pub heatmap_period: Option<String>,
     305              :     pub lazy_slru_download: Option<bool>,
     306              :     pub timeline_get_throttle: Option<ThrottleConfig>,
     307              :     pub image_layer_creation_check_threshold: Option<u8>,
     308              :     pub switch_aux_file_policy: Option<AuxFilePolicy>,
     309              : }
     310              : 
                        /// Value type of [`TenantConfig::switch_aux_file_policy`].
                        ///
                        /// Besides serde, it can be parsed case-insensitively from a string through `FromStr`.
     311            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
     312              : pub enum AuxFilePolicy {
     313              :     V1,
     314              :     V2,
     315              :     CrossValidation,
     316              : }
     317              : 
     318              : impl FromStr for AuxFilePolicy {
     319              :     type Err = anyhow::Error;
     320              : 
     321            0 :     fn from_str(s: &str) -> Result<Self, Self::Err> {
     322            0 :         let s = s.to_lowercase();
     323            0 :         if s == "v1" {
     324            0 :             Ok(Self::V1)
     325            0 :         } else if s == "v2" {
     326            0 :             Ok(Self::V2)
     327            0 :         } else if s == "crossvalidation" || s == "cross_validation" {
     328            0 :             Ok(Self::CrossValidation)
     329              :         } else {
     330            0 :             anyhow::bail!("cannot parse {} to aux file policy", s)
     331              :         }
     332            0 :     }
     333              : }
     334              : 
                        /// Value type of [`TenantConfig::eviction_policy`].
                        ///
                        /// Internally tagged: the variant name is stored under the `kind` key next to the
                        /// variant's own fields.
     335            4 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
     336              : #[serde(tag = "kind")]
     337              : pub enum EvictionPolicy {
     338              :     NoEviction,
     339              :     LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
                            // The spelling of this variant is part of the serialized form (`kind`) and of
                            // `discriminant_str()`; renaming it would change both.
     340              :     OnlyImitiate(EvictionPolicyLayerAccessThreshold),
     341              : }
     342              : 
     343              : impl EvictionPolicy {
     344            0 :     pub fn discriminant_str(&self) -> &'static str {
     345            0 :         match self {
     346            0 :             EvictionPolicy::NoEviction => "NoEviction",
     347            0 :             EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
     348            0 :             EvictionPolicy::OnlyImitiate(_) => "OnlyImitiate",
     349              :         }
     350            0 :     }
     351              : }
     352              : 
                        /// Value type of [`TenantConfig::compaction_algorithm`].
                        ///
                        /// Internally tagged: the variant name is stored under the `kind` key.
     353            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
     354              : #[serde(tag = "kind")]
     355              : pub enum CompactionAlgorithm {
     356              :     Legacy,
     357              :     Tiered,
     358              : }
     359              : 
                        /// Payload of the `LayerAccessThreshold` and `OnlyImitiate` variants of [`EvictionPolicy`].
                        ///
                        /// Both durations are (de)serialized through `humantime_serde`.
     360           20 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
     361              : pub struct EvictionPolicyLayerAccessThreshold {
                            // NOTE(review): presumably how often the policy runs — confirm against the consumer.
     362              :     #[serde(with = "humantime_serde")]
     363              :     pub period: Duration,
                            // NOTE(review): presumably the access-age cutoff the policy applies — confirm.
     364              :     #[serde(with = "humantime_serde")]
     365              :     pub threshold: Duration,
     366              : }
     367              : 
                        /// Throttle settings; value type of [`TenantConfig::timeline_get_throttle`].
     368            0 : #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
     369              : pub struct ThrottleConfig {
                            /// Task kinds by name. `ThrottleConfig::disabled()` leaves this empty, which
                            /// effectively disables the throttle.
     370              :     pub task_kinds: Vec<String>, // TaskKind
     371              :     pub initial: usize,
                            /// Together with `refill_amount`, determines the rate reported by `steady_rps()`.
     372              :     #[serde(with = "humantime_serde")]
     373              :     pub refill_interval: Duration,
     374              :     pub refill_amount: NonZeroUsize,
     375              :     pub max: usize,
     376              :     pub fair: bool,
     377              : }
     378              : 
     379              : impl ThrottleConfig {
     380          284 :     pub fn disabled() -> Self {
     381          284 :         Self {
     382          284 :             task_kinds: vec![], // effectively disables the throttle
     383          284 :             // other values don't matter with emtpy `task_kinds`.
     384          284 :             initial: 0,
     385          284 :             refill_interval: Duration::from_millis(1),
     386          284 :             refill_amount: NonZeroUsize::new(1).unwrap(),
     387          284 :             max: 1,
     388          284 :             fair: true,
     389          284 :         }
     390          284 :     }
     391              :     /// The requests per second allowed  by the given config.
     392            0 :     pub fn steady_rps(&self) -> f64 {
     393            0 :         (self.refill_amount.get() as f64) / (self.refill_interval.as_secs_f64())
     394            0 :     }
     395              : }
     396              : 
     397              : /// A flattened analog of a `pageserver::tenant::LocationMode`, which
     398              : /// lists out all possible states (and the virtual "Detached" state)
     399              : /// in a flat form rather than using rust-style enums.
     400            0 : #[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
     401              : pub enum LocationConfigMode {
     402              :     AttachedSingle,
     403              :     AttachedMulti,
     404              :     AttachedStale,
     405              :     Secondary,
                            // The "virtual" state mentioned in the type-level docs.
     406              :     Detached,
     407              : }
     408              : 
                        /// Extra configuration for a location requested in `Secondary` mode; carried in
                        /// [`LocationConfig::secondary_conf`].
     409            0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
     410              : pub struct LocationConfigSecondary {
                            // NOTE(review): meaning not visible here — presumably whether the secondary keeps
                            // its local data warm; confirm against the pageserver.
     411              :     pub warm: bool,
     412              : }
     413              : 
     414              : /// An alternative representation of `pageserver::tenant::LocationConf`,
     415              : /// for use in external-facing APIs.
                        ///
                        /// Only `mode` and `tenant_conf` lack `#[serde(default)]`; the other fields fall back to
                        /// `None` / `0` when absent from the input.
     416            0 : #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)]
     417              : pub struct LocationConfig {
     418              :     pub mode: LocationConfigMode,
     419              :     /// If attaching, in what generation?
     420              :     #[serde(default)]
     421              :     pub generation: Option<u32>,
     422              : 
     423              :     // If requesting mode `Secondary`, configuration for that.
     424              :     #[serde(default)]
     425              :     pub secondary_conf: Option<LocationConfigSecondary>,
     426              : 
     427              :     // Shard parameters: if shard_count is nonzero, then other shard_* fields
     428              :     // must be set accurately.
     429              :     #[serde(default)]
     430              :     pub shard_number: u8,
     431              :     #[serde(default)]
     432              :     pub shard_count: u8,
     433              :     #[serde(default)]
     434              :     pub shard_stripe_size: u32,
     435              : 
     436              :     // This configuration only affects attached mode, but should be provided irrespective
     437              :     // of the mode, as a secondary location might transition on startup if the response
     438              :     // to the `/re-attach` control plane API requests it.
     439              :     pub tenant_conf: TenantConfig,
     440              : }
     441              : 
                        /// Listing of tenant shards, each paired with its [`LocationConfig`] when one is present.
     442            0 : #[derive(Serialize, Deserialize)]
     443              : pub struct LocationConfigListResponse {
                            // NOTE(review): what a `None` config means for a shard is not visible here — confirm.
     444              :     pub tenant_shards: Vec<(TenantShardId, Option<LocationConfig>)>,
     445              : }
     446              : 
                        /// Response to a tenant creation request.
                        ///
                        /// `#[serde(transparent)]` makes it (de)serialize exactly like the wrapped [`TenantId`].
     447            0 : #[derive(Serialize, Deserialize)]
     448              : #[serde(transparent)]
     449              : pub struct TenantCreateResponse(pub TenantId);
     450              : 
                        /// Status payload carrying a node ID; serialize-only (no `Deserialize` derive).
     451              : #[derive(Serialize)]
     452              : pub struct StatusResponse {
                            // NOTE(review): presumably the ID of the responding node — confirm.
     453              :     pub id: NodeId,
     454              : }
     455              : 
                        /// Request wrapper around a [`LocationConfig`]; the config's fields are flattened into
                        /// the top-level object.
     456            0 : #[derive(Serialize, Deserialize, Debug)]
     457              : #[serde(deny_unknown_fields)]
     458              : pub struct TenantLocationConfigRequest {
     459              :     #[serde(flatten)]
     460              :     pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
     461              : }
     462              : 
                        /// Request payload for a tenant time-travel operation (going by the type name).
                        ///
                        /// Unknown keys are rejected when deserializing (`deny_unknown_fields`).
     463            0 : #[derive(Serialize, Deserialize, Debug)]
     464              : #[serde(deny_unknown_fields)]
     465              : pub struct TenantTimeTravelRequest {
                            // NOTE(review): presumably the shard counts the operation should cover — confirm.
     466              :     pub shard_counts: Vec<ShardCount>,
     467              : }
     468              : 
                        /// Pairs a tenant shard with a node ID; element type of
                        /// [`TenantLocationConfigResponse::shards`].
     469            0 : #[derive(Serialize, Deserialize, Debug)]
     470              : #[serde(deny_unknown_fields)]
     471              : pub struct TenantShardLocation {
     472              :     pub shard_id: TenantShardId,
     473              :     pub node_id: NodeId,
     474              : }
     475              : 
                        /// Response listing where each shard of a tenant is located, plus the stripe size when
                        /// the tenant is sharded.
     476            0 : #[derive(Serialize, Deserialize, Debug)]
     477              : #[serde(deny_unknown_fields)]
     478              : pub struct TenantLocationConfigResponse {
     479              :     pub shards: Vec<TenantShardLocation>,
     480              :     // If the shards' ShardCount count is >1, stripe_size will be set.
     481              :     pub stripe_size: Option<ShardStripeSize>,
     482              : }
     483              : 
                        /// A [`TenantConfig`] addressed to one tenant.
                        ///
                        /// The config's fields are flattened next to `tenant_id`, and the request dereferences
                        /// to the config.
     484            6 : #[derive(Serialize, Deserialize, Debug)]
     485              : #[serde(deny_unknown_fields)]
     486              : pub struct TenantConfigRequest {
     487              :     pub tenant_id: TenantId,
     488              :     #[serde(flatten)]
     489              :     pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
     490              : }
     491              : 
     492              : impl std::ops::Deref for TenantConfigRequest {
     493              :     type Target = TenantConfig;
     494              : 
     495            0 :     fn deref(&self) -> &Self::Target {
     496            0 :         &self.config
     497            0 :     }
     498              : }
     499              : 
     500              : impl TenantConfigRequest {
     501            0 :     pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
     502            0 :         let config = TenantConfig::default();
     503            0 :         TenantConfigRequest { tenant_id, config }
     504            0 :     }
     505              : }
     506              : 
                        /// Request payload for attaching a tenant; deserialize-only.
                        ///
                        /// Both fields are optional on input: a missing `config` becomes the default
                        /// [`TenantAttachConfig`] and a missing `generation` becomes `None`.
     507            6 : #[derive(Debug, Deserialize)]
     508              : pub struct TenantAttachRequest {
     509              :     #[serde(default)]
     510              :     pub config: TenantAttachConfig,
     511              :     #[serde(default)]
     512              :     pub generation: Option<u32>,
     513              : }
     514              : 
     515              : /// Newtype to enforce deny_unknown_fields on TenantConfig for
     516              : /// its usage inside `TenantAttachRequest`.
                        ///
                        /// Dereferences to the wrapped [`TenantConfig`].
                        // NOTE(review): serde's documentation lists `deny_unknown_fields` as unsupported in
                        // combination with `flatten` — confirm that the rejection is covered by a test.
     517            2 : #[derive(Debug, Serialize, Deserialize, Default)]
     518              : #[serde(deny_unknown_fields)]
     519              : pub struct TenantAttachConfig {
                            // Private: the wrapped config is only reachable through `Deref`.
     520              :     #[serde(flatten)]
     521              :     allowing_unknown_fields: TenantConfig,
     522              : }
     523              : 
     524              : impl std::ops::Deref for TenantAttachConfig {
     525              :     type Target = TenantConfig;
     526              : 
     527            0 :     fn deref(&self) -> &Self::Target {
     528            0 :         &self.allowing_unknown_fields
     529            0 :     }
     530              : }
     531              : 
     532              : /// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
                        ///
                        /// Adjacently tagged with snake_case variant names: `slug` is `maybe`, `attached` or
                        /// `failed`, and the payload of `Failed` goes under `data`.
     533            0 : #[derive(Serialize, Deserialize, Clone)]
     534              : #[serde(tag = "slug", content = "data", rename_all = "snake_case")]
     535              : pub enum TenantAttachmentStatus {
     536              :     Maybe,
     537              :     Attached,
                            /// `reason` is copied from `TenantState::Broken` by `attachment_status()`.
     538              :     Failed { reason: String },
     539              : }
     540              : 
                        /// Summary of one tenant shard: its state, attachment status and optional size and
                        /// generation.
     541            0 : #[derive(Serialize, Deserialize, Clone)]
     542              : pub struct TenantInfo {
     543              :     pub id: TenantShardId,
     544              :     // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
     545              :     pub state: TenantState,
     546              :     /// Sum of the size of all layer files.
     547              :     /// If a layer is present in both local FS and S3, it counts only once.
     548              :     pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
     549              :     pub attachment_status: TenantAttachmentStatus,
                            // Left out of the serialized output when `None`.
     550              :     #[serde(skip_serializing_if = "Option::is_none")]
     551              :     pub generation: Option<u32>,
     552              : }
     553              : 
     554            0 : #[derive(Serialize, Deserialize, Clone)]
     555              : pub struct TenantDetails {
     556              :     #[serde(flatten)]
     557              :     pub tenant_info: TenantInfo,
     558              : 
     559              :     pub walredo: Option<WalRedoManagerStatus>,
     560              : 
     561              :     pub timelines: Vec<TimelineId>,
     562              : }
     563              : 
     564              : /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
     565            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
     566              : pub struct TimelineInfo {
     567              :     pub tenant_id: TenantShardId,
     568              :     pub timeline_id: TimelineId,
     569              : 
     570              :     pub ancestor_timeline_id: Option<TimelineId>,
     571              :     pub ancestor_lsn: Option<Lsn>,
     572              :     pub last_record_lsn: Lsn,
     573              :     pub prev_record_lsn: Option<Lsn>,
     574              :     pub latest_gc_cutoff_lsn: Lsn,
     575              :     pub disk_consistent_lsn: Lsn,
     576              : 
     577              :     /// The LSN that we have successfully uploaded to remote storage
     578              :     pub remote_consistent_lsn: Lsn,
     579              : 
     580              :     /// The LSN that we are advertising to safekeepers
     581              :     pub remote_consistent_lsn_visible: Lsn,
     582              : 
     583              :     /// The LSN from the start of the root timeline (never changes)
     584              :     pub initdb_lsn: Lsn,
     585              : 
     586              :     pub current_logical_size: u64,
     587              :     pub current_logical_size_is_accurate: bool,
     588              : 
     589              :     pub directory_entries_counts: Vec<u64>,
     590              : 
     591              :     /// Sum of the size of all layer files.
     592              :     /// If a layer is present in both local FS and S3, it counts only once.
     593              :     pub current_physical_size: Option<u64>, // is None when timeline is Unloaded
     594              :     pub current_logical_size_non_incremental: Option<u64>,
     595              : 
     596              :     pub timeline_dir_layer_file_size_sum: Option<u64>,
     597              : 
     598              :     pub wal_source_connstr: Option<String>,
     599              :     pub last_received_msg_lsn: Option<Lsn>,
     600              :     /// the timestamp (in microseconds) of the last received message
     601              :     pub last_received_msg_ts: Option<u128>,
     602              :     pub pg_version: u32,
     603              : 
     604              :     pub state: TimelineState,
     605              : 
     606              :     pub walreceiver_status: String,
     607              : }
     608              : 
     609            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     610              : pub struct LayerMapInfo {
     611              :     pub in_memory_layers: Vec<InMemoryLayerInfo>,
     612              :     pub historic_layers: Vec<HistoricLayerInfo>,
     613              : }
     614              : 
     615            0 : #[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, Serialize, Deserialize, enum_map::Enum)]
     616              : #[repr(usize)]
     617              : pub enum LayerAccessKind {
     618              :     GetValueReconstructData,
     619              :     Iter,
     620              :     KeyIter,
     621              :     Dump,
     622              : }
     623              : 
     624            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     625              : pub struct LayerAccessStatFullDetails {
     626              :     pub when_millis_since_epoch: u64,
     627              :     pub task_kind: Cow<'static, str>,
     628              :     pub access_kind: LayerAccessKind,
     629              : }
     630              : 
     631              : /// An event that impacts the layer's residence status.
     632              : #[serde_as]
     633            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     634              : pub struct LayerResidenceEvent {
     635              :     /// The time when the event occurred.
     636              :     /// NB: this timestamp is captured while the residence status changes.
     637              :     /// So, it might be behind/ahead of the actual residence change by a short amount of time.
     638              :     ///
     639              :     #[serde(rename = "timestamp_millis_since_epoch")]
     640              :     #[serde_as(as = "serde_with::TimestampMilliSeconds")]
     641              :     pub timestamp: SystemTime,
     642              :     /// The new residence status of the layer.
     643              :     pub status: LayerResidenceStatus,
     644              :     /// The reason why we had to record this event.
     645              :     pub reason: LayerResidenceEventReason,
     646              : }
     647              : 
     648              : /// The reason for recording a given [`LayerResidenceEvent`].
     649            0 : #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
     650              : pub enum LayerResidenceEventReason {
     651              :     /// The layer map is being populated, e.g. during timeline load or attach.
     652              :     /// This includes [`RemoteLayer`] objects created in [`reconcile_with_remote`].
     653              :     /// We need to record such events because there is no persistent storage for the events.
     654              :     ///
     655              :     // https://github.com/rust-lang/rust/issues/74481
     656              :     /// [`RemoteLayer`]: ../../tenant/storage_layer/struct.RemoteLayer.html
     657              :     /// [`reconcile_with_remote`]: ../../tenant/struct.Timeline.html#method.reconcile_with_remote
     658              :     LayerLoad,
     659              :     /// We just created the layer (e.g., freeze_and_flush or compaction).
     660              :     /// Such layers are always [`LayerResidenceStatus::Resident`].
     661              :     LayerCreate,
     662              :     /// We on-demand downloaded or evicted the given layer.
     663              :     ResidenceChange,
     664              : }
     665              : 
     666              : /// The residence status of the layer, after the given [`LayerResidenceEvent`].
     667            0 : #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
     668              : pub enum LayerResidenceStatus {
     669              :     /// Residence status for a layer file that exists locally.
     670              :     /// It may also exist on the remote, we don't care here.
     671              :     Resident,
     672              :     /// Residence status for a layer file that only exists on the remote.
     673              :     Evicted,
     674              : }
     675              : 
     676              : impl LayerResidenceEvent {
     677         2452 :     pub fn new(status: LayerResidenceStatus, reason: LayerResidenceEventReason) -> Self {
     678         2452 :         Self {
     679         2452 :             status,
     680         2452 :             reason,
     681         2452 :             timestamp: SystemTime::now(),
     682         2452 :         }
     683         2452 :     }
     684              : }
     685              : 
     686            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     687              : pub struct LayerAccessStats {
     688              :     pub access_count_by_access_kind: HashMap<LayerAccessKind, u64>,
     689              :     pub task_kind_access_flag: Vec<Cow<'static, str>>,
     690              :     pub first: Option<LayerAccessStatFullDetails>,
     691              :     pub accesses_history: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
     692              :     pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
     693              : }
     694              : 
     695            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     696              : #[serde(tag = "kind")]
     697              : pub enum InMemoryLayerInfo {
     698              :     Open { lsn_start: Lsn },
     699              :     Frozen { lsn_start: Lsn, lsn_end: Lsn },
     700              : }
     701              : 
     702            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     703              : #[serde(tag = "kind")]
     704              : pub enum HistoricLayerInfo {
     705              :     Delta {
     706              :         layer_file_name: String,
     707              :         layer_file_size: u64,
     708              : 
     709              :         lsn_start: Lsn,
     710              :         lsn_end: Lsn,
     711              :         remote: bool,
     712              :         access_stats: LayerAccessStats,
     713              :     },
     714              :     Image {
     715              :         layer_file_name: String,
     716              :         layer_file_size: u64,
     717              : 
     718              :         lsn_start: Lsn,
     719              :         remote: bool,
     720              :         access_stats: LayerAccessStats,
     721              :     },
     722              : }
     723              : 
     724              : impl HistoricLayerInfo {
     725            0 :     pub fn layer_file_name(&self) -> &str {
     726            0 :         match self {
     727              :             HistoricLayerInfo::Delta {
     728            0 :                 layer_file_name, ..
     729            0 :             } => layer_file_name,
     730              :             HistoricLayerInfo::Image {
     731            0 :                 layer_file_name, ..
     732            0 :             } => layer_file_name,
     733              :         }
     734            0 :     }
     735            0 :     pub fn is_remote(&self) -> bool {
     736            0 :         match self {
     737            0 :             HistoricLayerInfo::Delta { remote, .. } => *remote,
     738            0 :             HistoricLayerInfo::Image { remote, .. } => *remote,
     739              :         }
     740            0 :     }
     741            0 :     pub fn set_remote(&mut self, value: bool) {
     742            0 :         let field = match self {
     743            0 :             HistoricLayerInfo::Delta { remote, .. } => remote,
     744            0 :             HistoricLayerInfo::Image { remote, .. } => remote,
     745              :         };
     746            0 :         *field = value;
     747            0 :     }
     748              : }
     749              : 
     750            0 : #[derive(Debug, Serialize, Deserialize)]
     751              : pub struct DownloadRemoteLayersTaskSpawnRequest {
     752              :     pub max_concurrent_downloads: NonZeroUsize,
     753              : }
     754              : 
     755            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
     756              : pub struct DownloadRemoteLayersTaskInfo {
     757              :     pub task_id: String,
     758              :     pub state: DownloadRemoteLayersTaskState,
     759              :     pub total_layer_count: u64,         // stable once `completed`
     760              :     pub successful_download_count: u64, // stable once `completed`
     761              :     pub failed_download_count: u64,     // stable once `completed`
     762              : }
     763              : 
     764            0 : #[derive(Debug, Serialize, Deserialize, Clone)]
     765              : pub enum DownloadRemoteLayersTaskState {
     766              :     Running,
     767              :     Completed,
     768              :     ShutDown,
     769              : }
     770              : 
     771            0 : #[derive(Debug, Serialize, Deserialize)]
     772              : pub struct TimelineGcRequest {
     773              :     pub gc_horizon: Option<u64>,
     774              : }
     775              : 
     776            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     777              : pub struct WalRedoManagerProcessStatus {
     778              :     pub pid: u32,
     779              :     /// The strum-generated `into::<&'static str>()` for `pageserver::walredo::ProcessKind`.
     780              :     /// `ProcessKind` is a transitory thing, so it has no enum representation in `pageserver_api`.
     781              :     pub kind: Cow<'static, str>,
     782              : }
     783              : 
     784            0 : #[derive(Debug, Clone, Serialize, Deserialize)]
     785              : pub struct WalRedoManagerStatus {
     786              :     pub last_redo_at: Option<chrono::DateTime<chrono::Utc>>,
     787              :     pub process: Option<WalRedoManagerProcessStatus>,
     788              : }
     789              : 
     790              : /// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
     791              : /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
     792              : /// what's happening.
     793            0 : #[derive(Default, Debug, Serialize, Deserialize, Clone)]
     794              : pub struct SecondaryProgress {
     795              :     /// The remote storage LastModified time of the heatmap object we last downloaded.
     796              :     pub heatmap_mtime: Option<serde_system_time::SystemTime>,
     797              : 
     798              :     /// The number of layers currently on-disk
     799              :     pub layers_downloaded: usize,
     800              :     /// The number of layers in the most recently seen heatmap
     801              :     pub layers_total: usize,
     802              : 
     803              :     /// The number of layer bytes currently on-disk
     804              :     pub bytes_downloaded: u64,
     805              :     /// The number of layer bytes in the most recently seen heatmap
     806              :     pub bytes_total: u64,
     807              : }
     808              : 
     809            0 : #[derive(Serialize, Deserialize, Debug)]
     810              : pub struct TenantScanRemoteStorageShard {
     811              :     pub tenant_shard_id: TenantShardId,
     812              :     pub generation: Option<u32>,
     813              : }
     814              : 
     815            0 : #[derive(Serialize, Deserialize, Debug, Default)]
     816              : pub struct TenantScanRemoteStorageResponse {
     817              :     pub shards: Vec<TenantScanRemoteStorageShard>,
     818              : }
     819              : 
     820              : pub mod virtual_file {
     821              :     #[derive(
     822              :         Copy,
     823              :         Clone,
     824              :         PartialEq,
     825              :         Eq,
     826              :         Hash,
     827          276 :         strum_macros::EnumString,
     828            0 :         strum_macros::Display,
     829            0 :         serde_with::DeserializeFromStr,
     830              :         serde_with::SerializeDisplay,
     831              :         Debug,
     832              :     )]
     833              :     #[strum(serialize_all = "kebab-case")]
     834              :     pub enum IoEngineKind {
     835              :         StdFs,
     836              :         #[cfg(target_os = "linux")]
     837              :         TokioEpollUring,
     838              :     }
     839              : }
     840              : 
     841              : // Wrapped in libpq CopyData
     842              : #[derive(PartialEq, Eq, Debug)]
     843              : pub enum PagestreamFeMessage {
     844              :     Exists(PagestreamExistsRequest),
     845              :     Nblocks(PagestreamNblocksRequest),
     846              :     GetPage(PagestreamGetPageRequest),
     847              :     DbSize(PagestreamDbSizeRequest),
     848              :     GetSlruSegment(PagestreamGetSlruSegmentRequest),
     849              : }
     850              : 
     851              : // Wrapped in libpq CopyData
     852            0 : #[derive(strum_macros::EnumProperty)]
     853              : pub enum PagestreamBeMessage {
     854              :     Exists(PagestreamExistsResponse),
     855              :     Nblocks(PagestreamNblocksResponse),
     856              :     GetPage(PagestreamGetPageResponse),
     857              :     Error(PagestreamErrorResponse),
     858              :     DbSize(PagestreamDbSizeResponse),
     859              :     GetSlruSegment(PagestreamGetSlruSegmentResponse),
     860              : }
     861              : 
     862              : // Keep in sync with `pagestore_client.h`
     863              : #[repr(u8)]
     864              : enum PagestreamBeMessageTag {
     865              :     Exists = 100,
     866              :     Nblocks = 101,
     867              :     GetPage = 102,
     868              :     Error = 103,
     869              :     DbSize = 104,
     870              :     GetSlruSegment = 105,
     871              : }
     872              : impl TryFrom<u8> for PagestreamBeMessageTag {
     873              :     type Error = u8;
     874            0 :     fn try_from(value: u8) -> Result<Self, u8> {
     875            0 :         match value {
     876            0 :             100 => Ok(PagestreamBeMessageTag::Exists),
     877            0 :             101 => Ok(PagestreamBeMessageTag::Nblocks),
     878            0 :             102 => Ok(PagestreamBeMessageTag::GetPage),
     879            0 :             103 => Ok(PagestreamBeMessageTag::Error),
     880            0 :             104 => Ok(PagestreamBeMessageTag::DbSize),
     881            0 :             105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
     882            0 :             _ => Err(value),
     883              :         }
     884            0 :     }
     885              : }
     886              : 
     887              : // In the V2 protocol version, a GetPage request contains two LSN values:
     888              : //
     889              : // request_lsn: Get the page version at this point in time.  Lsn::MAX is a special value that means
     890              : // "get the latest version present". It's used by the primary server, which knows that no one else
     891              : // is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
     892              : // Lsn::MAX. Standby servers use the current replay LSN as the request LSN.
     893              : //
     894              : // not_modified_since: Hint to the pageserver that the client knows that the page has not been
     895              : // modified between 'not_modified_since' and the request LSN. It's always correct to set
     896              : // 'not_modified_since' equal to 'request_lsn' (unless Lsn::MAX is used as the 'request_lsn'), but
     897              : // passing an earlier LSN can speed up the request, by allowing the pageserver to process the
     898              : // request without waiting for 'request_lsn' to arrive.
     899              : //
     900              : // The legacy V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
     901              : // sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
     902              : // 'latest' was set to true. The V2 interface was added because there was no correct way for a
     903              : // standby to request a page at a particular non-latest LSN, and also include the
     904              : // 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
     905              : // request, if the standby knows that the page hasn't been modified since, and risk getting an error
     906              : // if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
     907              : // require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
     908              : // interface allows sending both LSNs, and lets the pageserver do the right thing. There is no
     909              : // difference in the responses between V1 and V2.
     910              : //
     911              : // The Request structs below reflect the V2 interface. If V1 is used, the parse function
     912              : // maps the old format requests to the new format.
     913              : //
     914              : #[derive(Clone, Copy)]
     915              : pub enum PagestreamProtocolVersion {
     916              :     V1,
     917              :     V2,
     918              : }
     919              : 
     920              : #[derive(Debug, PartialEq, Eq)]
     921              : pub struct PagestreamExistsRequest {
     922              :     pub request_lsn: Lsn,
     923              :     pub not_modified_since: Lsn,
     924              :     pub rel: RelTag,
     925              : }
     926              : 
     927              : #[derive(Debug, PartialEq, Eq)]
     928              : pub struct PagestreamNblocksRequest {
     929              :     pub request_lsn: Lsn,
     930              :     pub not_modified_since: Lsn,
     931              :     pub rel: RelTag,
     932              : }
     933              : 
     934              : #[derive(Debug, PartialEq, Eq)]
     935              : pub struct PagestreamGetPageRequest {
     936              :     pub request_lsn: Lsn,
     937              :     pub not_modified_since: Lsn,
     938              :     pub rel: RelTag,
     939              :     pub blkno: u32,
     940              : }
     941              : 
     942              : #[derive(Debug, PartialEq, Eq)]
     943              : pub struct PagestreamDbSizeRequest {
     944              :     pub request_lsn: Lsn,
     945              :     pub not_modified_since: Lsn,
     946              :     pub dbnode: u32,
     947              : }
     948              : 
     949              : #[derive(Debug, PartialEq, Eq)]
     950              : pub struct PagestreamGetSlruSegmentRequest {
     951              :     pub request_lsn: Lsn,
     952              :     pub not_modified_since: Lsn,
     953              :     pub kind: u8,
     954              :     pub segno: u32,
     955              : }
     956              : 
     957              : #[derive(Debug)]
     958              : pub struct PagestreamExistsResponse {
     959              :     pub exists: bool,
     960              : }
     961              : 
     962              : #[derive(Debug)]
     963              : pub struct PagestreamNblocksResponse {
     964              :     pub n_blocks: u32,
     965              : }
     966              : 
     967              : #[derive(Debug)]
     968              : pub struct PagestreamGetPageResponse {
     969              :     pub page: Bytes,
     970              : }
     971              : 
     972              : #[derive(Debug)]
     973              : pub struct PagestreamGetSlruSegmentResponse {
     974              :     pub segment: Bytes,
     975              : }
     976              : 
     977              : #[derive(Debug)]
     978              : pub struct PagestreamErrorResponse {
     979              :     pub message: String,
     980              : }
     981              : 
     982              : #[derive(Debug)]
     983              : pub struct PagestreamDbSizeResponse {
     984              :     pub db_size: i64,
     985              : }
     986              : 
     987              : // This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
     988              : // that require pageserver-internal types.  It is sufficient to get the total size.
     989            0 : #[derive(Serialize, Deserialize, Debug)]
     990              : pub struct TenantHistorySize {
     991              :     pub id: TenantId,
     992              :     /// Size is a mixture of WAL and logical size, so the unit is bytes.
     993              :     ///
     994              :     /// Will be none if `?inputs_only=true` was given.
     995              :     pub size: Option<u64>,
     996              : }
     997              : 
     998              : impl PagestreamFeMessage {
     999              :     /// Serialize a compute -> pageserver message. This is currently only used in testing
    1000              :     /// tools. Always uses protocol version 2.
    1001            8 :     pub fn serialize(&self) -> Bytes {
    1002            8 :         let mut bytes = BytesMut::new();
    1003            8 : 
    1004            8 :         match self {
    1005            2 :             Self::Exists(req) => {
    1006            2 :                 bytes.put_u8(0);
    1007            2 :                 bytes.put_u64(req.request_lsn.0);
    1008            2 :                 bytes.put_u64(req.not_modified_since.0);
    1009            2 :                 bytes.put_u32(req.rel.spcnode);
    1010            2 :                 bytes.put_u32(req.rel.dbnode);
    1011            2 :                 bytes.put_u32(req.rel.relnode);
    1012            2 :                 bytes.put_u8(req.rel.forknum);
    1013            2 :             }
    1014              : 
    1015            2 :             Self::Nblocks(req) => {
    1016            2 :                 bytes.put_u8(1);
    1017            2 :                 bytes.put_u64(req.request_lsn.0);
    1018            2 :                 bytes.put_u64(req.not_modified_since.0);
    1019            2 :                 bytes.put_u32(req.rel.spcnode);
    1020            2 :                 bytes.put_u32(req.rel.dbnode);
    1021            2 :                 bytes.put_u32(req.rel.relnode);
    1022            2 :                 bytes.put_u8(req.rel.forknum);
    1023            2 :             }
    1024              : 
    1025            2 :             Self::GetPage(req) => {
    1026            2 :                 bytes.put_u8(2);
    1027            2 :                 bytes.put_u64(req.request_lsn.0);
    1028            2 :                 bytes.put_u64(req.not_modified_since.0);
    1029            2 :                 bytes.put_u32(req.rel.spcnode);
    1030            2 :                 bytes.put_u32(req.rel.dbnode);
    1031            2 :                 bytes.put_u32(req.rel.relnode);
    1032            2 :                 bytes.put_u8(req.rel.forknum);
    1033            2 :                 bytes.put_u32(req.blkno);
    1034            2 :             }
    1035              : 
    1036            2 :             Self::DbSize(req) => {
    1037            2 :                 bytes.put_u8(3);
    1038            2 :                 bytes.put_u64(req.request_lsn.0);
    1039            2 :                 bytes.put_u64(req.not_modified_since.0);
    1040            2 :                 bytes.put_u32(req.dbnode);
    1041            2 :             }
    1042              : 
    1043            0 :             Self::GetSlruSegment(req) => {
    1044            0 :                 bytes.put_u8(4);
    1045            0 :                 bytes.put_u64(req.request_lsn.0);
    1046            0 :                 bytes.put_u64(req.not_modified_since.0);
    1047            0 :                 bytes.put_u8(req.kind);
    1048            0 :                 bytes.put_u32(req.segno);
    1049            0 :             }
    1050              :         }
    1051              : 
    1052            8 :         bytes.into()
    1053            8 :     }
    1054              : 
    1055            8 :     pub fn parse<R: std::io::Read>(
    1056            8 :         body: &mut R,
    1057            8 :         protocol_version: PagestreamProtocolVersion,
    1058            8 :     ) -> anyhow::Result<PagestreamFeMessage> {
    1059              :         // these correspond to the NeonMessageTag enum in pagestore_client.h
    1060              :         //
    1061              :         // TODO: consider using protobuf or serde bincode for less error prone
    1062              :         // serialization.
    1063            8 :         let msg_tag = body.read_u8()?;
    1064              : 
    1065            8 :         let (request_lsn, not_modified_since) = match protocol_version {
    1066              :             PagestreamProtocolVersion::V2 => (
    1067            8 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    1068            8 :                 Lsn::from(body.read_u64::<BigEndian>()?),
    1069              :             ),
    1070              :             PagestreamProtocolVersion::V1 => {
    1071              :                 // In the old protocol, each message starts with a boolean 'latest' flag,
    1072              :                 // followed by 'lsn'. Convert that to the two LSNs, 'request_lsn' and
    1073              :                 // 'not_modified_since', used in the new protocol version.
    1074            0 :                 let latest = body.read_u8()? != 0;
    1075            0 :                 let request_lsn = Lsn::from(body.read_u64::<BigEndian>()?);
    1076            0 :                 if latest {
    1077            0 :                     (Lsn::MAX, request_lsn) // get latest version
    1078              :                 } else {
    1079            0 :                     (request_lsn, request_lsn) // get version at specified LSN
    1080              :                 }
    1081              :             }
    1082              :         };
    1083              : 
    1084              :         // The rest of the messages are the same between V1 and V2
    1085            8 :         match msg_tag {
    1086              :             0 => Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
    1087            2 :                 request_lsn,
    1088            2 :                 not_modified_since,
    1089            2 :                 rel: RelTag {
    1090            2 :                     spcnode: body.read_u32::<BigEndian>()?,
    1091            2 :                     dbnode: body.read_u32::<BigEndian>()?,
    1092            2 :                     relnode: body.read_u32::<BigEndian>()?,
    1093            2 :                     forknum: body.read_u8()?,
    1094              :                 },
    1095              :             })),
    1096              :             1 => Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
    1097            2 :                 request_lsn,
    1098            2 :                 not_modified_since,
    1099            2 :                 rel: RelTag {
    1100            2 :                     spcnode: body.read_u32::<BigEndian>()?,
    1101            2 :                     dbnode: body.read_u32::<BigEndian>()?,
    1102            2 :                     relnode: body.read_u32::<BigEndian>()?,
    1103            2 :                     forknum: body.read_u8()?,
    1104              :                 },
    1105              :             })),
    1106              :             2 => Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
    1107            2 :                 request_lsn,
    1108            2 :                 not_modified_since,
    1109            2 :                 rel: RelTag {
    1110            2 :                     spcnode: body.read_u32::<BigEndian>()?,
    1111            2 :                     dbnode: body.read_u32::<BigEndian>()?,
    1112            2 :                     relnode: body.read_u32::<BigEndian>()?,
    1113            2 :                     forknum: body.read_u8()?,
    1114              :                 },
    1115            2 :                 blkno: body.read_u32::<BigEndian>()?,
    1116              :             })),
    1117              :             3 => Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
    1118            2 :                 request_lsn,
    1119            2 :                 not_modified_since,
    1120            2 :                 dbnode: body.read_u32::<BigEndian>()?,
    1121              :             })),
    1122              :             4 => Ok(PagestreamFeMessage::GetSlruSegment(
    1123              :                 PagestreamGetSlruSegmentRequest {
    1124            0 :                     request_lsn,
    1125            0 :                     not_modified_since,
    1126            0 :                     kind: body.read_u8()?,
    1127            0 :                     segno: body.read_u32::<BigEndian>()?,
    1128              :                 },
    1129              :             )),
    1130            0 :             _ => bail!("unknown smgr message tag: {:?}", msg_tag),
    1131              :         }
    1132            8 :     }
    1133              : }
    1134              : 
    1135              : impl PagestreamBeMessage {
    1136            0 :     pub fn serialize(&self) -> Bytes {
    1137            0 :         let mut bytes = BytesMut::new();
    1138            0 : 
    1139            0 :         use PagestreamBeMessageTag as Tag;
    1140            0 :         match self {
    1141            0 :             Self::Exists(resp) => {
    1142            0 :                 bytes.put_u8(Tag::Exists as u8);
    1143            0 :                 bytes.put_u8(resp.exists as u8);
    1144            0 :             }
    1145              : 
    1146            0 :             Self::Nblocks(resp) => {
    1147            0 :                 bytes.put_u8(Tag::Nblocks as u8);
    1148            0 :                 bytes.put_u32(resp.n_blocks);
    1149            0 :             }
    1150              : 
    1151            0 :             Self::GetPage(resp) => {
    1152            0 :                 bytes.put_u8(Tag::GetPage as u8);
    1153            0 :                 bytes.put(&resp.page[..]);
    1154            0 :             }
    1155              : 
    1156            0 :             Self::Error(resp) => {
    1157            0 :                 bytes.put_u8(Tag::Error as u8);
    1158            0 :                 bytes.put(resp.message.as_bytes());
    1159            0 :                 bytes.put_u8(0); // null terminator
    1160            0 :             }
    1161            0 :             Self::DbSize(resp) => {
    1162            0 :                 bytes.put_u8(Tag::DbSize as u8);
    1163            0 :                 bytes.put_i64(resp.db_size);
    1164            0 :             }
    1165              : 
    1166            0 :             Self::GetSlruSegment(resp) => {
    1167            0 :                 bytes.put_u8(Tag::GetSlruSegment as u8);
    1168            0 :                 bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
    1169            0 :                 bytes.put(&resp.segment[..]);
    1170            0 :             }
    1171              :         }
    1172              : 
    1173            0 :         bytes.into()
    1174            0 :     }
    1175              : 
    1176            0 :     pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
    1177            0 :         let mut buf = buf.reader();
    1178            0 :         let msg_tag = buf.read_u8()?;
    1179              : 
    1180              :         use PagestreamBeMessageTag as Tag;
    1181            0 :         let ok =
    1182            0 :             match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
    1183              :                 Tag::Exists => {
    1184            0 :                     let exists = buf.read_u8()?;
    1185            0 :                     Self::Exists(PagestreamExistsResponse {
    1186            0 :                         exists: exists != 0,
    1187            0 :                     })
    1188              :                 }
    1189              :                 Tag::Nblocks => {
    1190            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    1191            0 :                     Self::Nblocks(PagestreamNblocksResponse { n_blocks })
    1192              :                 }
    1193              :                 Tag::GetPage => {
    1194            0 :                     let mut page = vec![0; 8192]; // TODO: use MaybeUninit
    1195            0 :                     buf.read_exact(&mut page)?;
    1196            0 :                     PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
    1197              :                 }
    1198              :                 Tag::Error => {
    1199            0 :                     let mut msg = Vec::new();
    1200            0 :                     buf.read_until(0, &mut msg)?;
    1201            0 :                     let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
    1202            0 :                     let rust_str = cstring.to_str()?;
    1203            0 :                     PagestreamBeMessage::Error(PagestreamErrorResponse {
    1204            0 :                         message: rust_str.to_owned(),
    1205            0 :                     })
    1206              :                 }
    1207              :                 Tag::DbSize => {
    1208            0 :                     let db_size = buf.read_i64::<BigEndian>()?;
    1209            0 :                     Self::DbSize(PagestreamDbSizeResponse { db_size })
    1210              :                 }
    1211              :                 Tag::GetSlruSegment => {
    1212            0 :                     let n_blocks = buf.read_u32::<BigEndian>()?;
    1213            0 :                     let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
    1214            0 :                     buf.read_exact(&mut segment)?;
    1215            0 :                     Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
    1216            0 :                         segment: segment.into(),
    1217            0 :                     })
    1218              :                 }
    1219              :             };
    1220            0 :         let remaining = buf.into_inner();
    1221            0 :         if !remaining.is_empty() {
    1222            0 :             anyhow::bail!(
    1223            0 :                 "remaining bytes in msg with tag={msg_tag}: {}",
    1224            0 :                 remaining.len()
    1225            0 :             );
    1226            0 :         }
    1227            0 :         Ok(ok)
    1228            0 :     }
    1229              : 
    1230            0 :     pub fn kind(&self) -> &'static str {
    1231            0 :         match self {
    1232            0 :             Self::Exists(_) => "Exists",
    1233            0 :             Self::Nblocks(_) => "Nblocks",
    1234            0 :             Self::GetPage(_) => "GetPage",
    1235            0 :             Self::Error(_) => "Error",
    1236            0 :             Self::DbSize(_) => "DbSize",
    1237            0 :             Self::GetSlruSegment(_) => "GetSlruSegment",
    1238              :         }
    1239            0 :     }
    1240              : }
    1241              : 
    #[cfg(test)]
    mod tests {
        use serde_json::json;

        use super::*;

        /// Round-trips every `PagestreamFeMessage` request kind through `serialize` and
        /// `parse` using protocol V2.
        #[test]
        fn test_pagestream() {
            // Test serialization/deserialization of PagestreamFeMessage
            let messages = vec![
                PagestreamFeMessage::Exists(PagestreamExistsRequest {
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                    rel: RelTag {
                        forknum: 1,
                        spcnode: 2,
                        dbnode: 3,
                        relnode: 4,
                    },
                }),
                PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(4),
                    rel: RelTag {
                        forknum: 1,
                        spcnode: 2,
                        dbnode: 3,
                        relnode: 4,
                    },
                }),
                PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                    rel: RelTag {
                        forknum: 1,
                        spcnode: 2,
                        dbnode: 3,
                        relnode: 4,
                    },
                    blkno: 7,
                }),
                PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                    dbnode: 7,
                }),
                // Previously uncovered: message tag 4.
                PagestreamFeMessage::GetSlruSegment(PagestreamGetSlruSegmentRequest {
                    request_lsn: Lsn(4),
                    not_modified_since: Lsn(3),
                    kind: 0,
                    segno: 11,
                }),
            ];
            for msg in messages {
                let bytes = msg.serialize();
                let reconstructed =
                    PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V2)
                        .unwrap();
                assert!(msg == reconstructed);
            }
        }

        /// Round-trips every `PagestreamBeMessage` variant through `serialize` and
        /// `deserialize`, comparing the re-encoded bytes.
        #[test]
        fn test_pagestream_be_roundtrip() {
            let block = BLCKSZ as usize;
            let messages = vec![
                PagestreamBeMessage::Exists(PagestreamExistsResponse { exists: true }),
                PagestreamBeMessage::Exists(PagestreamExistsResponse { exists: false }),
                PagestreamBeMessage::Nblocks(PagestreamNblocksResponse { n_blocks: 42 }),
                PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
                    page: vec![0xAB; block].into(),
                }),
                PagestreamBeMessage::Error(PagestreamErrorResponse {
                    message: "something went wrong".to_owned(),
                }),
                PagestreamBeMessage::DbSize(PagestreamDbSizeResponse { db_size: -7 }),
                PagestreamBeMessage::GetSlruSegment(PagestreamGetSlruSegmentResponse {
                    segment: vec![0xCD; 2 * block].into(),
                }),
            ];
            for msg in messages {
                let bytes = msg.serialize();
                let reconstructed = PagestreamBeMessage::deserialize(bytes.clone()).unwrap();
                assert_eq!(msg.kind(), reconstructed.kind());
                assert_eq!(bytes, reconstructed.serialize());
            }
        }

        /// Malformed backend messages must be rejected with an error.
        #[test]
        fn test_pagestream_be_rejects_malformed() {
            // Trailing bytes after a complete message.
            let valid = PagestreamBeMessage::Exists(PagestreamExistsResponse { exists: true })
                .serialize();
            let mut raw = BytesMut::new();
            raw.put(&valid[..]);
            raw.put_u8(0);
            assert!(PagestreamBeMessage::deserialize(raw.freeze()).is_err());

            // A segment header that promises more blocks than the payload carries.
            let mut raw = BytesMut::new();
            raw.put_u8(PagestreamBeMessageTag::GetSlruSegment as u8);
            raw.put_u32(1);
            assert!(PagestreamBeMessage::deserialize(raw.freeze()).is_err());
        }

        /// Checks the JSON shape of `TenantInfo` for an active and a broken tenant.
        #[test]
        fn test_tenantinfo_serde() {
            // Test serialization/deserialization of TenantInfo
            let original_active = TenantInfo {
                id: TenantShardId::unsharded(TenantId::generate()),
                state: TenantState::Active,
                current_physical_size: Some(42),
                attachment_status: TenantAttachmentStatus::Attached,
                generation: None,
            };
            let expected_active = json!({
                "id": original_active.id.to_string(),
                "state": {
                    "slug": "Active",
                },
                "current_physical_size": 42,
                "attachment_status": {
                    "slug":"attached",
                }
            });

            let original_broken = TenantInfo {
                id: TenantShardId::unsharded(TenantId::generate()),
                state: TenantState::Broken {
                    reason: "reason".into(),
                    backtrace: "backtrace info".into(),
                },
                current_physical_size: Some(42),
                attachment_status: TenantAttachmentStatus::Attached,
                generation: None,
            };
            let expected_broken = json!({
                "id": original_broken.id.to_string(),
                "state": {
                    "slug": "Broken",
                    "data": {
                        "backtrace": "backtrace info",
                        "reason": "reason",
                    }
                },
                "current_physical_size": 42,
                "attachment_status": {
                    "slug":"attached",
                }
            });

            assert_eq!(
                serde_json::to_value(&original_active).unwrap(),
                expected_active
            );

            assert_eq!(
                serde_json::to_value(&original_broken).unwrap(),
                expected_broken
            );
            assert!(format!("{:?}", &original_broken.state).contains("reason"));
            assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
        }

        /// Request types must refuse JSON bodies that carry unknown fields.
        #[test]
        fn test_reject_unknown_field() {
            let id = TenantId::generate();
            let create_request = json!({
                "new_tenant_id": id.to_string(),
                "unknown_field": "unknown_value".to_string(),
            });
            let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();
            assert!(
                err.to_string().contains("unknown field `unknown_field`"),
                "expect unknown field `unknown_field` error, got: {}",
                err
            );

            let id = TenantId::generate();
            let config_request = json!({
                "tenant_id": id.to_string(),
                "unknown_field": "unknown_value".to_string(),
            });
            let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
            assert!(
                err.to_string().contains("unknown field `unknown_field`"),
                "expect unknown field `unknown_field` error, got: {}",
                err
            );

            let attach_request = json!({
                "config": {
                    "unknown_field": "unknown_value".to_string(),
                },
            });
            let err = serde_json::from_value::<TenantAttachRequest>(attach_request).unwrap_err();
            assert!(
                err.to_string().contains("unknown field `unknown_field`"),
                "expect unknown field `unknown_field` error, got: {}",
                err
            );
        }

        /// `TenantState::Activating` must serialize with its `ActivatingFrom` payload and
        /// parse back to the same value.
        #[test]
        fn tenantstatus_activating_serde() {
            let states = [
                TenantState::Activating(ActivatingFrom::Loading),
                TenantState::Activating(ActivatingFrom::Attaching),
            ];
            let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";

            let actual = serde_json::to_string(&states).unwrap();

            assert_eq!(actual, expected);

            let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();

            assert_eq!(states.as_slice(), &parsed);
        }

        /// Pins the `&'static str` rendering of each `TenantState` variant.
        #[test]
        fn tenantstatus_activating_strum() {
            // tests added, because we use these for metrics
            let examples = [
                (line!(), TenantState::Loading, "Loading"),
                (line!(), TenantState::Attaching, "Attaching"),
                (
                    line!(),
                    TenantState::Activating(ActivatingFrom::Loading),
                    "Activating",
                ),
                (
                    line!(),
                    TenantState::Activating(ActivatingFrom::Attaching),
                    "Activating",
                ),
                (line!(), TenantState::Active, "Active"),
                (
                    line!(),
                    TenantState::Stopping {
                        progress: utils::completion::Barrier::default(),
                    },
                    "Stopping",
                ),
                (
                    line!(),
                    TenantState::Broken {
                        reason: "Example".into(),
                        backtrace: "Looooong backtrace".into(),
                    },
                    "Broken",
                ),
            ];

            for (line, rendered, expected) in examples {
                let actual: &'static str = rendered.into();
                assert_eq!(actual, expected, "example on {line}");
            }
        }
    }
        

Generated by: LCOV version 2.1-beta