LCOV - 98683a8629f0f7f0031d02e04512998d589d76ea.info - libs/pageserver

LCOV - code coverage report

Current view:	top level - libs/pageserver_api/src - config.rs (source / functions)		Coverage	Total	Hit
Test:	98683a8629f0f7f0031d02e04512998d589d76ea.info	Lines:	80.2 %	197	158
Test Date:	2025-04-11 16:58:57	Functions:	1.4 %	287	4

            Line data    Source code

       1              : use camino::Utf8PathBuf;
       2              : 
       3              : #[cfg(test)]
       4              : mod tests;
       5              : 
       6              : use const_format::formatcp;
       7              : pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
       8              : pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
       9              : pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
      10              : pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
      11              : 
      12              : use std::collections::HashMap;
      13              : use std::num::{NonZeroU64, NonZeroUsize};
      14              : use std::str::FromStr;
      15              : use std::time::Duration;
      16              : 
      17              : use postgres_backend::AuthType;
      18              : use remote_storage::RemoteStorageConfig;
      19              : use serde_with::serde_as;
      20              : use utils::logging::LogFormat;
      21              : use utils::postgres_client::PostgresClientProtocol;
      22              : 
      23              : use crate::models::{ImageCompressionAlgorithm, LsnLease};
      24              : 
      25              : // Certain metadata (e.g. externally-addressable name, AZ) is delivered
      26              : // as a separate structure.  This information is not needed by the pageserver
      27              : // itself, it is only used for registering the pageserver with the control
      28              : // plane and/or storage controller.
      29              : //
      30            9 : #[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
      31              : pub struct NodeMetadata {
      32              :     #[serde(rename = "host")]
      33              :     pub postgres_host: String,
      34              :     #[serde(rename = "port")]
      35              :     pub postgres_port: u16,
      36              :     pub http_host: String,
      37              :     pub http_port: u16,
      38              :     pub https_port: Option<u16>,
      39              : 
      40              :     // Deployment tools may write fields to the metadata file beyond what we
      41              :     // use in this type: this type intentionally only names the fields that we require.
      42              :     #[serde(flatten)]
      43              :     pub other: HashMap<String, serde_json::Value>,
      44              : }
      45              : 
      46              : /// `pageserver.toml`
      47              : ///
      48              : /// We use serde derive with `#[serde(default)]` to generate a deserializer
      49              : /// that fills in the default values for each config field.
      50              : ///
      51              : /// If there cannot be a static default value because we need to make runtime
      52              : /// checks to determine the default, make it an `Option` (which defaults to None).
      53              : /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
      54              : ///
      55              : /// Unknown fields are silently ignored during deserialization.
      56              : /// The alternative, which we used in the past, was to set `deny_unknown_fields`,
      57              : /// which fails deserialization, and hence pageserver startup, if there is an unknown field.
      58              : /// The reason we don't do that anymore is that it complicates
      59              : /// usage of config fields for feature flagging, which we commonly do for
      60              : /// region-by-region rollouts.
      61              : /// The complications mainly arise because the `pageserver.toml` contents on a
      62              : /// prod server have a separate lifecycle from the pageserver binary.
      63              : /// For instance, `pageserver.toml` contents today are defined in the internal
      64              : /// infra repo, and thus introducing a new config field to pageserver and
      65              : /// rolling it out to prod servers are separate commits in separate repos
      66              : /// that can't be made or rolled back atomically.
      67              : /// Rollbacks in particular pose a risk with deny_unknown_fields because
      68              : /// the old pageserver binary may reject a new config field, resulting in
      69              : /// an outage unless the person doing the pageserver rollback remembers
      70              : /// to also revert the commit that added the config field in to the
      71              : /// `pageserver.toml` templates in the internal infra repo.
      72              : /// (A pre-deploy config check would eliminate this risk during rollbacks,
      73              : ///  cf [here](https://github.com/neondatabase/cloud/issues/24349).)
      74              : /// In addition to this compatibility problem during emergency rollbacks,
      75              : /// deny_unknown_fields adds further complications when decommissioning a feature
      76              : /// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]
      77              : /// until all prod servers' `pageserver.toml` files have been updated to a version
      78              : /// that doesn't specify the flag. Otherwise new software would fail to start up.
      79              : /// This adds the requirement for an intermediate step where the new config field
      80              : /// is accepted but ignored, prolonging the decommissioning process by an entire
      81              : /// release cycle.
      82              : /// By contrast, with unknown fields silently ignored, decommissioning a feature
      83              : /// flag is a one-step process: we can skip the intermediate step and straight
      84              : /// remove the field from the [`ConfigToml`]. We leave the field in the
      85              : /// `pageserver.toml` files on prod servers until we reach certainty that we
      86              : /// will not roll back to old software whose behavior was dependent on config.
      87              : /// Then we can remove the field from the templates in the internal infra repo.
      88              : /// This process is [documented internally](
      89              : /// https://docs.neon.build/storage/pageserver_configuration.html).
      90              : ///
      91              : /// Note that the above relaxed compatibility for the config format does NOT APPLY
      92              : /// TO THE STORAGE FORMAT. As general guidance, when introducing storage format
      93              : /// changes, ensure that the potential rollback target version will be compatible
      94              : /// with the new format. This must hold regardless of what flags are set in the `pageserver.toml`:
      95              : /// any format version that exists in an environment must be compatible with the software that runs there.
      96              : /// Use a pageserver.toml flag only to gate whether software _writes_ the new format.
      97              : /// For more compatibility considerations, refer to [internal docs](
      98              : /// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)
      99              : #[serde_as]
     100            0 : #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
     101              : #[serde(default)]
     102              : pub struct ConfigToml {
     103              :     // types mapped 1:1 into the runtime PageServerConfig type
     104              :     pub listen_pg_addr: String,
     105              :     pub listen_http_addr: String,
     106              :     pub listen_https_addr: Option<String>,
     107              :     pub ssl_key_file: Utf8PathBuf,
     108              :     pub ssl_cert_file: Utf8PathBuf,
     109              :     #[serde(with = "humantime_serde")]
     110              :     pub ssl_cert_reload_period: Duration,
     111              :     pub ssl_ca_file: Option<Utf8PathBuf>,
     112              :     pub availability_zone: Option<String>,
     113              :     #[serde(with = "humantime_serde")]
     114              :     pub wait_lsn_timeout: Duration,
     115              :     #[serde(with = "humantime_serde")]
     116              :     pub wal_redo_timeout: Duration,
     117              :     pub superuser: String,
     118              :     pub locale: String,
     119              :     pub page_cache_size: usize,
     120              :     pub max_file_descriptors: usize,
     121              :     pub pg_distrib_dir: Option<Utf8PathBuf>,
     122              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     123              :     pub http_auth_type: AuthType,
     124              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     125              :     pub pg_auth_type: AuthType,
     126              :     pub auth_validation_public_key_path: Option<Utf8PathBuf>,
     127              :     pub remote_storage: Option<RemoteStorageConfig>,
     128              :     pub tenant_config: TenantConfigToml,
     129              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     130              :     pub broker_endpoint: storage_broker::Uri,
     131              :     #[serde(with = "humantime_serde")]
     132              :     pub broker_keepalive_interval: Duration,
     133              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     134              :     pub log_format: LogFormat,
     135              :     pub concurrent_tenant_warmup: NonZeroUsize,
     136              :     pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
     137              :     #[serde(with = "humantime_serde")]
     138              :     pub metric_collection_interval: Duration,
     139              :     pub metric_collection_endpoint: Option<reqwest::Url>,
     140              :     pub metric_collection_bucket: Option<RemoteStorageConfig>,
     141              :     #[serde(with = "humantime_serde")]
     142              :     pub synthetic_size_calculation_interval: Duration,
     143              :     pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
     144              :     pub test_remote_failures: u64,
     145              :     pub ondemand_download_behavior_treat_error_as_warn: bool,
     146              :     #[serde(with = "humantime_serde")]
     147              :     pub background_task_maximum_delay: Duration,
     148              :     pub control_plane_api: Option<reqwest::Url>,
     149              :     pub control_plane_api_token: Option<String>,
     150              :     pub control_plane_emergency_mode: bool,
     151              :     /// Unstable feature: subject to change or removal without notice.
     152              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     153              :     pub import_pgdata_upcall_api: Option<reqwest::Url>,
     154              :     /// Unstable feature: subject to change or removal without notice.
     155              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     156              :     pub import_pgdata_upcall_api_token: Option<String>,
     157              :     /// Unstable feature: subject to change or removal without notice.
     158              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     159              :     pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
     160              :     pub heatmap_upload_concurrency: usize,
     161              :     pub secondary_download_concurrency: usize,
     162              :     pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
     163              :     pub ingest_batch_size: u64,
     164              :     pub max_vectored_read_bytes: MaxVectoredReadBytes,
     165              :     pub image_compression: ImageCompressionAlgorithm,
     166              :     pub timeline_offloading: bool,
     167              :     pub ephemeral_bytes_per_memory_kb: usize,
     168              :     pub l0_flush: Option<crate::models::L0FlushConfig>,
     169              :     pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
     170              :     #[serde(skip_serializing_if = "Option::is_none")]
     171              :     pub no_sync: Option<bool>,
     172              :     pub wal_receiver_protocol: PostgresClientProtocol,
     173              :     pub page_service_pipelining: PageServicePipeliningConfig,
     174              :     pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
     175              :     pub enable_read_path_debugging: Option<bool>,
     176              :     #[serde(skip_serializing_if = "Option::is_none")]
     177              :     pub validate_wal_contiguity: Option<bool>,
     178              :     #[serde(skip_serializing_if = "Option::is_none")]
     179              :     pub load_previous_heatmap: Option<bool>,
     180              :     #[serde(skip_serializing_if = "Option::is_none")]
     181              :     pub generate_unarchival_heatmap: Option<bool>,
     182              :     pub tracing: Option<Tracing>,
     183              :     pub enable_tls_page_service_api: bool,
     184              : }
     185              : 
     186            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     187              : pub struct DiskUsageEvictionTaskConfig {
     188              :     pub max_usage_pct: utils::serde_percent::Percent,
     189              :     pub min_avail_bytes: u64,
     190              :     #[serde(with = "humantime_serde")]
     191              :     pub period: Duration,
     192              :     #[cfg(feature = "testing")]
     193              :     pub mock_statvfs: Option<statvfs::mock::Behavior>,
     194              :     /// Select sorting for evicted layers
     195              :     #[serde(default)]
     196              :     pub eviction_order: EvictionOrder,
     197              : }
     198              : 
     199            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     200              : #[serde(tag = "mode", rename_all = "kebab-case")]
     201              : pub enum PageServicePipeliningConfig {
     202              :     Serial,
     203              :     Pipelined(PageServicePipeliningConfigPipelined),
     204              : }
     205            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     206              : pub struct PageServicePipeliningConfigPipelined {
     207              :     /// Causes runtime errors if larger than max get_vectored batch size.
     208              :     pub max_batch_size: NonZeroUsize,
     209              :     pub execution: PageServiceProtocolPipelinedExecutionStrategy,
     210              : }
     211              : 
     212            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     213              : #[serde(rename_all = "kebab-case")]
     214              : pub enum PageServiceProtocolPipelinedExecutionStrategy {
     215              :     ConcurrentFutures,
     216              :     Tasks,
     217              : }
     218              : 
     219            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     220              : #[serde(tag = "mode", rename_all = "kebab-case")]
     221              : pub enum GetVectoredConcurrentIo {
     222              :     /// The read path is fully sequential: layers are visited
     223              :     /// one after the other and IOs are issued and waited upon
     224              :     /// from the same task that traverses the layers.
     225              :     Sequential,
     226              :     /// The read path still traverses layers sequentially, and
     227              :     /// index blocks will be read into the PS PageCache from
     228              :     /// that task, with waiting.
     229              :     /// But data IOs are dispatched and waited upon from a sidecar
     230              :     /// task so that the traversing task can continue to traverse
     231              :     /// layers while the IOs are in flight.
     232              :     /// If the PS PageCache miss rate is low, this improves
     233              :     /// throughput dramatically.
     234              :     SidecarTask,
     235              : }
     236              : 
     237            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     238              : pub struct Ratio {
     239              :     pub numerator: usize,
     240              :     pub denominator: usize,
     241              : }
     242              : 
     243            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     244              : pub struct OtelExporterConfig {
     245              :     pub endpoint: String,
     246              :     pub protocol: OtelExporterProtocol,
     247              :     #[serde(with = "humantime_serde")]
     248              :     pub timeout: Duration,
     249              : }
     250              : 
     251            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     252              : #[serde(rename_all = "kebab-case")]
     253              : pub enum OtelExporterProtocol {
     254              :     Grpc,
     255              :     HttpBinary,
     256              :     HttpJson,
     257              : }
     258              : 
     259            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     260              : pub struct Tracing {
     261              :     pub sampling_ratio: Ratio,
     262              :     pub export_config: OtelExporterConfig,
     263              : }
     264              : 
     265              : impl From<&OtelExporterConfig> for tracing_utils::ExportConfig {
     266            0 :     fn from(val: &OtelExporterConfig) -> Self {
     267            0 :         tracing_utils::ExportConfig {
     268            0 :             endpoint: Some(val.endpoint.clone()),
     269            0 :             protocol: val.protocol.into(),
     270            0 :             timeout: val.timeout,
     271            0 :         }
     272            0 :     }
     273              : }
     274              : 
     275              : impl From<OtelExporterProtocol> for tracing_utils::Protocol {
     276            0 :     fn from(val: OtelExporterProtocol) -> Self {
     277            0 :         match val {
     278            0 :             OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,
     279            0 :             OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,
     280            0 :             OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,
     281              :         }
     282            0 :     }
     283              : }
     284              : 
     285              : pub mod statvfs {
     286              :     pub mod mock {
     287            0 :         #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     288              :         #[serde(tag = "type")]
     289              :         pub enum Behavior {
     290              :             Success {
     291              :                 blocksize: u64,
     292              :                 total_blocks: u64,
     293              :                 name_filter: Option<utils::serde_regex::Regex>,
     294              :             },
     295              :             #[cfg(feature = "testing")]
     296              :             Failure { mocked_error: MockedError },
     297              :         }
     298              : 
     299              :         #[cfg(feature = "testing")]
     300            0 :         #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     301              :         #[allow(clippy::upper_case_acronyms)]
     302              :         pub enum MockedError {
     303              :             EIO,
     304              :         }
     305              : 
     306              :         #[cfg(feature = "testing")]
     307              :         impl From<MockedError> for nix::Error {
     308            0 :             fn from(e: MockedError) -> Self {
     309            0 :                 match e {
     310            0 :                     MockedError::EIO => nix::Error::EIO,
     311            0 :                 }
     312            0 :             }
     313              :         }
     314              :     }
     315              : }
     316              : 
     317            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     318              : #[serde(tag = "type", content = "args")]
     319              : pub enum EvictionOrder {
     320              :     RelativeAccessed {
     321              :         highest_layer_count_loses_first: bool,
     322              :     },
     323              : }
     324              : 
     325              : impl Default for EvictionOrder {
     326            4 :     fn default() -> Self {
     327            4 :         Self::RelativeAccessed {
     328            4 :             highest_layer_count_loses_first: true,
     329            4 :         }
     330            4 :     }
     331              : }
     332              : 
     333            0 : #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     334              : #[serde(transparent)]
     335              : pub struct MaxVectoredReadBytes(pub NonZeroUsize);
     336              : 
     337              : /// Tenant-level configuration values, used for various purposes.
     338            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     339              : #[serde(default)]
     340              : pub struct TenantConfigToml {
     341              :     // Flush out an inmemory layer, if it's holding WAL older than this
     342              :     // This puts a backstop on how much WAL needs to be re-digested if the
     343              :     // page server crashes.
     344              :     // This parameter actually determines L0 layer file size.
     345              :     pub checkpoint_distance: u64,
     346              :     // Inmemory layer is also flushed at least once in checkpoint_timeout to
     347              :     // eventually upload WAL after activity is stopped.
     348              :     #[serde(with = "humantime_serde")]
     349              :     pub checkpoint_timeout: Duration,
     350              :     // Target file size, when creating image and delta layers.
     351              :     // This parameter determines L1 layer file size.
     352              :     pub compaction_target_size: u64,
     353              :     // How often to check if there's compaction work to be done.
     354              :     // Duration::ZERO means automatic compaction is disabled.
     355              :     #[serde(with = "humantime_serde")]
     356              :     pub compaction_period: Duration,
     357              :     /// Level0 delta layer threshold for compaction.
     358              :     pub compaction_threshold: usize,
     359              :     /// Controls the amount of L0 included in a single compaction iteration.
     360              :     /// The unit is `checkpoint_distance`, i.e., a size.
     361              :     /// We add L0s to the set of layers to compact until their cumulative
     362              :     /// size exceeds `compaction_upper_limit * checkpoint_distance`.
     363              :     pub compaction_upper_limit: usize,
     364              :     pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
     365              :     /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
     366              :     /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
     367              :     pub compaction_l0_first: bool,
     368              :     /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
     369              :     /// has an effect if `compaction_l0_first` is true. Defaults to true.
     370              :     pub compaction_l0_semaphore: bool,
     371              :     /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
     372              :     /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
     373              :     /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
     374              :     /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
     375              :     pub l0_flush_delay_threshold: Option<usize>,
     376              :     /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
     377              :     /// to avoid deadlock. 0 to disable. Disabled by default.
     378              :     pub l0_flush_stall_threshold: Option<usize>,
     379              :     // Determines how much history is retained, to allow
     380              :     // branching and read replicas at an older point in time.
     381              :     // The unit is #of bytes of WAL.
     382              :     // Page versions older than this are garbage collected away.
     383              :     pub gc_horizon: u64,
     384              :     // Interval at which garbage collection is triggered.
     385              :     // Duration::ZERO means automatic GC is disabled
     386              :     #[serde(with = "humantime_serde")]
     387              :     pub gc_period: Duration,
     388              :     // Delta layer churn threshold to create L1 image layers.
     389              :     pub image_creation_threshold: usize,
     390              :     // Determines how much history is retained, to allow
     391              :     // branching and read replicas at an older point in time.
     392              :     // The unit is time.
     393              :     // Page versions older than this are garbage collected away.
     394              :     #[serde(with = "humantime_serde")]
     395              :     pub pitr_interval: Duration,
     396              :     /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
     397              :     #[serde(with = "humantime_serde")]
     398              :     pub walreceiver_connect_timeout: Duration,
     399              :     /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
     400              :     /// A stalled safekeeper will be changed to a newer one when it appears.
     401              :     #[serde(with = "humantime_serde")]
     402              :     pub lagging_wal_timeout: Duration,
     403              :     /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
     404              :     /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
     405              :     /// to avoid eager reconnects.
     406              :     pub max_lsn_wal_lag: NonZeroU64,
     407              :     pub eviction_policy: crate::models::EvictionPolicy,
     408              :     pub min_resident_size_override: Option<u64>,
     409              :     // See the corresponding metric's help string.
     410              :     #[serde(with = "humantime_serde")]
     411              :     pub evictions_low_residence_duration_metric_threshold: Duration,
     412              : 
     413              :     /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
     414              :     /// may be disabled if a Tenant will not have secondary locations: only secondary
     415              :     /// locations will use the heatmap uploaded by attached locations.
     416              :     #[serde(with = "humantime_serde")]
     417              :     pub heatmap_period: Duration,
     418              : 
     419              :     /// If true then SLRU segments are downloaded on demand; if false, SLRU segments are included in basebackup
     420              :     pub lazy_slru_download: bool,
     421              : 
     422              :     pub timeline_get_throttle: crate::models::ThrottleConfig,
     423              : 
     424              :     // How much WAL must be ingested before checking again whether a new image layer is required.
     425              :     // Expressed in multiples of checkpoint distance.
     426              :     pub image_layer_creation_check_threshold: u8,
     427              : 
     428              :     // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
     429              :     // Set to 0 to disable preemption.
     430              :     pub image_creation_preempt_threshold: usize,
     431              : 
     432              :     /// The length for an explicit LSN lease request.
     433              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     434              :     #[serde(with = "humantime_serde")]
     435              :     pub lsn_lease_length: Duration,
     436              : 
     437              :     /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
     438              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     439              :     #[serde(with = "humantime_serde")]
     440              :     pub lsn_lease_length_for_ts: Duration,
     441              : 
     442              :     /// Enable auto-offloading of timelines.
     443              :     /// (either this flag or the pageserver-global one need to be set)
     444              :     pub timeline_offloading: bool,
     445              : 
     446              :     pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
     447              : 
     448              :     /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
     449              :     /// `index_part.json`, and it cannot be reversed.
     450              :     pub rel_size_v2_enabled: bool,
     451              : 
     452              :     // gc-compaction related configs
     453              :     /// Enable automatic gc-compaction trigger on this tenant.
     454              :     pub gc_compaction_enabled: bool,
     455              :     /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
     456              :     /// gc-compaction will be triggered.
     457              :     pub gc_compaction_initial_threshold_kb: u64,
     458              :     /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
     459              :     /// is above this ratio, gc-compaction will be triggered.
     460              :     pub gc_compaction_ratio_percent: u64,
     461              :     /// Tenant level performance sampling ratio override. Controls the ratio of get page requests
     462              :     /// that will get perf sampling for the tenant.
     463              :     pub sampling_ratio: Option<Ratio>,
     464              : }
     465              : 
     466              : pub mod defaults { // Compile-time defaults for pageserver-wide settings; glob-imported by `ConfigToml::default()`.
     467              :     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
     468              : 
     469              :     use crate::models::ImageCompressionAlgorithm;
     470              : 
     471              :     pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s"; // humantime duration string, parsed in `ConfigToml::default()`
     472              :     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s"; // humantime duration string, parsed in `ConfigToml::default()`
     473              : 
     474              :     pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
     475              :     pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
     476              :         "C"
     477              :     } else {
     478              :         "C.UTF-8"
     479              :     };
     480              : 
     481              :     pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
     482              :     pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
     483              : 
     484              :     pub const DEFAULT_LOG_FORMAT: &str = "plain"; // converted with `LogFormat::from_str` in `ConfigToml::default()`
     485              : 
     486              :     pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8; // must be non-zero: wrapped in `NonZeroUsize` by `ConfigToml::default()`
     487              : 
     488              :     pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1; // must be non-zero: wrapped in `NonZeroUsize` by `ConfigToml::default()`
     489              : 
     490              :     pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
     491              :     pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
     492              :     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     493              :     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
     494              : 
     495              :     pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
     496              :     pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
     497              : 
     498              :     pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
     499              : 
     500              :     /// Soft limit for the maximum size of a vectored read.
     501              :     ///
     502              :     /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
     503              :     /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
     504              :     /// with 32k xids. That's the max number of XIDs on a single CLOG page. The size of such a record
     505              :     /// is `sizeof(TransactionId) * 32768` plus some fixed overhead (from `timestamp`, the Vec length and whatever extra serde serialization adds).
     506              :     /// That is, slightly above 128 kB.
     507              :     pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
     508              : 
     509              :     pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
     510              :         ImageCompressionAlgorithm::Zstd { level: Some(1) };
     511              : 
     512              :     pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
     513              : 
     514              :     pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
     515              : 
     516              :     pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
     517              :         utils::postgres_client::PostgresClientProtocol::Vanilla;
     518              : 
     519              :     pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
     520              :     pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
     521              : }
     522              : 
     523              : impl Default for ConfigToml { // Built from the `defaults` constants; string constants are parsed here and a malformed one panics via `expect`/`unwrap`.
     524          496 :     fn default() -> Self {
     525              :         use defaults::*;
     526              : 
     527              :         Self {
     528          496 :             listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
     529          496 :             listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
     530          496 :             listen_https_addr: (None),
     531          496 :             ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
     532          496 :             ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
     533          496 :             ssl_cert_reload_period: Duration::from_secs(60),
     534          496 :             ssl_ca_file: None,
     535          496 :             availability_zone: (None),
     536          496 :             wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
     537          496 :                 .expect("cannot parse default wait lsn timeout")),
     538          496 :             wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
     539          496 :                 .expect("cannot parse default wal redo timeout")),
     540          496 :             superuser: (DEFAULT_SUPERUSER.to_string()),
     541          496 :             locale: DEFAULT_LOCALE.to_string(),
     542          496 :             page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
     543          496 :             max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
     544          496 :             pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formerly, this was std::env::current_dir()
     545          496 :             http_auth_type: (AuthType::Trust),
     546          496 :             pg_auth_type: (AuthType::Trust),
     547          496 :             auth_validation_public_key_path: (None),
     548          496 :             remote_storage: None,
     549          496 :             broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
     550          496 :                 .parse()
     551          496 :                 .expect("failed to parse default broker endpoint")),
     552          496 :             broker_keepalive_interval: (humantime::parse_duration(
     553          496 :                 storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
     554          496 :             )
     555          496 :             .expect("cannot parse default keepalive interval")),
     556          496 :             log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
     557          496 : 
     558          496 :             concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
     559          496 :                 .expect("Invalid default constant")),
     560          496 :             concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
     561          496 :                 DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
     562          496 :             )
     563          496 :             .unwrap(),
     564          496 :             metric_collection_interval: (humantime::parse_duration(
     565          496 :                 DEFAULT_METRIC_COLLECTION_INTERVAL,
     566          496 :             )
     567          496 :             .expect("cannot parse default metric collection interval")),
     568          496 :             synthetic_size_calculation_interval: (humantime::parse_duration(
     569          496 :                 DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
     570          496 :             )
     571          496 :             .expect("cannot parse default synthetic size calculation interval")),
     572          496 :             metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
     573          496 : 
     574          496 :             metric_collection_bucket: (None),
     575          496 : 
     576          496 :             disk_usage_based_eviction: (None),
     577          496 : 
     578          496 :             test_remote_failures: (0),
     579          496 : 
     580          496 :             ondemand_download_behavior_treat_error_as_warn: (false),
     581          496 : 
     582          496 :             background_task_maximum_delay: (humantime::parse_duration(
     583          496 :                 DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
     584          496 :             )
     585          496 :             .unwrap()),
     586          496 : 
     587          496 :             control_plane_api: (None),
     588          496 :             control_plane_api_token: (None),
     589          496 :             control_plane_emergency_mode: (false),
     590          496 : 
     591          496 :             import_pgdata_upcall_api: (None),
     592          496 :             import_pgdata_upcall_api_token: (None),
     593          496 :             import_pgdata_aws_endpoint_url: (None),
     594          496 : 
     595          496 :             heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
     596          496 :             secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
     597          496 : 
     598          496 :             ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
     599          496 : 
     600          496 :             virtual_file_io_engine: None,
     601          496 : 
     602          496 :             max_vectored_read_bytes: (MaxVectoredReadBytes(
     603          496 :                 NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
     604          496 :             )),
     605          496 :             image_compression: (DEFAULT_IMAGE_COMPRESSION),
     606          496 :             timeline_offloading: true,
     607          496 :             ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
     608          496 :             l0_flush: None,
     609          496 :             virtual_file_io_mode: None,
     610          496 :             tenant_config: TenantConfigToml::default(),
     611          496 :             no_sync: None,
     612          496 :             wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
     613          496 :             page_service_pipelining: if !cfg!(test) { // `Serial` outside test builds; `Pipelined` under `cfg!(test)`
     614          496 :                 PageServicePipeliningConfig::Serial
     615              :             } else {
     616            0 :                 PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
     617            0 :                     max_batch_size: NonZeroUsize::new(32).unwrap(),
     618            0 :                     execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
     619            0 :                 })
     620              :             },
     621          496 :             get_vectored_concurrent_io: if !cfg!(test) { // `Sequential` outside test builds; `SidecarTask` under `cfg!(test)`
     622          496 :                 GetVectoredConcurrentIo::Sequential
     623              :             } else {
     624            0 :                 GetVectoredConcurrentIo::SidecarTask
     625              :             },
     626          496 :             enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") { // `Some(true)` for test builds or the "testing" feature; `None` otherwise
     627          496 :                 Some(true)
     628              :             } else {
     629            0 :                 None
     630              :             },
     631          496 :             validate_wal_contiguity: None,
     632          496 :             load_previous_heatmap: None,
     633          496 :             generate_unarchival_heatmap: None,
     634          496 :             tracing: None,
     635          496 :             enable_tls_page_service_api: false,
     636          496 :         }
     637          496 :     }
     638              : }
     639              : 
     640              : pub mod tenant_conf_defaults { // Compile-time defaults for per-tenant settings; glob-imported by `TenantConfigToml::default()`.
     641              : 
     642              :     // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
     643              :     // would be more appropriate. But a low value forces the code to be exercised more,
     644              :     // which is good for now to trigger bugs.
     645              :     // This parameter actually determines L0 layer file size.
     646              :     pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
     647              :     pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
     648              : 
     649              :     // FIXME: the configs below are only used by the legacy algorithm. The new algorithm
     650              :     // has different parameters.
     651              : 
     652              :     // Target file size, when creating image and delta layers.
     653              :     // This parameter determines L1 layer file size.
     654              :     pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
     655              : 
     656              :     pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
     657              :     pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
     658              : 
     659              :     // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
     660              :     // 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
     661              :     // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
     662              :     // with this config, we can get a maximum peak compaction usage of 9 GB. NOTE(review): 3/4*16 is 12, not 9 -- confirm the CPU count and the resulting peak figure.
     663              :     pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
     664              :     // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
     665              :     // read amp.
     666              :     pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
     667              :     pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
     668              : 
     669              :     pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
     670              :         crate::models::CompactionAlgorithm::Legacy;
     671              : 
     672              :     pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
     673              : 
     674              :     // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
     675              :     // If there's a need to decrease this value, first make sure that GC
     676              :     // doesn't hold a layer map write lock for non-trivial operations.
     677              :     // Relevant: https://github.com/neondatabase/neon/issues/3394
     678              :     pub const DEFAULT_GC_PERIOD: &str = "1 hr";
     679              :     pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
     680              :     // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
     681              :     // layer creation will end immediately. Set to 0 to disable.
     682              :     pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
     683              :     pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
     684              :     pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
     685              :     pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
     686              :     // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
     687              :     // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
     688              :     // throughputs up to 1GiB/s per timeline.
     689              :     pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
     690              :     pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
     691              :     // By default ingest enough WAL for two new L0 layers before checking if new
     692              :     // image layers should be created.
     693              :     pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
     694              :     pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
     695              :     pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // value is in KB: 5 * 1024 * 1024 KB = 5 GB
     696              :     pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
     697              : }
     698              : 
     699              : impl Default for TenantConfigToml { // Built from `tenant_conf_defaults` and `LsnLease` constants; a malformed duration string or zero WAL-lag constant panics via `expect`.
     700          496 :     fn default() -> Self {
     701              :         use tenant_conf_defaults::*;
     702          496 :         Self {
     703          496 :             checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
     704          496 :             checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
     705          496 :                 .expect("cannot parse default checkpoint timeout"),
     706          496 :             compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
     707          496 :             compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
     708          496 :                 .expect("cannot parse default compaction period"),
     709          496 :             compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
     710          496 :             compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
     711          496 :             compaction_algorithm: crate::models::CompactionAlgorithmSettings {
     712          496 :                 kind: DEFAULT_COMPACTION_ALGORITHM,
     713          496 :             },
     714          496 :             compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
     715          496 :             compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
     716          496 :             l0_flush_delay_threshold: None,
     717          496 :             l0_flush_stall_threshold: None,
     718          496 :             gc_horizon: DEFAULT_GC_HORIZON,
     719          496 :             gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
     720          496 :                 .expect("cannot parse default gc period"),
     721          496 :             image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
     722          496 :             pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
     723          496 :                 .expect("cannot parse default PITR interval"),
     724          496 :             walreceiver_connect_timeout: humantime::parse_duration(
     725          496 :                 DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
     726          496 :             )
     727          496 :             .expect("cannot parse default walreceiver connect timeout"),
     728          496 :             lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
     729          496 :                 .expect("cannot parse default walreceiver lagging wal timeout"),
     730          496 :             max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
     731          496 :                 .expect("cannot parse default max walreceiver Lsn wal lag"),
     732          496 :             eviction_policy: crate::models::EvictionPolicy::NoEviction,
     733          496 :             min_resident_size_override: None,
     734          496 :             evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
     735          496 :                 DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
     736          496 :             )
     737          496 :             .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
     738          496 :             heatmap_period: Duration::ZERO,
     739          496 :             lazy_slru_download: false,
     740          496 :             timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
     741          496 :             image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
     742          496 :             image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
     743          496 :             lsn_lease_length: LsnLease::DEFAULT_LENGTH,
     744          496 :             lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
     745          496 :             timeline_offloading: true,
     746          496 :             wal_receiver_protocol_override: None,
     747          496 :             rel_size_v2_enabled: false,
     748          496 :             gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
     749          496 :             gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
     750          496 :             gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
     751          496 :             sampling_ratio: None,
     752          496 :         }
     753          496 :     }
     754              : }

Generated by: LCOV version 2.1-beta