LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - config.rs (source / functions) Coverage Total Hit
Test: aca806cab4756d7eb6a304846130f4a73a5d5393.info Lines: 79.7 % 202 161
Test Date: 2025-04-24 20:31:15 Functions: 1.4 % 293 4

            Line data    Source code
       1              : use camino::Utf8PathBuf;
       2              : 
       3              : #[cfg(test)]
       4              : mod tests;
       5              : 
       6              : use const_format::formatcp;
       7              : pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
       8              : pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
       9              : pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
      10              : pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
      11              : 
      12              : use std::collections::HashMap;
      13              : use std::num::{NonZeroU64, NonZeroUsize};
      14              : use std::str::FromStr;
      15              : use std::time::Duration;
      16              : 
      17              : use postgres_backend::AuthType;
      18              : use remote_storage::RemoteStorageConfig;
      19              : use serde_with::serde_as;
      20              : use utils::logging::LogFormat;
      21              : use utils::postgres_client::PostgresClientProtocol;
      22              : 
      23              : use crate::models::{ImageCompressionAlgorithm, LsnLease};
      24              : 
      25              : // Certain metadata (e.g. externally-addressable name, AZ) is delivered
      26              : // as a separate structure.  This information is not needed by the pageserver
      27              : // itself, it is only used for registering the pageserver with the control
      28              : // plane and/or storage controller.
      29              : //
      30            9 : #[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
      31              : pub struct NodeMetadata {
      32              :     #[serde(rename = "host")]
      33              :     pub postgres_host: String,
      34              :     #[serde(rename = "port")]
      35              :     pub postgres_port: u16,
      36              :     pub http_host: String,
      37              :     pub http_port: u16,
      38              :     pub https_port: Option<u16>,
      39              : 
      40              :     // Deployment tools may write fields to the metadata file beyond what we
      41              :     // use in this type: this type intentionally only names the fields that we require.
      42              :     #[serde(flatten)]
      43              :     pub other: HashMap<String, serde_json::Value>,
      44              : }
      45              : 
      46              : /// `pageserver.toml`
      47              : ///
      48              : /// We use serde derive with `#[serde(default)]` to generate a deserializer
      49              : /// that fills in the default values for each config field.
      50              : ///
      51              : /// If there cannot be a static default value because we need to make runtime
      52              : /// checks to determine the default, make it an `Option` (which defaults to None).
      53              : /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
      54              : ///
      55              : /// Unknown fields are silently ignored during deserialization.
      56              : /// The alternative, which we used in the past, was to set `deny_unknown_fields`,
      57              : /// which fails deserialization, and hence pageserver startup, if there is an unknown field.
      58              : /// The reason we don't do that anymore is that it complicates
      59              : /// usage of config fields for feature flagging, which we commonly do for
      60              : /// region-by-region rollouts.
      61              : /// The complications mainly arise because the `pageserver.toml` contents on a
      62              : /// prod server have a separate lifecycle from the pageserver binary.
      63              : /// For instance, `pageserver.toml` contents today are defined in the internal
      64              : /// infra repo, and thus introducing a new config field to pageserver and
      65              : /// rolling it out to prod servers are separate commits in separate repos
      66              : /// that can't be made or rolled back atomically.
      67              : /// Rollbacks in particular pose a risk with deny_unknown_fields because
      68              : /// the old pageserver binary may reject a new config field, resulting in
      69              : /// an outage unless the person doing the pageserver rollback remembers
      70              : /// to also revert the commit that added the config field to the
      71              : /// `pageserver.toml` templates in the internal infra repo.
      72              : /// (A pre-deploy config check would eliminate this risk during rollbacks,
      73              : ///  cf [here](https://github.com/neondatabase/cloud/issues/24349).)
      74              : /// In addition to this compatibility problem during emergency rollbacks,
      75              : /// deny_unknown_fields adds further complications when decommissioning a feature
      76              : /// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]
      77              : /// until all prod servers' `pageserver.toml` files have been updated to a version
      78              : /// that doesn't specify the flag. Otherwise new software would fail to start up.
      79              : /// This adds the requirement for an intermediate step where the new config field
      80              : /// is accepted but ignored, prolonging the decommissioning process by an entire
      81              : /// release cycle.
      82              : /// By contrast, with unknown fields silently ignored, decommissioning a feature
      83              : /// flag is a one-step process: we can skip the intermediate step and directly
      84              : /// remove the field from the [`ConfigToml`]. We leave the field in the
      85              : /// `pageserver.toml` files on prod servers until we reach certainty that we
      86              : /// will not roll back to old software whose behavior was dependent on config.
      87              : /// Then we can remove the field from the templates in the internal infra repo.
      88              : /// This process is [documented internally](
      89              : /// https://docs.neon.build/storage/pageserver_configuration.html).
      90              : ///
      91              : /// Note that the above relaxed compatibility for the config format does NOT APPLY
      92              : /// TO THE STORAGE FORMAT. As general guidance, when introducing storage format
      93              : /// changes, ensure that the potential rollback target version will be compatible
      94              : /// with the new format. This must hold regardless of what flags are set in the `pageserver.toml`:
      95              : /// any format version that exists in an environment must be compatible with the software that runs there.
      96              : /// Use a pageserver.toml flag only to gate whether software _writes_ the new format.
      97              : /// For more compatibility considerations, refer to [internal docs](
      98              : /// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)
      99              : #[serde_as]
     100            0 : #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
     101              : #[serde(default)]
     102              : pub struct ConfigToml {
     103              :     // types mapped 1:1 into the runtime PageServerConfig type
     104              :     pub listen_pg_addr: String,
     105              :     pub listen_http_addr: String,
     106              :     pub listen_https_addr: Option<String>,
     107              :     pub ssl_key_file: Utf8PathBuf,
     108              :     pub ssl_cert_file: Utf8PathBuf,
     109              :     #[serde(with = "humantime_serde")]
     110              :     pub ssl_cert_reload_period: Duration,
     111              :     pub ssl_ca_file: Option<Utf8PathBuf>,
     112              :     pub availability_zone: Option<String>,
     113              :     #[serde(with = "humantime_serde")]
     114              :     pub wait_lsn_timeout: Duration,
     115              :     #[serde(with = "humantime_serde")]
     116              :     pub wal_redo_timeout: Duration,
     117              :     pub superuser: String,
     118              :     pub locale: String,
     119              :     pub page_cache_size: usize,
     120              :     pub max_file_descriptors: usize,
     121              :     pub pg_distrib_dir: Option<Utf8PathBuf>,
     122              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     123              :     pub http_auth_type: AuthType,
     124              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     125              :     pub pg_auth_type: AuthType,
     126              :     pub auth_validation_public_key_path: Option<Utf8PathBuf>,
     127              :     pub remote_storage: Option<RemoteStorageConfig>,
     128              :     pub tenant_config: TenantConfigToml,
     129              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     130              :     pub broker_endpoint: storage_broker::Uri,
     131              :     #[serde(with = "humantime_serde")]
     132              :     pub broker_keepalive_interval: Duration,
     133              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     134              :     pub log_format: LogFormat,
     135              :     pub concurrent_tenant_warmup: NonZeroUsize,
     136              :     pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
     137              :     #[serde(with = "humantime_serde")]
     138              :     pub metric_collection_interval: Duration,
     139              :     pub metric_collection_endpoint: Option<reqwest::Url>,
     140              :     pub metric_collection_bucket: Option<RemoteStorageConfig>,
     141              :     #[serde(with = "humantime_serde")]
     142              :     pub synthetic_size_calculation_interval: Duration,
     143              :     pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
     144              :     pub test_remote_failures: u64,
     145              :     pub ondemand_download_behavior_treat_error_as_warn: bool,
     146              :     #[serde(with = "humantime_serde")]
     147              :     pub background_task_maximum_delay: Duration,
     148              :     pub control_plane_api: Option<reqwest::Url>,
     149              :     pub control_plane_api_token: Option<String>,
     150              :     pub control_plane_emergency_mode: bool,
     151              :     /// Unstable feature: subject to change or removal without notice.
     152              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     153              :     pub import_pgdata_upcall_api: Option<reqwest::Url>,
     154              :     /// Unstable feature: subject to change or removal without notice.
     155              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     156              :     pub import_pgdata_upcall_api_token: Option<String>,
     157              :     /// Unstable feature: subject to change or removal without notice.
     158              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     159              :     pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
     160              :     pub heatmap_upload_concurrency: usize,
     161              :     pub secondary_download_concurrency: usize,
     162              :     pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
     163              :     pub ingest_batch_size: u64,
     164              :     pub max_vectored_read_bytes: MaxVectoredReadBytes,
     165              :     pub image_compression: ImageCompressionAlgorithm,
     166              :     pub timeline_offloading: bool,
     167              :     pub ephemeral_bytes_per_memory_kb: usize,
     168              :     pub l0_flush: Option<crate::models::L0FlushConfig>,
     169              :     pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
     170              :     #[serde(skip_serializing_if = "Option::is_none")]
     171              :     pub no_sync: Option<bool>,
     172              :     pub wal_receiver_protocol: PostgresClientProtocol,
     173              :     pub page_service_pipelining: PageServicePipeliningConfig,
     174              :     pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
     175              :     pub enable_read_path_debugging: Option<bool>,
     176              :     #[serde(skip_serializing_if = "Option::is_none")]
     177              :     pub validate_wal_contiguity: Option<bool>,
     178              :     #[serde(skip_serializing_if = "Option::is_none")]
     179              :     pub load_previous_heatmap: Option<bool>,
     180              :     #[serde(skip_serializing_if = "Option::is_none")]
     181              :     pub generate_unarchival_heatmap: Option<bool>,
     182              :     pub tracing: Option<Tracing>,
     183              :     pub enable_tls_page_service_api: bool,
     184              :     pub dev_mode: bool,
     185              : }
     186              : 
     187            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     188              : pub struct DiskUsageEvictionTaskConfig {
     189              :     pub max_usage_pct: utils::serde_percent::Percent,
     190              :     pub min_avail_bytes: u64,
     191              :     #[serde(with = "humantime_serde")]
     192              :     pub period: Duration,
     193              :     #[cfg(feature = "testing")]
     194              :     pub mock_statvfs: Option<statvfs::mock::Behavior>,
     195              :     /// Select sorting for evicted layers
     196              :     #[serde(default)]
     197              :     pub eviction_order: EvictionOrder,
     198              : }
     199              : 
     200            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     201              : #[serde(tag = "mode", rename_all = "kebab-case")]
     202              : pub enum PageServicePipeliningConfig {
     203              :     Serial,
     204              :     Pipelined(PageServicePipeliningConfigPipelined),
     205              : }
     206            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     207              : pub struct PageServicePipeliningConfigPipelined {
     208              :     /// Causes runtime errors if larger than max get_vectored batch size.
     209              :     pub max_batch_size: NonZeroUsize,
     210              :     pub execution: PageServiceProtocolPipelinedExecutionStrategy,
     211              :     // The default below is such that new versions of the software can start
     212              :     // with the old configuration.
     213              :     #[serde(default)]
     214              :     pub batching: PageServiceProtocolPipelinedBatchingStrategy,
     215              : }
     216              : 
     217            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     218              : #[serde(rename_all = "kebab-case")]
     219              : pub enum PageServiceProtocolPipelinedExecutionStrategy {
     220              :     ConcurrentFutures,
     221              :     Tasks,
     222              : }
     223              : 
     224            0 : #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     225              : #[serde(rename_all = "kebab-case")]
     226              : pub enum PageServiceProtocolPipelinedBatchingStrategy {
     227              :     /// All get page requests in a batch will be at the same LSN
     228              :     #[default]
     229              :     UniformLsn,
     230              :     /// Get page requests in a batch may be at different LSN
     231              :     ///
     232              :     /// One key cannot be present more than once at different LSNs in
     233              :     /// the same batch.
     234              :     ScatteredLsn,
     235              : }
     236              : 
     237            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     238              : #[serde(tag = "mode", rename_all = "kebab-case")]
     239              : pub enum GetVectoredConcurrentIo {
     240              :     /// The read path is fully sequential: layers are visited
     241              :     /// one after the other and IOs are issued and waited upon
     242              :     /// from the same task that traverses the layers.
     243              :     Sequential,
     244              :     /// The read path still traverses layers sequentially, and
     245              :     /// index blocks will be read into the PS PageCache from
     246              :     /// that task, with waiting.
     247              :     /// But data IOs are dispatched and waited upon from a sidecar
     248              :     /// task so that the traversing task can continue to traverse
     249              :     /// layers while the IOs are in flight.
     250              :     /// If the PS PageCache miss rate is low, this improves
     251              :     /// throughput dramatically.
     252              :     SidecarTask,
     253              : }
     254              : 
     255            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     256              : pub struct Ratio {
     257              :     pub numerator: usize,
     258              :     pub denominator: usize,
     259              : }
     260              : 
     261            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     262              : pub struct OtelExporterConfig {
     263              :     pub endpoint: String,
     264              :     pub protocol: OtelExporterProtocol,
     265              :     #[serde(with = "humantime_serde")]
     266              :     pub timeout: Duration,
     267              : }
     268              : 
     269            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     270              : #[serde(rename_all = "kebab-case")]
     271              : pub enum OtelExporterProtocol {
     272              :     Grpc,
     273              :     HttpBinary,
     274              :     HttpJson,
     275              : }
     276              : 
     277            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     278              : pub struct Tracing {
     279              :     pub sampling_ratio: Ratio,
     280              :     pub export_config: OtelExporterConfig,
     281              : }
     282              : 
     283              : impl From<&OtelExporterConfig> for tracing_utils::ExportConfig {
     284            0 :     fn from(val: &OtelExporterConfig) -> Self {
     285            0 :         tracing_utils::ExportConfig {
     286            0 :             endpoint: Some(val.endpoint.clone()),
     287            0 :             protocol: val.protocol.into(),
     288            0 :             timeout: val.timeout,
     289            0 :         }
     290            0 :     }
     291              : }
     292              : 
     293              : impl From<OtelExporterProtocol> for tracing_utils::Protocol {
     294            0 :     fn from(val: OtelExporterProtocol) -> Self {
     295            0 :         match val {
     296            0 :             OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,
     297            0 :             OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,
     298            0 :             OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,
     299              :         }
     300            0 :     }
     301              : }
     302              : 
     303              : pub mod statvfs {
     304              :     pub mod mock {
     305            0 :         #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     306              :         #[serde(tag = "type")]
     307              :         pub enum Behavior {
     308              :             Success {
     309              :                 blocksize: u64,
     310              :                 total_blocks: u64,
     311              :                 name_filter: Option<utils::serde_regex::Regex>,
     312              :             },
     313              :             #[cfg(feature = "testing")]
     314              :             Failure { mocked_error: MockedError },
     315              :         }
     316              : 
     317              :         #[cfg(feature = "testing")]
     318            0 :         #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     319              :         #[allow(clippy::upper_case_acronyms)]
     320              :         pub enum MockedError {
     321              :             EIO,
     322              :         }
     323              : 
     324              :         #[cfg(feature = "testing")]
     325              :         impl From<MockedError> for nix::Error {
     326            0 :             fn from(e: MockedError) -> Self {
     327            0 :                 match e {
     328            0 :                     MockedError::EIO => nix::Error::EIO,
     329            0 :                 }
     330            0 :             }
     331              :         }
     332              :     }
     333              : }
     334              : 
     335            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     336              : #[serde(tag = "type", content = "args")]
     337              : pub enum EvictionOrder {
     338              :     RelativeAccessed {
     339              :         highest_layer_count_loses_first: bool,
     340              :     },
     341              : }
     342              : 
     343              : impl Default for EvictionOrder {
     344           12 :     fn default() -> Self {
     345           12 :         Self::RelativeAccessed {
     346           12 :             highest_layer_count_loses_first: true,
     347           12 :         }
     348           12 :     }
     349              : }
     350              : 
     351            0 : #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     352              : #[serde(transparent)]
     353              : pub struct MaxVectoredReadBytes(pub NonZeroUsize);
     354              : 
     355              : /// Tenant-level configuration values, used for various purposes.
     356            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     357              : #[serde(default)]
     358              : pub struct TenantConfigToml {
     359              :     // Flush out an inmemory layer, if it's holding WAL older than this
     360              :     // This puts a backstop on how much WAL needs to be re-digested if the
     361              :     // page server crashes.
     362              :     // This parameter actually determines L0 layer file size.
     363              :     pub checkpoint_distance: u64,
     364              :     // Inmemory layer is also flushed at least once in checkpoint_timeout to
     365              :     // eventually upload WAL after activity is stopped.
     366              :     #[serde(with = "humantime_serde")]
     367              :     pub checkpoint_timeout: Duration,
     368              :     // Target file size, when creating image and delta layers.
     369              :     // This parameter determines L1 layer file size.
     370              :     pub compaction_target_size: u64,
     371              :     // How often to check if there's compaction work to be done.
     372              :     // Duration::ZERO means automatic compaction is disabled.
     373              :     #[serde(with = "humantime_serde")]
     374              :     pub compaction_period: Duration,
     375              :     /// Level0 delta layer threshold for compaction.
     376              :     pub compaction_threshold: usize,
     377              :     /// Controls the amount of L0 included in a single compaction iteration.
     378              :     /// The unit is `checkpoint_distance`, i.e., a size.
     379              :     /// We add L0s to the set of layers to compact until their cumulative
     380              :     /// size exceeds `compaction_upper_limit * checkpoint_distance`.
     381              :     pub compaction_upper_limit: usize,
     382              :     pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
     383              :     /// If true, enable shard ancestor compaction (enabled by default).
     384              :     pub compaction_shard_ancestor: bool,
     385              :     /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
     386              :     /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
     387              :     pub compaction_l0_first: bool,
     388              :     /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
     389              :     /// has an effect if `compaction_l0_first` is true. Defaults to true.
     390              :     pub compaction_l0_semaphore: bool,
     391              :     /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
     392              :     /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
     393              :     /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
     394              :     /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
     395              :     pub l0_flush_delay_threshold: Option<usize>,
     396              :     /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
     397              :     /// to avoid deadlock. 0 to disable. Disabled by default.
     398              :     pub l0_flush_stall_threshold: Option<usize>,
     399              :     // Determines how much history is retained, to allow
     400              :     // branching and read replicas at an older point in time.
     401              :     // The unit is #of bytes of WAL.
     402              :     // Page versions older than this are garbage collected away.
     403              :     pub gc_horizon: u64,
     404              :     // Interval at which garbage collection is triggered.
     405              :     // Duration::ZERO means automatic GC is disabled
     406              :     #[serde(with = "humantime_serde")]
     407              :     pub gc_period: Duration,
     408              :     // Delta layer churn threshold to create L1 image layers.
     409              :     pub image_creation_threshold: usize,
     410              :     // Determines how much history is retained, to allow
     411              :     // branching and read replicas at an older point in time.
     412              :     // The unit is time.
     413              :     // Page versions older than this are garbage collected away.
     414              :     #[serde(with = "humantime_serde")]
     415              :     pub pitr_interval: Duration,
     416              :     /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
     417              :     #[serde(with = "humantime_serde")]
     418              :     pub walreceiver_connect_timeout: Duration,
     419              :     /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
     420              :     /// A stalled safekeeper will be changed to a newer one when it appears.
     421              :     #[serde(with = "humantime_serde")]
     422              :     pub lagging_wal_timeout: Duration,
     423              :     /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
     424              :     /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
     425              :     /// to avoid eager reconnects.
     426              :     pub max_lsn_wal_lag: NonZeroU64,
     427              :     pub eviction_policy: crate::models::EvictionPolicy,
     428              :     pub min_resident_size_override: Option<u64>,
     429              :     // See the corresponding metric's help string.
     430              :     #[serde(with = "humantime_serde")]
     431              :     pub evictions_low_residence_duration_metric_threshold: Duration,
     432              : 
     433              :     /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
     434              :     /// may be disabled if a Tenant will not have secondary locations: only secondary
     435              :     /// locations will use the heatmap uploaded by attached locations.
     436              :     #[serde(with = "humantime_serde")]
     437              :     pub heatmap_period: Duration,
     438              : 
     439              :     /// If true, SLRU segments are downloaded on demand; if false, SLRU segments are included in basebackup
     440              :     pub lazy_slru_download: bool,
     441              : 
     442              :     pub timeline_get_throttle: crate::models::ThrottleConfig,
     443              : 
     444              :     // How much WAL must be ingested before checking again whether a new image layer is required.
     445              :     // Expressed in multiples of checkpoint distance.
     446              :     pub image_layer_creation_check_threshold: u8,
     447              : 
     448              :     // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
     449              :     // Set to 0 to disable preemption.
     450              :     pub image_creation_preempt_threshold: usize,
     451              : 
     452              :     /// The length for an explicit LSN lease request.
     453              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     454              :     #[serde(with = "humantime_serde")]
     455              :     pub lsn_lease_length: Duration,
     456              : 
     457              :     /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
     458              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     459              :     #[serde(with = "humantime_serde")]
     460              :     pub lsn_lease_length_for_ts: Duration,
     461              : 
     462              :     /// Enable auto-offloading of timelines.
     463              :     /// (either this flag or the pageserver-global one needs to be set)
     464              :     pub timeline_offloading: bool,
     465              : 
     466              :     pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
     467              : 
     468              :     /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
     469              :     /// `index_part.json`, and it cannot be reversed.
     470              :     pub rel_size_v2_enabled: bool,
     471              : 
     472              :     // gc-compaction related configs
     473              :     /// Enable automatic gc-compaction trigger on this tenant.
     474              :     pub gc_compaction_enabled: bool,
     475              :     /// Enable verification of gc-compaction results.
     476              :     pub gc_compaction_verification: bool,
     477              :     /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
     478              :     /// gc-compaction will be triggered.
     479              :     pub gc_compaction_initial_threshold_kb: u64,
     480              :     /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
     481              :     /// is above this ratio, gc-compaction will be triggered.
     482              :     pub gc_compaction_ratio_percent: u64,
     483              :     /// Tenant level performance sampling ratio override. Controls the ratio of get page requests
     484              :     /// that will get perf sampling for the tenant.
     485              :     pub sampling_ratio: Option<Ratio>,
     486              : }
     487              : 
     488              : pub mod defaults {
     489              :     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
     490              : 
     491              :     use crate::models::ImageCompressionAlgorithm;
     492              : 
     493              :     pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
     494              :     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
     495              : 
     496              :     pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
     497              :     pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
     498              :         "C"
     499              :     } else {
     500              :         "C.UTF-8"
     501              :     };
     502              : 
     503              :     pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
     504              :     pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
     505              : 
     506              :     pub const DEFAULT_LOG_FORMAT: &str = "plain";
     507              : 
     508              :     pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
     509              : 
     510              :     pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
     511              : 
     512              :     pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
     513              :     pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
     514              :     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     515              :     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
     516              : 
     517              :     pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
     518              :     pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
     519              : 
     520              :     pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
     521              : 
     522              :     /// Soft limit for the maximum size of a vectored read.
     523              :     ///
     524              :     /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
     525              :     /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
     526              :     /// with 32k xids. That's the max number of XIDS on a single CLOG page. The size of such a record
     527              :     /// is `sizeof(TransactionId) * 32768 + (some fixed overhead from 'timestamp', the Vec length and whatever extra serde serialization adds)`.
     528              :     /// That is, slightly above 128 kB.
     529              :     pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
     530              : 
     531              :     pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
     532              :         ImageCompressionAlgorithm::Zstd { level: Some(1) };
     533              : 
     534              :     pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
     535              : 
     536              :     pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
     537              : 
     538              :     pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
     539              :         utils::postgres_client::PostgresClientProtocol::Vanilla;
     540              : 
     541              :     pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
     542              :     pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
     543              : }
     544              : 
     545              : impl Default for ConfigToml {
     546         1500 :     fn default() -> Self {
     547              :         use defaults::*;
     548              : 
     549              :         Self {
     550         1500 :             listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
     551         1500 :             listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
     552         1500 :             listen_https_addr: (None),
     553         1500 :             ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
     554         1500 :             ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
     555         1500 :             ssl_cert_reload_period: Duration::from_secs(60),
     556         1500 :             ssl_ca_file: None,
     557         1500 :             availability_zone: (None),
     558         1500 :             wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
     559         1500 :                 .expect("cannot parse default wait lsn timeout")),
     560         1500 :             wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
     561         1500 :                 .expect("cannot parse default wal redo timeout")),
     562         1500 :             superuser: (DEFAULT_SUPERUSER.to_string()),
     563         1500 :             locale: DEFAULT_LOCALE.to_string(),
     564         1500 :             page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
     565         1500 :             max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
     566         1500 :             pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formerly, this was std::env::current_dir()
     567         1500 :             http_auth_type: (AuthType::Trust),
     568         1500 :             pg_auth_type: (AuthType::Trust),
     569         1500 :             auth_validation_public_key_path: (None),
     570         1500 :             remote_storage: None,
     571         1500 :             broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
     572         1500 :                 .parse()
     573         1500 :                 .expect("failed to parse default broker endpoint")),
     574         1500 :             broker_keepalive_interval: (humantime::parse_duration(
     575         1500 :                 storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
     576         1500 :             )
     577         1500 :             .expect("cannot parse default keepalive interval")),
     578         1500 :             log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
     579         1500 : 
     580         1500 :             concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
     581         1500 :                 .expect("Invalid default constant")),
     582         1500 :             concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
     583         1500 :                 DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
     584         1500 :             )
     585         1500 :             .unwrap(),
     586         1500 :             metric_collection_interval: (humantime::parse_duration(
     587         1500 :                 DEFAULT_METRIC_COLLECTION_INTERVAL,
     588         1500 :             )
     589         1500 :             .expect("cannot parse default metric collection interval")),
     590         1500 :             synthetic_size_calculation_interval: (humantime::parse_duration(
     591         1500 :                 DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
     592         1500 :             )
     593         1500 :             .expect("cannot parse default synthetic size calculation interval")),
     594         1500 :             metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
     595         1500 : 
     596         1500 :             metric_collection_bucket: (None),
     597         1500 : 
     598         1500 :             disk_usage_based_eviction: (None),
     599         1500 : 
     600         1500 :             test_remote_failures: (0),
     601         1500 : 
     602         1500 :             ondemand_download_behavior_treat_error_as_warn: (false),
     603         1500 : 
     604         1500 :             background_task_maximum_delay: (humantime::parse_duration(
     605         1500 :                 DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
     606         1500 :             )
     607         1500 :             .unwrap()),
     608         1500 : 
     609         1500 :             control_plane_api: (None),
     610         1500 :             control_plane_api_token: (None),
     611         1500 :             control_plane_emergency_mode: (false),
     612         1500 : 
     613         1500 :             import_pgdata_upcall_api: (None),
     614         1500 :             import_pgdata_upcall_api_token: (None),
     615         1500 :             import_pgdata_aws_endpoint_url: (None),
     616         1500 : 
     617         1500 :             heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
     618         1500 :             secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
     619         1500 : 
     620         1500 :             ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
     621         1500 : 
     622         1500 :             virtual_file_io_engine: None,
     623         1500 : 
     624         1500 :             max_vectored_read_bytes: (MaxVectoredReadBytes(
     625         1500 :                 NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
     626         1500 :             )),
     627         1500 :             image_compression: (DEFAULT_IMAGE_COMPRESSION),
     628         1500 :             timeline_offloading: true,
     629         1500 :             ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
     630         1500 :             l0_flush: None,
     631         1500 :             virtual_file_io_mode: None,
     632         1500 :             tenant_config: TenantConfigToml::default(),
     633         1500 :             no_sync: None,
     634         1500 :             wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
     635         1500 :             page_service_pipelining: if !cfg!(test) {
     636         1500 :                 PageServicePipeliningConfig::Serial
     637              :             } else {
     638              :                 // Do not turn this into the default until scattered reads have been
     639              :                 // validated and rolled-out fully.
     640            0 :                 PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
     641            0 :                     max_batch_size: NonZeroUsize::new(32).unwrap(),
     642            0 :                     execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
     643            0 :                     batching: PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn,
     644            0 :                 })
     645              :             },
     646         1500 :             get_vectored_concurrent_io: if !cfg!(test) {
     647         1500 :                 GetVectoredConcurrentIo::Sequential
     648              :             } else {
     649            0 :                 GetVectoredConcurrentIo::SidecarTask
     650              :             },
     651         1500 :             enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
     652         1500 :                 Some(true)
     653              :             } else {
     654            0 :                 None
     655              :             },
     656         1500 :             validate_wal_contiguity: None,
     657         1500 :             load_previous_heatmap: None,
     658         1500 :             generate_unarchival_heatmap: None,
     659         1500 :             tracing: None,
     660         1500 :             enable_tls_page_service_api: false,
     661         1500 :             dev_mode: false,
     662         1500 :         }
     663         1500 :     }
     664              : }
     665              : 
     666              : pub mod tenant_conf_defaults {
     667              : 
     668              :     // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
     669              :     // would be more appropriate. But a low value forces the code to be exercised more,
     670              :     // which is good for now to trigger bugs.
     671              :     // This parameter actually determines L0 layer file size.
     672              :     pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
     673              :     pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
     674              : 
     675              :     // FIXME the below configs are only used by legacy algorithm. The new algorithm
     676              :     // has different parameters.
     677              : 
     678              :     // Target file size, when creating image and delta layers.
     679              :     // This parameter determines L1 layer file size.
     680              :     pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
     681              : 
     682              :     pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
     683              :     pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
     684              :     pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;
     685              : 
     686              :     // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
     687              :     // 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of
     688              :     // DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak
     689              :     // compaction usage of 15360MB.
     690              :     pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;
     691              :     // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
     692              :     // read amp.
     693              :     pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
     694              :     pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
     695              : 
     696              :     pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
     697              :         crate::models::CompactionAlgorithm::Legacy;
     698              : 
     699              :     pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
     700              : 
     701              :     // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
     702              :     // If there's a need to decrease this value, first make sure that GC
     703              :     // doesn't hold a layer map write lock for non-trivial operations.
     704              :     // Relevant: https://github.com/neondatabase/neon/issues/3394
     705              :     pub const DEFAULT_GC_PERIOD: &str = "1 hr";
     706              :     pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
     707              :     // Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure
     708              :     // without looking at the exact number of L0 layers.
     709              :     // It was expected to have the following behavior:
     710              :     // > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
     711              :     // > layer creation will end immediately. Set to 0 to disable.
     712              :     pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
     713              :     pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
     714              :     pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
     715              :     pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
     716              :     // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
     717              :     // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
     718              :     // throughputs up to 1GiB/s per timeline.
     719              :     pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
     720              :     pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
     721              :     // By default ingest enough WAL for two new L0 layers before checking if new image
     722              :     // layers should be created.
     723              :     pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
     724              :     pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
     725              :     pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
     726              :     pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
     727              :     pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
     728              : }
     729              : 
     730              : impl Default for TenantConfigToml {
     731         1500 :     fn default() -> Self {
     732              :         use tenant_conf_defaults::*;
     733         1500 :         Self {
     734         1500 :             checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
     735         1500 :             checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
     736         1500 :                 .expect("cannot parse default checkpoint timeout"),
     737         1500 :             compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
     738         1500 :             compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
     739         1500 :                 .expect("cannot parse default compaction period"),
     740         1500 :             compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
     741         1500 :             compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
     742         1500 :             compaction_algorithm: crate::models::CompactionAlgorithmSettings {
     743         1500 :                 kind: DEFAULT_COMPACTION_ALGORITHM,
     744         1500 :             },
     745         1500 :             compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,
     746         1500 :             compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
     747         1500 :             compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
     748         1500 :             l0_flush_delay_threshold: None,
     749         1500 :             l0_flush_stall_threshold: None,
     750         1500 :             gc_horizon: DEFAULT_GC_HORIZON,
     751         1500 :             gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
     752         1500 :                 .expect("cannot parse default gc period"),
     753         1500 :             image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
     754         1500 :             pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
     755         1500 :                 .expect("cannot parse default PITR interval"),
     756         1500 :             walreceiver_connect_timeout: humantime::parse_duration(
     757         1500 :                 DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
     758         1500 :             )
     759         1500 :             .expect("cannot parse default walreceiver connect timeout"),
     760         1500 :             lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
     761         1500 :                 .expect("cannot parse default walreceiver lagging wal timeout"),
     762         1500 :             max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
     763         1500 :                 .expect("cannot parse default max walreceiver Lsn wal lag"),
     764         1500 :             eviction_policy: crate::models::EvictionPolicy::NoEviction,
     765         1500 :             min_resident_size_override: None,
     766         1500 :             evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
     767         1500 :                 DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
     768         1500 :             )
     769         1500 :             .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
     770         1500 :             heatmap_period: Duration::ZERO,
     771         1500 :             lazy_slru_download: false,
     772         1500 :             timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
     773         1500 :             image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
     774         1500 :             image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
     775         1500 :             lsn_lease_length: LsnLease::DEFAULT_LENGTH,
     776         1500 :             lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
     777         1500 :             timeline_offloading: true,
     778         1500 :             wal_receiver_protocol_override: None,
     779         1500 :             rel_size_v2_enabled: false,
     780         1500 :             gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
     781         1500 :             gc_compaction_verification: DEFAULT_GC_COMPACTION_VERIFICATION,
     782         1500 :             gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
     783         1500 :             gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
     784         1500 :             sampling_ratio: None,
     785         1500 :         }
     786         1500 :     }
     787              : }
        

Generated by: LCOV version 2.1-beta