LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - config.rs (source / functions) Coverage Total Hit
Test: 13fa4b48c3603751d5b1568465c493b8925758a2.info Lines: 87.5 % 176 154
Test Date: 2025-03-19 18:46:26 Functions: 2.0 % 203 4

            Line data    Source code
       1              : use camino::Utf8PathBuf;
       2              : 
       3              : #[cfg(test)]
       4              : mod tests;
       5              : 
       6              : use const_format::formatcp;
       7              : pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
       8              : pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
       9              : pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
      10              : pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
      11              : 
      12              : use std::collections::HashMap;
      13              : use std::num::{NonZeroU64, NonZeroUsize};
      14              : use std::str::FromStr;
      15              : use std::time::Duration;
      16              : 
      17              : use postgres_backend::AuthType;
      18              : use remote_storage::RemoteStorageConfig;
      19              : use serde_with::serde_as;
      20              : use utils::logging::LogFormat;
      21              : use utils::postgres_client::PostgresClientProtocol;
      22              : 
      23              : use crate::models::{ImageCompressionAlgorithm, LsnLease};
      24              : 
      25              : // Certain metadata (e.g. externally-addressable name, AZ) is delivered
      26              : // as a separate structure.  This information is not needed by the pageserver
      27              : // itself, it is only used for registering the pageserver with the control
      28              : // plane and/or storage controller.
      29              : //
      30            9 : #[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
      31              : pub struct NodeMetadata {
      32              :     #[serde(rename = "host")]
      33              :     pub postgres_host: String,
      34              :     #[serde(rename = "port")]
      35              :     pub postgres_port: u16,
      36              :     pub http_host: String,
      37              :     pub http_port: u16,
      38              :     pub https_port: Option<u16>,
      39              : 
      40              :     // Deployment tools may write fields to the metadata file beyond what we
      41              :     // use in this type: this type intentionally only names the fields that we require.
      42              :     #[serde(flatten)]
      43              :     pub other: HashMap<String, serde_json::Value>,
      44              : }
      45              : 
      46              : /// `pageserver.toml`
      47              : ///
      48              : /// We use serde derive with `#[serde(default)]` to generate a deserializer
      49              : /// that fills in the default values for each config field.
      50              : ///
      51              : /// If there cannot be a static default value because we need to make runtime
      52              : /// checks to determine the default, make it an `Option` (which defaults to None).
      53              : /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
      54              : ///
      55              : /// Unknown fields are silently ignored during deserialization.
      56              : /// The alternative, which we used in the past, was to set `deny_unknown_fields`,
      57              : /// which fails deserialization, and hence pageserver startup, if there is an unknown field.
      58              : /// The reason we don't do that anymore is that it complicates
      59              : /// usage of config fields for feature flagging, which we commonly do for
      60              : /// region-by-region rollouts.
      61              : /// The complications mainly arise because the `pageserver.toml` contents on a
      62              : /// prod server have a separate lifecycle from the pageserver binary.
      63              : /// For instance, `pageserver.toml` contents today are defined in the internal
      64              : /// infra repo, and thus introducing a new config field to pageserver and
      65              : /// rolling it out to prod servers are separate commits in separate repos
      66              : /// that can't be made or rolled back atomically.
      67              : /// Rollbacks in particular pose a risk with deny_unknown_fields because
      68              : /// the old pageserver binary may reject a new config field, resulting in
      69              : /// an outage unless the person doing the pageserver rollback remembers
      70              : /// to also revert the commit that added the config field to the
      71              : /// `pageserver.toml` templates in the internal infra repo.
      72              : /// (A pre-deploy config check would eliminate this risk during rollbacks,
      73              : ///  cf [here](https://github.com/neondatabase/cloud/issues/24349).)
      74              : /// In addition to this compatibility problem during emergency rollbacks,
      75              : /// deny_unknown_fields adds further complications when decommissioning a feature
      76              : /// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]
      77              : /// until all prod servers' `pageserver.toml` files have been updated to a version
      78              : /// that doesn't specify the flag. Otherwise new software would fail to start up.
      79              : /// This adds the requirement for an intermediate step where the new config field
      80              : /// is accepted but ignored, prolonging the decommissioning process by an entire
      81              : /// release cycle.
      82              : /// By contrast, with unknown fields silently ignored, decommissioning a feature
      83              : /// flag is a one-step process: we can skip the intermediate step and straight
      84              : /// remove the field from the [`ConfigToml`]. We leave the field in the
      85              : /// `pageserver.toml` files on prod servers until we reach certainty that we
      86              : /// will not roll back to old software whose behavior was dependent on config.
      87              : /// Then we can remove the field from the templates in the internal infra repo.
      88              : /// This process is [documented internally](
      89              : /// https://docs.neon.build/storage/pageserver_configuration.html).
      90              : ///
      91              : /// Note that the above relaxed compatibility for the config format does NOT APPLY
      92              : /// TO THE STORAGE FORMAT. As general guidance, when introducing storage format
      93              : /// changes, ensure that the potential rollback target version will be compatible
      94              : /// with the new format. This must hold regardless of what flags are set in the `pageserver.toml`:
      95              : /// any format version that exists in an environment must be compatible with the software that runs there.
      96              : /// Use a pageserver.toml flag only to gate whether software _writes_ the new format.
      97              : /// For more compatibility considerations, refer to [internal docs](
      98              : /// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)
      99              : #[serde_as]
     100            0 : #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
     101              : #[serde(default)]
     102              : pub struct ConfigToml {
     103              :     // types mapped 1:1 into the runtime PageServerConfig type
     104              :     pub listen_pg_addr: String,
     105              :     pub listen_http_addr: String,
     106              :     pub listen_https_addr: Option<String>,
     107              :     pub ssl_key_file: Utf8PathBuf,
     108              :     pub ssl_cert_file: Utf8PathBuf,
     109              :     pub availability_zone: Option<String>,
     110              :     #[serde(with = "humantime_serde")]
     111              :     pub wait_lsn_timeout: Duration,
     112              :     #[serde(with = "humantime_serde")]
     113              :     pub wal_redo_timeout: Duration,
     114              :     pub superuser: String,
     115              :     pub locale: String,
     116              :     pub page_cache_size: usize,
     117              :     pub max_file_descriptors: usize,
     118              :     pub pg_distrib_dir: Option<Utf8PathBuf>,
     119              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     120              :     pub http_auth_type: AuthType,
     121              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     122              :     pub pg_auth_type: AuthType,
     123              :     pub auth_validation_public_key_path: Option<Utf8PathBuf>,
     124              :     pub remote_storage: Option<RemoteStorageConfig>,
     125              :     pub tenant_config: TenantConfigToml,
     126              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     127              :     pub broker_endpoint: storage_broker::Uri,
     128              :     #[serde(with = "humantime_serde")]
     129              :     pub broker_keepalive_interval: Duration,
     130              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     131              :     pub log_format: LogFormat,
     132              :     pub concurrent_tenant_warmup: NonZeroUsize,
     133              :     pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
     134              :     #[serde(with = "humantime_serde")]
     135              :     pub metric_collection_interval: Duration,
     136              :     pub metric_collection_endpoint: Option<reqwest::Url>,
     137              :     pub metric_collection_bucket: Option<RemoteStorageConfig>,
     138              :     #[serde(with = "humantime_serde")]
     139              :     pub synthetic_size_calculation_interval: Duration,
     140              :     pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
     141              :     pub test_remote_failures: u64,
     142              :     pub ondemand_download_behavior_treat_error_as_warn: bool,
     143              :     #[serde(with = "humantime_serde")]
     144              :     pub background_task_maximum_delay: Duration,
     145              :     pub control_plane_api: Option<reqwest::Url>,
     146              :     pub control_plane_api_token: Option<String>,
     147              :     pub control_plane_emergency_mode: bool,
     148              :     /// Unstable feature: subject to change or removal without notice.
     149              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     150              :     pub import_pgdata_upcall_api: Option<reqwest::Url>,
     151              :     /// Unstable feature: subject to change or removal without notice.
     152              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     153              :     pub import_pgdata_upcall_api_token: Option<String>,
     154              :     /// Unstable feature: subject to change or removal without notice.
     155              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     156              :     pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
     157              :     pub heatmap_upload_concurrency: usize,
     158              :     pub secondary_download_concurrency: usize,
     159              :     pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
     160              :     pub ingest_batch_size: u64,
     161              :     pub max_vectored_read_bytes: MaxVectoredReadBytes,
     162              :     pub image_compression: ImageCompressionAlgorithm,
     163              :     pub timeline_offloading: bool,
     164              :     pub ephemeral_bytes_per_memory_kb: usize,
     165              :     pub l0_flush: Option<crate::models::L0FlushConfig>,
     166              :     pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
     167              :     #[serde(skip_serializing_if = "Option::is_none")]
     168              :     pub no_sync: Option<bool>,
     169              :     pub wal_receiver_protocol: PostgresClientProtocol,
     170              :     pub page_service_pipelining: PageServicePipeliningConfig,
     171              :     pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
     172              :     pub enable_read_path_debugging: Option<bool>,
     173              :     #[serde(skip_serializing_if = "Option::is_none")]
     174              :     pub validate_wal_contiguity: Option<bool>,
     175              :     #[serde(skip_serializing_if = "Option::is_none")]
     176              :     pub load_previous_heatmap: Option<bool>,
     177              :     #[serde(skip_serializing_if = "Option::is_none")]
     178              :     pub generate_unarchival_heatmap: Option<bool>,
     179              : }
     180              : 
     181            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     182              : pub struct DiskUsageEvictionTaskConfig {
     183              :     pub max_usage_pct: utils::serde_percent::Percent,
     184              :     pub min_avail_bytes: u64,
     185              :     #[serde(with = "humantime_serde")]
     186              :     pub period: Duration,
     187              :     #[cfg(feature = "testing")]
     188              :     pub mock_statvfs: Option<statvfs::mock::Behavior>,
     189              :     /// Select sorting for evicted layers
     190              :     #[serde(default)]
     191              :     pub eviction_order: EvictionOrder,
     192              : }
     193              : 
     194            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     195              : #[serde(tag = "mode", rename_all = "kebab-case")]
     196              : pub enum PageServicePipeliningConfig {
     197              :     Serial,
     198              :     Pipelined(PageServicePipeliningConfigPipelined),
     199              : }
     200            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     201              : pub struct PageServicePipeliningConfigPipelined {
     202              :     /// Causes runtime errors if larger than max get_vectored batch size.
     203              :     pub max_batch_size: NonZeroUsize,
     204              :     pub execution: PageServiceProtocolPipelinedExecutionStrategy,
     205              : }
     206              : 
     207            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     208              : #[serde(rename_all = "kebab-case")]
     209              : pub enum PageServiceProtocolPipelinedExecutionStrategy {
     210              :     ConcurrentFutures,
     211              :     Tasks,
     212              : }
     213              : 
     214            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     215              : #[serde(tag = "mode", rename_all = "kebab-case")]
     216              : pub enum GetVectoredConcurrentIo {
     217              :     /// The read path is fully sequential: layers are visited
     218              :     /// one after the other and IOs are issued and waited upon
     219              :     /// from the same task that traverses the layers.
     220              :     Sequential,
     221              :     /// The read path still traverses layers sequentially, and
     222              :     /// index blocks will be read into the PS PageCache from
     223              :     /// that task, with waiting.
     224              :     /// But data IOs are dispatched and waited upon from a sidecar
     225              :     /// task so that the traversing task can continue to traverse
     226              :     /// layers while the IOs are in flight.
     227              :     /// If the PS PageCache miss rate is low, this improves
     228              :     /// throughput dramatically.
     229              :     SidecarTask,
     230              : }
     231              : 
     232              : pub mod statvfs {
     233              :     pub mod mock {
     234            0 :         #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     235              :         #[serde(tag = "type")]
     236              :         pub enum Behavior {
     237              :             Success {
     238              :                 blocksize: u64,
     239              :                 total_blocks: u64,
     240              :                 name_filter: Option<utils::serde_regex::Regex>,
     241              :             },
     242              :             #[cfg(feature = "testing")]
     243              :             Failure { mocked_error: MockedError },
     244              :         }
     245              : 
     246              :         #[cfg(feature = "testing")]
     247            0 :         #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     248              :         #[allow(clippy::upper_case_acronyms)]
     249              :         pub enum MockedError {
     250              :             EIO,
     251              :         }
     252              : 
     253              :         #[cfg(feature = "testing")]
     254              :         impl From<MockedError> for nix::Error {
     255            0 :             fn from(e: MockedError) -> Self {
     256            0 :                 match e {
     257            0 :                     MockedError::EIO => nix::Error::EIO,
     258            0 :                 }
     259            0 :             }
     260              :         }
     261              :     }
     262              : }
     263              : 
     264            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     265              : #[serde(tag = "type", content = "args")]
     266              : pub enum EvictionOrder {
     267              :     RelativeAccessed {
     268              :         highest_layer_count_loses_first: bool,
     269              :     },
     270              : }
     271              : 
     272              : impl Default for EvictionOrder {
     273            4 :     fn default() -> Self {
     274            4 :         Self::RelativeAccessed {
     275            4 :             highest_layer_count_loses_first: true,
     276            4 :         }
     277            4 :     }
     278              : }
     279              : 
     280            0 : #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     281              : #[serde(transparent)]
     282              : pub struct MaxVectoredReadBytes(pub NonZeroUsize);
     283              : 
     284              : /// Tenant-level configuration values, used for various purposes.
     285            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     286              : #[serde(default)]
     287              : pub struct TenantConfigToml {
     288              :     // Flush out an inmemory layer, if it's holding WAL older than this
     289              :     // This puts a backstop on how much WAL needs to be re-digested if the
     290              :     // page server crashes.
     291              :     // This parameter actually determines L0 layer file size.
     292              :     pub checkpoint_distance: u64,
     293              :     // Inmemory layer is also flushed at least once in checkpoint_timeout to
     294              :     // eventually upload WAL after activity is stopped.
     295              :     #[serde(with = "humantime_serde")]
     296              :     pub checkpoint_timeout: Duration,
     297              :     // Target file size, when creating image and delta layers.
     298              :     // This parameter determines L1 layer file size.
     299              :     pub compaction_target_size: u64,
     300              :     // How often to check if there's compaction work to be done.
     301              :     // Duration::ZERO means automatic compaction is disabled.
     302              :     #[serde(with = "humantime_serde")]
     303              :     pub compaction_period: Duration,
     304              :     /// Level0 delta layer threshold for compaction.
     305              :     pub compaction_threshold: usize,
     306              :     /// Controls the amount of L0 included in a single compaction iteration.
     307              :     /// The unit is `checkpoint_distance`, i.e., a size.
     308              :     /// We add L0s to the set of layers to compact until their cumulative
     309              :     /// size exceeds `compaction_upper_limit * checkpoint_distance`.
     310              :     pub compaction_upper_limit: usize,
     311              :     pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
     312              :     /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
     313              :     /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
     314              :     pub compaction_l0_first: bool,
     315              :     /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
     316              :     /// has an effect if `compaction_l0_first` is true. Defaults to true.
     317              :     pub compaction_l0_semaphore: bool,
     318              :     /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
     319              :     /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
     320              :     /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
     321              :     /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
     322              :     pub l0_flush_delay_threshold: Option<usize>,
     323              :     /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
     324              :     /// to avoid deadlock. 0 to disable. Disabled by default.
     325              :     pub l0_flush_stall_threshold: Option<usize>,
     326              :     /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
     327              :     /// layer. This is a temporary backpressure mechanism which should be removed once
     328              :     /// l0_flush_{delay,stall}_threshold is fully enabled.
     329              :     ///
     330              :     /// TODO: this is no longer enabled, remove it when the config option is no longer set.
     331              :     pub l0_flush_wait_upload: bool,
     332              :     // Determines how much history is retained, to allow
     333              :     // branching and read replicas at an older point in time.
     334              :     // The unit is #of bytes of WAL.
     335              :     // Page versions older than this are garbage collected away.
     336              :     pub gc_horizon: u64,
     337              :     // Interval at which garbage collection is triggered.
     338              :     // Duration::ZERO means automatic GC is disabled
     339              :     #[serde(with = "humantime_serde")]
     340              :     pub gc_period: Duration,
     341              :     // Delta layer churn threshold to create L1 image layers.
     342              :     pub image_creation_threshold: usize,
     343              :     // Determines how much history is retained, to allow
     344              :     // branching and read replicas at an older point in time.
     345              :     // The unit is time.
     346              :     // Page versions older than this are garbage collected away.
     347              :     #[serde(with = "humantime_serde")]
     348              :     pub pitr_interval: Duration,
     349              :     /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
     350              :     #[serde(with = "humantime_serde")]
     351              :     pub walreceiver_connect_timeout: Duration,
     352              :     /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
     353              :     /// A stalled safekeeper will be changed to a newer one when it appears.
     354              :     #[serde(with = "humantime_serde")]
     355              :     pub lagging_wal_timeout: Duration,
     356              :     /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
     357              :     /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
     358              :     /// to avoid eager reconnects.
     359              :     pub max_lsn_wal_lag: NonZeroU64,
     360              :     pub eviction_policy: crate::models::EvictionPolicy,
     361              :     pub min_resident_size_override: Option<u64>,
     362              :     // See the corresponding metric's help string.
     363              :     #[serde(with = "humantime_serde")]
     364              :     pub evictions_low_residence_duration_metric_threshold: Duration,
     365              : 
     366              :     /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
     367              :     /// may be disabled if a Tenant will not have secondary locations: only secondary
     368              :     /// locations will use the heatmap uploaded by attached locations.
     369              :     #[serde(with = "humantime_serde")]
     370              :     pub heatmap_period: Duration,
     371              : 
     372              :     /// If true then SLRU segments are downloaded on demand; if false, SLRU segments are included in basebackup
     373              :     pub lazy_slru_download: bool,
     374              : 
     375              :     pub timeline_get_throttle: crate::models::ThrottleConfig,
     376              : 
     377              :     // How much WAL must be ingested before checking again whether a new image layer is required.
     378              :     // Expressed in multiples of checkpoint distance.
     379              :     pub image_layer_creation_check_threshold: u8,
     380              : 
     381              :     // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
     382              :     // Set to 0 to disable preemption.
     383              :     pub image_creation_preempt_threshold: usize,
     384              : 
     385              :     /// The length for an explicit LSN lease request.
     386              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     387              :     #[serde(with = "humantime_serde")]
     388              :     pub lsn_lease_length: Duration,
     389              : 
     390              :     /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
     391              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     392              :     #[serde(with = "humantime_serde")]
     393              :     pub lsn_lease_length_for_ts: Duration,
     394              : 
     395              :     /// Enable auto-offloading of timelines.
     396              :     /// (either this flag or the pageserver-global one needs to be set)
     397              :     pub timeline_offloading: bool,
     398              : 
     399              :     pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
     400              : 
     401              :     /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
     402              :     /// `index_part.json`, and it cannot be reversed.
     403              :     pub rel_size_v2_enabled: bool,
     404              : 
     405              :     // gc-compaction related configs
     406              :     /// Enable automatic gc-compaction trigger on this tenant.
     407              :     pub gc_compaction_enabled: bool,
     408              :     /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
     409              :     /// gc-compaction will be triggered.
     410              :     pub gc_compaction_initial_threshold_kb: u64,
     411              :     /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
     412              :     /// is above this ratio, gc-compaction will be triggered.
     413              :     pub gc_compaction_ratio_percent: u64,
     414              : }
     415              : 
     416              : pub mod defaults {
     417              :     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
     418              : 
     419              :     use crate::models::ImageCompressionAlgorithm;
     420              : 
     421              :     pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
     422              :     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
     423              : 
     424              :     pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
     425              :     pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
     426              :         "C"
     427              :     } else {
     428              :         "C.UTF-8"
     429              :     };
     430              : 
     431              :     pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
     432              :     pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
     433              : 
     434              :     pub const DEFAULT_LOG_FORMAT: &str = "plain";
     435              : 
     436              :     pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
     437              : 
     438              :     pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
     439              : 
     440              :     pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
     441              :     pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
     442              :     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     443              :     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
     444              : 
     445              :     pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
     446              :     pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
     447              : 
     448              :     pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
     449              : 
     450              :     /// Soft limit for the maximum size of a vectored read.
     451              :     ///
     452              :     /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
     453              :     /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
     454              :     /// with 32k XIDs. That's the max number of XIDs on a single CLOG page. The size of such a record
     455              :     /// is `sizeof(TransactionId) * 32768 + (some fixed overhead from 'timestamp', the Vec length and whatever extra serde serialization adds)`.
     456              :     /// That is, slightly above 128 KiB.
     457              :     pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
     458              : 
     459              :     pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
     460              :         ImageCompressionAlgorithm::Zstd { level: Some(1) };
     461              : 
     462              :     pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
     463              : 
     464              :     pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
     465              : 
     466              :     pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
     467              :         utils::postgres_client::PostgresClientProtocol::Vanilla;
     468              : 
     469              :     pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
     470              :     pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
     471              : }
     472              : 
     473              : impl Default for ConfigToml {
     474          488 :     fn default() -> Self {
     475              :         use defaults::*;
     476              : 
     477              :         Self {
     478          488 :             listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
     479          488 :             listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
     480          488 :             listen_https_addr: (None),
     481          488 :             ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
     482          488 :             ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
     483          488 :             availability_zone: (None),
     484          488 :             wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
     485          488 :                 .expect("cannot parse default wait lsn timeout")),
     486          488 :             wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
     487          488 :                 .expect("cannot parse default wal redo timeout")),
     488          488 :             superuser: (DEFAULT_SUPERUSER.to_string()),
     489          488 :             locale: DEFAULT_LOCALE.to_string(),
     490          488 :             page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
     491          488 :             max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
     492          488 :             pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formerly, this was std::env::current_dir()
     493          488 :             http_auth_type: (AuthType::Trust),
     494          488 :             pg_auth_type: (AuthType::Trust),
     495          488 :             auth_validation_public_key_path: (None),
     496          488 :             remote_storage: None,
     497          488 :             broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
     498          488 :                 .parse()
     499          488 :                 .expect("failed to parse default broker endpoint")),
     500          488 :             broker_keepalive_interval: (humantime::parse_duration(
     501          488 :                 storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
     502          488 :             )
     503          488 :             .expect("cannot parse default keepalive interval")),
     504          488 :             log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
     505          488 : 
     506          488 :             concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
     507          488 :                 .expect("Invalid default constant")),
     508          488 :             concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
     509          488 :                 DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
     510          488 :             )
     511          488 :             .unwrap(),
     512          488 :             metric_collection_interval: (humantime::parse_duration(
     513          488 :                 DEFAULT_METRIC_COLLECTION_INTERVAL,
     514          488 :             )
     515          488 :             .expect("cannot parse default metric collection interval")),
     516          488 :             synthetic_size_calculation_interval: (humantime::parse_duration(
     517          488 :                 DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
     518          488 :             )
     519          488 :             .expect("cannot parse default synthetic size calculation interval")),
     520          488 :             metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
     521          488 : 
     522          488 :             metric_collection_bucket: (None),
     523          488 : 
     524          488 :             disk_usage_based_eviction: (None),
     525          488 : 
     526          488 :             test_remote_failures: (0),
     527          488 : 
     528          488 :             ondemand_download_behavior_treat_error_as_warn: (false),
     529          488 : 
     530          488 :             background_task_maximum_delay: (humantime::parse_duration(
     531          488 :                 DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
     532          488 :             )
     533          488 :             .unwrap()),
     534          488 : 
     535          488 :             control_plane_api: (None),
     536          488 :             control_plane_api_token: (None),
     537          488 :             control_plane_emergency_mode: (false),
     538          488 : 
     539          488 :             import_pgdata_upcall_api: (None),
     540          488 :             import_pgdata_upcall_api_token: (None),
     541          488 :             import_pgdata_aws_endpoint_url: (None),
     542          488 : 
     543          488 :             heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
     544          488 :             secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
     545          488 : 
     546          488 :             ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
     547          488 : 
     548          488 :             virtual_file_io_engine: None,
     549          488 : 
     550          488 :             max_vectored_read_bytes: (MaxVectoredReadBytes(
     551          488 :                 NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
     552          488 :             )),
     553          488 :             image_compression: (DEFAULT_IMAGE_COMPRESSION),
     554          488 :             timeline_offloading: true,
     555          488 :             ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
     556          488 :             l0_flush: None,
     557          488 :             virtual_file_io_mode: None,
     558          488 :             tenant_config: TenantConfigToml::default(),
     559          488 :             no_sync: None,
     560          488 :             wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
     561          488 :             page_service_pipelining: if !cfg!(test) {
     562          488 :                 PageServicePipeliningConfig::Serial
     563              :             } else {
     564            0 :                 PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
     565            0 :                     max_batch_size: NonZeroUsize::new(32).unwrap(),
     566            0 :                     execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
     567            0 :                 })
     568              :             },
     569          488 :             get_vectored_concurrent_io: if !cfg!(test) {
     570          488 :                 GetVectoredConcurrentIo::Sequential
     571              :             } else {
     572            0 :                 GetVectoredConcurrentIo::SidecarTask
     573              :             },
     574          488 :             enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
     575          488 :                 Some(true)
     576              :             } else {
     577            0 :                 None
     578              :             },
     579          488 :             validate_wal_contiguity: None,
     580          488 :             load_previous_heatmap: None,
     581          488 :             generate_unarchival_heatmap: None,
     582          488 :         }
     583          488 :     }
     584              : }
     585              : 
     586              : pub mod tenant_conf_defaults {
     587              : 
     588              :     // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
     589              :     // would be more appropriate. But a low value forces the code to be exercised more,
     590              :     // which is good for now to trigger bugs.
     591              :     // This parameter actually determines L0 layer file size.
     592              :     pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
     593              :     pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
     594              : 
     595              :     // FIXME the below configs are only used by legacy algorithm. The new algorithm
     596              :     // has different parameters.
     597              : 
     598              :     // Target file size, when creating image and delta layers.
     599              :     // This parameter determines L1 layer file size.
     600              :     pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
     601              : 
     602              :     pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
     603              :     pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
     604              : 
     605              :     // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
     606              :     // 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
     607              :     // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
     608              :     // with this config, we can get a maximum peak compaction usage of 9 GB.
     609              :     pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
     610              :     // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
     611              :     // read amp.
     612              :     pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
     613              :     pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
     614              : 
     615              :     pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
     616              :         crate::models::CompactionAlgorithm::Legacy;
     617              : 
     618              :     pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false;
     619              : 
     620              :     pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
     621              : 
     622              :     // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
     623              :     // If there's a need to decrease this value, first make sure that GC
     624              :     // doesn't hold a layer map write lock for non-trivial operations.
     625              :     // Relevant: https://github.com/neondatabase/neon/issues/3394
     626              :     pub const DEFAULT_GC_PERIOD: &str = "1 hr";
     627              :     pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
     628              :     // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
     629              :     // layer creation will end immediately. Set to 0 to disable.
     630              :     pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
     631              :     pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
     632              :     pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
     633              :     pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
     634              :     // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
     635              :     // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
     636              :     // throughputs up to 1GiB/s per timeline.
     637              :     pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
     638              :     pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
     639              :     // By default ingest enough WAL for two new L0 layers before checking if new image
     640              :     // layers should be created.
     641              :     pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
     642              :     pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
     643              :     pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
     644              :     pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
     645              : }
     646              : 
     647              : impl Default for TenantConfigToml {
     648          488 :     fn default() -> Self {
     649              :         use tenant_conf_defaults::*;
     650          488 :         Self {
     651          488 :             checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
     652          488 :             checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
     653          488 :                 .expect("cannot parse default checkpoint timeout"),
     654          488 :             compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
     655          488 :             compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
     656          488 :                 .expect("cannot parse default compaction period"),
     657          488 :             compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
     658          488 :             compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
     659          488 :             compaction_algorithm: crate::models::CompactionAlgorithmSettings {
     660          488 :                 kind: DEFAULT_COMPACTION_ALGORITHM,
     661          488 :             },
     662          488 :             compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
     663          488 :             compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
     664          488 :             l0_flush_delay_threshold: None,
     665          488 :             l0_flush_stall_threshold: None,
     666          488 :             l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
     667          488 :             gc_horizon: DEFAULT_GC_HORIZON,
     668          488 :             gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
     669          488 :                 .expect("cannot parse default gc period"),
     670          488 :             image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
     671          488 :             pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
     672          488 :                 .expect("cannot parse default PITR interval"),
     673          488 :             walreceiver_connect_timeout: humantime::parse_duration(
     674          488 :                 DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
     675          488 :             )
     676          488 :             .expect("cannot parse default walreceiver connect timeout"),
     677          488 :             lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
     678          488 :                 .expect("cannot parse default walreceiver lagging wal timeout"),
     679          488 :             max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
     680          488 :                 .expect("cannot parse default max walreceiver Lsn wal lag"),
     681          488 :             eviction_policy: crate::models::EvictionPolicy::NoEviction,
     682          488 :             min_resident_size_override: None,
     683          488 :             evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
     684          488 :                 DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
     685          488 :             )
     686          488 :             .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
     687          488 :             heatmap_period: Duration::ZERO,
     688          488 :             lazy_slru_download: false,
     689          488 :             timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
     690          488 :             image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
     691          488 :             image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
     692          488 :             lsn_lease_length: LsnLease::DEFAULT_LENGTH,
     693          488 :             lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
     694          488 :             timeline_offloading: true,
     695          488 :             wal_receiver_protocol_override: None,
     696          488 :             rel_size_v2_enabled: false,
     697          488 :             gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
     698          488 :             gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
     699          488 :             gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
     700          488 :         }
     701          488 :     }
     702              : }
        

Generated by: LCOV version 2.1-beta