LCOV - code coverage report
Current view: top level - libs/pageserver_api/src - config.rs (source / functions) Coverage Total Hit
Test: 53536e7d038dd1afd98124ffab7571882048d4d5.info Lines: 79.6 % 201 160
Test Date: 2025-04-24 12:00:37 Functions: 1.4 % 293 4

            Line data    Source code
       1              : use camino::Utf8PathBuf;
       2              : 
       3              : #[cfg(test)]
       4              : mod tests;
       5              : 
       6              : use const_format::formatcp;
       7              : pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
       8              : pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
       9              : pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
      10              : pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
      11              : 
      12              : use std::collections::HashMap;
      13              : use std::num::{NonZeroU64, NonZeroUsize};
      14              : use std::str::FromStr;
      15              : use std::time::Duration;
      16              : 
      17              : use postgres_backend::AuthType;
      18              : use remote_storage::RemoteStorageConfig;
      19              : use serde_with::serde_as;
      20              : use utils::logging::LogFormat;
      21              : use utils::postgres_client::PostgresClientProtocol;
      22              : 
      23              : use crate::models::{ImageCompressionAlgorithm, LsnLease};
      24              : 
      25              : // Certain metadata (e.g. externally-addressable name, AZ) is delivered
      26              : // as a separate structure.  This information is not needed by the pageserver
      27              : // itself, it is only used for registering the pageserver with the control
      28              : // plane and/or storage controller.
      29              : //
      30            9 : #[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
      31              : pub struct NodeMetadata {
      32              :     #[serde(rename = "host")]
      33              :     pub postgres_host: String,
      34              :     #[serde(rename = "port")]
      35              :     pub postgres_port: u16,
      36              :     pub http_host: String,
      37              :     pub http_port: u16,
      38              :     pub https_port: Option<u16>,
      39              : 
      40              :     // Deployment tools may write fields to the metadata file beyond what we
      41              :     // use in this type: this type intentionally only names fields that we require.
      42              :     #[serde(flatten)]
      43              :     pub other: HashMap<String, serde_json::Value>,
      44              : }
      45              : 
      46              : /// `pageserver.toml`
      47              : ///
      48              : /// We use serde derive with `#[serde(default)]` to generate a deserializer
      49              : /// that fills in the default values for each config field.
      50              : ///
      51              : /// If there cannot be a static default value because we need to make runtime
      52              : /// checks to determine the default, make it an `Option` (which defaults to None).
      53              : /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
      54              : ///
      55              : /// Unknown fields are silently ignored during deserialization.
      56              : /// The alternative, which we used in the past, was to set `deny_unknown_fields`,
      57              : /// which fails deserialization, and hence pageserver startup, if there is an unknown field.
      58              : /// The reason we don't do that anymore is that it complicates
      59              : /// usage of config fields for feature flagging, which we commonly do for
      60              : /// region-by-region rollouts.
      61              : /// The complications mainly arise because the `pageserver.toml` contents on a
      62              : /// prod server have a separate lifecycle from the pageserver binary.
      63              : /// For instance, `pageserver.toml` contents today are defined in the internal
      64              : /// infra repo, and thus introducing a new config field to pageserver and
      65              : /// rolling it out to prod servers are separate commits in separate repos
      66              : /// that can't be made or rolled back atomically.
      67              : /// Rollbacks in particular pose a risk with deny_unknown_fields because
      68              : /// the old pageserver binary may reject a new config field, resulting in
      69              : /// an outage unless the person doing the pageserver rollback remembers
      70              : /// to also revert the commit that added the config field to the
      71              : /// `pageserver.toml` templates in the internal infra repo.
      72              : /// (A pre-deploy config check would eliminate this risk during rollbacks,
      73              : ///  cf [here](https://github.com/neondatabase/cloud/issues/24349).)
      74              : /// In addition to this compatibility problem during emergency rollbacks,
      75              : /// deny_unknown_fields adds further complications when decommissioning a feature
      76              : /// flag: with deny_unknown_fields, we can't remove a flag from the [`ConfigToml`]
      77              : /// until all prod servers' `pageserver.toml` files have been updated to a version
      78              : /// that doesn't specify the flag. Otherwise new software would fail to start up.
      79              : /// This adds the requirement for an intermediate step where the new config field
      80              : /// is accepted but ignored, prolonging the decommissioning process by an entire
      81              : /// release cycle.
      82              : /// By contrast, with unknown fields silently ignored, decommissioning a feature
      83              : /// flag is a one-step process: we can skip the intermediate step and straight
      84              : /// remove the field from the [`ConfigToml`]. We leave the field in the
      85              : /// `pageserver.toml` files on prod servers until we reach certainty that we
      86              : /// will not roll back to old software whose behavior was dependent on config.
      87              : /// Then we can remove the field from the templates in the internal infra repo.
      88              : /// This process is [documented internally](
      89              : /// https://docs.neon.build/storage/pageserver_configuration.html).
      90              : ///
      91              : /// Note that the above relaxed compatibility for the config format does NOT APPLY
      92              : /// TO THE STORAGE FORMAT. As general guidance, when introducing storage format
      93              : /// changes, ensure that the potential rollback target version will be compatible
      94              : /// with the new format. This must hold regardless of what flags are set in the `pageserver.toml`:
      95              : /// any format version that exists in an environment must be compatible with the software that runs there.
      96              : /// Use a pageserver.toml flag only to gate whether software _writes_ the new format.
      97              : /// For more compatibility considerations, refer to [internal docs](
      98              : /// https://docs.neon.build/storage/compat.html?highlight=compat#format-versions--compatibility)
      99              : #[serde_as]
     100            0 : #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
     101              : #[serde(default)]
     102              : pub struct ConfigToml {
     103              :     // types mapped 1:1 into the runtime PageServerConfig type
     104              :     pub listen_pg_addr: String,
     105              :     pub listen_http_addr: String,
     106              :     pub listen_https_addr: Option<String>,
     107              :     pub ssl_key_file: Utf8PathBuf,
     108              :     pub ssl_cert_file: Utf8PathBuf,
     109              :     #[serde(with = "humantime_serde")]
     110              :     pub ssl_cert_reload_period: Duration,
     111              :     pub ssl_ca_file: Option<Utf8PathBuf>,
     112              :     pub availability_zone: Option<String>,
     113              :     #[serde(with = "humantime_serde")]
     114              :     pub wait_lsn_timeout: Duration,
     115              :     #[serde(with = "humantime_serde")]
     116              :     pub wal_redo_timeout: Duration,
     117              :     pub superuser: String,
     118              :     pub locale: String,
     119              :     pub page_cache_size: usize,
     120              :     pub max_file_descriptors: usize,
     121              :     pub pg_distrib_dir: Option<Utf8PathBuf>,
     122              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     123              :     pub http_auth_type: AuthType,
     124              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     125              :     pub pg_auth_type: AuthType,
     126              :     pub auth_validation_public_key_path: Option<Utf8PathBuf>,
     127              :     pub remote_storage: Option<RemoteStorageConfig>,
     128              :     pub tenant_config: TenantConfigToml,
     129              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     130              :     pub broker_endpoint: storage_broker::Uri,
     131              :     #[serde(with = "humantime_serde")]
     132              :     pub broker_keepalive_interval: Duration,
     133              :     #[serde_as(as = "serde_with::DisplayFromStr")]
     134              :     pub log_format: LogFormat,
     135              :     pub concurrent_tenant_warmup: NonZeroUsize,
     136              :     pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
     137              :     #[serde(with = "humantime_serde")]
     138              :     pub metric_collection_interval: Duration,
     139              :     pub metric_collection_endpoint: Option<reqwest::Url>,
     140              :     pub metric_collection_bucket: Option<RemoteStorageConfig>,
     141              :     #[serde(with = "humantime_serde")]
     142              :     pub synthetic_size_calculation_interval: Duration,
     143              :     pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
     144              :     pub test_remote_failures: u64,
     145              :     pub ondemand_download_behavior_treat_error_as_warn: bool,
     146              :     #[serde(with = "humantime_serde")]
     147              :     pub background_task_maximum_delay: Duration,
     148              :     pub control_plane_api: Option<reqwest::Url>,
     149              :     pub control_plane_api_token: Option<String>,
     150              :     pub control_plane_emergency_mode: bool,
     151              :     /// Unstable feature: subject to change or removal without notice.
     152              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     153              :     pub import_pgdata_upcall_api: Option<reqwest::Url>,
     154              :     /// Unstable feature: subject to change or removal without notice.
     155              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     156              :     pub import_pgdata_upcall_api_token: Option<String>,
     157              :     /// Unstable feature: subject to change or removal without notice.
     158              :     /// See <https://github.com/neondatabase/neon/pull/9218>.
     159              :     pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
     160              :     pub heatmap_upload_concurrency: usize,
     161              :     pub secondary_download_concurrency: usize,
     162              :     pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
     163              :     pub ingest_batch_size: u64,
     164              :     pub max_vectored_read_bytes: MaxVectoredReadBytes,
     165              :     pub image_compression: ImageCompressionAlgorithm,
     166              :     pub timeline_offloading: bool,
     167              :     pub ephemeral_bytes_per_memory_kb: usize,
     168              :     pub l0_flush: Option<crate::models::L0FlushConfig>,
     169              :     pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
     170              :     #[serde(skip_serializing_if = "Option::is_none")]
     171              :     pub no_sync: Option<bool>,
     172              :     pub wal_receiver_protocol: PostgresClientProtocol,
     173              :     pub page_service_pipelining: PageServicePipeliningConfig,
     174              :     pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
     175              :     pub enable_read_path_debugging: Option<bool>,
     176              :     #[serde(skip_serializing_if = "Option::is_none")]
     177              :     pub validate_wal_contiguity: Option<bool>,
     178              :     #[serde(skip_serializing_if = "Option::is_none")]
     179              :     pub load_previous_heatmap: Option<bool>,
     180              :     #[serde(skip_serializing_if = "Option::is_none")]
     181              :     pub generate_unarchival_heatmap: Option<bool>,
     182              :     pub tracing: Option<Tracing>,
     183              :     pub enable_tls_page_service_api: bool,
     184              : }
     185              : 
     186            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     187              : pub struct DiskUsageEvictionTaskConfig {
     188              :     pub max_usage_pct: utils::serde_percent::Percent,
     189              :     pub min_avail_bytes: u64,
     190              :     #[serde(with = "humantime_serde")]
     191              :     pub period: Duration,
     192              :     #[cfg(feature = "testing")]
     193              :     pub mock_statvfs: Option<statvfs::mock::Behavior>,
     194              :     /// Select sorting for evicted layers
     195              :     #[serde(default)]
     196              :     pub eviction_order: EvictionOrder,
     197              : }
     198              : 
     199            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     200              : #[serde(tag = "mode", rename_all = "kebab-case")]
     201              : pub enum PageServicePipeliningConfig {
     202              :     Serial,
     203              :     Pipelined(PageServicePipeliningConfigPipelined),
     204              : }
     205            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     206              : pub struct PageServicePipeliningConfigPipelined {
     207              :     /// Causes runtime errors if larger than max get_vectored batch size.
     208              :     pub max_batch_size: NonZeroUsize,
     209              :     pub execution: PageServiceProtocolPipelinedExecutionStrategy,
     210              :     // The default below is such that new versions of the software can start
     211              :     // with the old configuration.
     212              :     #[serde(default)]
     213              :     pub batching: PageServiceProtocolPipelinedBatchingStrategy,
     214              : }
     215              : 
     216            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     217              : #[serde(rename_all = "kebab-case")]
     218              : pub enum PageServiceProtocolPipelinedExecutionStrategy {
     219              :     ConcurrentFutures,
     220              :     Tasks,
     221              : }
     222              : 
     223            0 : #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     224              : #[serde(rename_all = "kebab-case")]
     225              : pub enum PageServiceProtocolPipelinedBatchingStrategy {
     226              :     /// All get page requests in a batch will be at the same LSN
     227              :     #[default]
     228              :     UniformLsn,
     229              :     /// Get page requests in a batch may be at different LSN
     230              :     ///
     231              :     /// One key cannot be present more than once at different LSNs in
     232              :     /// the same batch.
     233              :     ScatteredLsn,
     234              : }
     235              : 
     236            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     237              : #[serde(tag = "mode", rename_all = "kebab-case")]
     238              : pub enum GetVectoredConcurrentIo {
     239              :     /// The read path is fully sequential: layers are visited
     240              :     /// one after the other and IOs are issued and waited upon
     241              :     /// from the same task that traverses the layers.
     242              :     Sequential,
     243              :     /// The read path still traverses layers sequentially, and
     244              :     /// index blocks will be read into the PS PageCache from
     245              :     /// that task, with waiting.
     246              :     /// But data IOs are dispatched and waited upon from a sidecar
     247              :     /// task so that the traversing task can continue to traverse
     248              :     /// layers while the IOs are in flight.
     249              :     /// If the PS PageCache miss rate is low, this improves
     250              :     /// throughput dramatically.
     251              :     SidecarTask,
     252              : }
     253              : 
     254            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     255              : pub struct Ratio {
     256              :     pub numerator: usize,
     257              :     pub denominator: usize,
     258              : }
     259              : 
     260            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     261              : pub struct OtelExporterConfig {
     262              :     pub endpoint: String,
     263              :     pub protocol: OtelExporterProtocol,
     264              :     #[serde(with = "humantime_serde")]
     265              :     pub timeout: Duration,
     266              : }
     267              : 
     268            0 : #[derive(Debug, Copy, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     269              : #[serde(rename_all = "kebab-case")]
     270              : pub enum OtelExporterProtocol {
     271              :     Grpc,
     272              :     HttpBinary,
     273              :     HttpJson,
     274              : }
     275              : 
     276            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     277              : pub struct Tracing {
     278              :     pub sampling_ratio: Ratio,
     279              :     pub export_config: OtelExporterConfig,
     280              : }
     281              : 
     282              : impl From<&OtelExporterConfig> for tracing_utils::ExportConfig {
     283            0 :     fn from(val: &OtelExporterConfig) -> Self {
     284            0 :         tracing_utils::ExportConfig {
     285            0 :             endpoint: Some(val.endpoint.clone()),
     286            0 :             protocol: val.protocol.into(),
     287            0 :             timeout: val.timeout,
     288            0 :         }
     289            0 :     }
     290              : }
     291              : 
     292              : impl From<OtelExporterProtocol> for tracing_utils::Protocol {
     293            0 :     fn from(val: OtelExporterProtocol) -> Self {
     294            0 :         match val {
     295            0 :             OtelExporterProtocol::Grpc => tracing_utils::Protocol::Grpc,
     296            0 :             OtelExporterProtocol::HttpJson => tracing_utils::Protocol::HttpJson,
     297            0 :             OtelExporterProtocol::HttpBinary => tracing_utils::Protocol::HttpBinary,
     298              :         }
     299            0 :     }
     300              : }
     301              : 
     302              : pub mod statvfs {
     303              :     pub mod mock {
     304            0 :         #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     305              :         #[serde(tag = "type")]
     306              :         pub enum Behavior {
     307              :             Success {
     308              :                 blocksize: u64,
     309              :                 total_blocks: u64,
     310              :                 name_filter: Option<utils::serde_regex::Regex>,
     311              :             },
     312              :             #[cfg(feature = "testing")]
     313              :             Failure { mocked_error: MockedError },
     314              :         }
     315              : 
     316              :         #[cfg(feature = "testing")]
     317            0 :         #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     318              :         #[allow(clippy::upper_case_acronyms)]
     319              :         pub enum MockedError {
     320              :             EIO,
     321              :         }
     322              : 
     323              :         #[cfg(feature = "testing")]
     324              :         impl From<MockedError> for nix::Error {
     325            0 :             fn from(e: MockedError) -> Self {
     326            0 :                 match e {
     327            0 :                     MockedError::EIO => nix::Error::EIO,
     328            0 :                 }
     329            0 :             }
     330              :         }
     331              :     }
     332              : }
     333              : 
     334            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     335              : #[serde(tag = "type", content = "args")]
     336              : pub enum EvictionOrder {
     337              :     RelativeAccessed {
     338              :         highest_layer_count_loses_first: bool,
     339              :     },
     340              : }
     341              : 
     342              : impl Default for EvictionOrder {
     343           12 :     fn default() -> Self {
     344           12 :         Self::RelativeAccessed {
     345           12 :             highest_layer_count_loses_first: true,
     346           12 :         }
     347           12 :     }
     348              : }
     349              : 
     350            0 : #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     351              : #[serde(transparent)]
     352              : pub struct MaxVectoredReadBytes(pub NonZeroUsize);
     353              : 
     354              : /// Tenant-level configuration values, used for various purposes.
     355            0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
     356              : #[serde(default)]
     357              : pub struct TenantConfigToml {
     358              :     // Flush out an inmemory layer, if it's holding WAL older than this
     359              :     // This puts a backstop on how much WAL needs to be re-digested if the
     360              :     // page server crashes.
     361              :     // This parameter actually determines L0 layer file size.
     362              :     pub checkpoint_distance: u64,
     363              :     // Inmemory layer is also flushed at least once in checkpoint_timeout to
     364              :     // eventually upload WAL after activity is stopped.
     365              :     #[serde(with = "humantime_serde")]
     366              :     pub checkpoint_timeout: Duration,
     367              :     // Target file size, when creating image and delta layers.
     368              :     // This parameter determines L1 layer file size.
     369              :     pub compaction_target_size: u64,
     370              :     // How often to check if there's compaction work to be done.
     371              :     // Duration::ZERO means automatic compaction is disabled.
     372              :     #[serde(with = "humantime_serde")]
     373              :     pub compaction_period: Duration,
     374              :     /// Level0 delta layer threshold for compaction.
     375              :     pub compaction_threshold: usize,
     376              :     /// Controls the amount of L0 included in a single compaction iteration.
     377              :     /// The unit is `checkpoint_distance`, i.e., a size.
     378              :     /// We add L0s to the set of layers to compact until their cumulative
     379              :     /// size exceeds `compaction_upper_limit * checkpoint_distance`.
     380              :     pub compaction_upper_limit: usize,
     381              :     pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
     382              :     /// If true, enable shard ancestor compaction (enabled by default).
     383              :     pub compaction_shard_ancestor: bool,
     384              :     /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
     385              :     /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
     386              :     pub compaction_l0_first: bool,
     387              :     /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
     388              :     /// has an effect if `compaction_l0_first` is true. Defaults to true.
     389              :     pub compaction_l0_semaphore: bool,
     390              :     /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long,
     391              :     /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This
     392              :     /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up.
     393              :     /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold.
     394              :     pub l0_flush_delay_threshold: Option<usize>,
     395              :     /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
     396              :     /// to avoid deadlock. 0 to disable. Disabled by default.
     397              :     pub l0_flush_stall_threshold: Option<usize>,
     398              :     // Determines how much history is retained, to allow
     399              :     // branching and read replicas at an older point in time.
     400              :     // The unit is #of bytes of WAL.
     401              :     // Page versions older than this are garbage collected away.
     402              :     pub gc_horizon: u64,
     403              :     // Interval at which garbage collection is triggered.
     404              :     // Duration::ZERO means automatic GC is disabled
     405              :     #[serde(with = "humantime_serde")]
     406              :     pub gc_period: Duration,
     407              :     // Delta layer churn threshold to create L1 image layers.
     408              :     pub image_creation_threshold: usize,
     409              :     // Determines how much history is retained, to allow
     410              :     // branching and read replicas at an older point in time.
     411              :     // The unit is time.
     412              :     // Page versions older than this are garbage collected away.
     413              :     #[serde(with = "humantime_serde")]
     414              :     pub pitr_interval: Duration,
     415              :     /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
     416              :     #[serde(with = "humantime_serde")]
     417              :     pub walreceiver_connect_timeout: Duration,
     418              :     /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
     419              :     /// A stalled safekeeper will be changed to a newer one when it appears.
     420              :     #[serde(with = "humantime_serde")]
     421              :     pub lagging_wal_timeout: Duration,
     422              :     /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
     423              :     /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
     424              :     /// to avoid eager reconnects.
     425              :     pub max_lsn_wal_lag: NonZeroU64,
     426              :     pub eviction_policy: crate::models::EvictionPolicy,
     427              :     pub min_resident_size_override: Option<u64>,
     428              :     // See the corresponding metric's help string.
     429              :     #[serde(with = "humantime_serde")]
     430              :     pub evictions_low_residence_duration_metric_threshold: Duration,
     431              : 
     432              :     /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
     433              :     /// may be disabled if a Tenant will not have secondary locations: only secondary
     434              :     /// locations will use the heatmap uploaded by attached locations.
     435              :     #[serde(with = "humantime_serde")]
     436              :     pub heatmap_period: Duration,
     437              : 
     438              :     /// If true then SLRU segments are downloaded on demand, if false SLRU segments are included in basebackup
     439              :     pub lazy_slru_download: bool,
     440              : 
     441              :     pub timeline_get_throttle: crate::models::ThrottleConfig,
     442              : 
     443              :     // How much WAL must be ingested before checking again whether a new image layer is required.
     444              :     // Expressed in multiples of checkpoint distance.
     445              :     pub image_layer_creation_check_threshold: u8,
     446              : 
     447              :     // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
     448              :     // Set to 0 to disable preemption.
     449              :     pub image_creation_preempt_threshold: usize,
     450              : 
     451              :     /// The length for an explicit LSN lease request.
     452              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     453              :     #[serde(with = "humantime_serde")]
     454              :     pub lsn_lease_length: Duration,
     455              : 
     456              :     /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
     457              :     /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
     458              :     #[serde(with = "humantime_serde")]
     459              :     pub lsn_lease_length_for_ts: Duration,
     460              : 
     461              :     /// Enable auto-offloading of timelines.
     462              :     /// (either this flag or the pageserver-global one need to be set)
     463              :     pub timeline_offloading: bool,
     464              : 
     465              :     pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
     466              : 
     467              :     /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
     468              :     /// `index_part.json`, and it cannot be reversed.
     469              :     pub rel_size_v2_enabled: bool,
     470              : 
     471              :     // gc-compaction related configs
     472              :     /// Enable automatic gc-compaction trigger on this tenant.
     473              :     pub gc_compaction_enabled: bool,
     474              :     /// Enable verification of gc-compaction results.
     475              :     pub gc_compaction_verification: bool,
     476              :     /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
     477              :     /// gc-compaction will be triggered.
     478              :     pub gc_compaction_initial_threshold_kb: u64,
     479              :     /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
     480              :     /// is above this ratio, gc-compaction will be triggered.
     481              :     pub gc_compaction_ratio_percent: u64,
     482              :     /// Tenant level performance sampling ratio override. Controls the ratio of get page requests
     483              :     /// that will get perf sampling for the tenant.
     484              :     pub sampling_ratio: Option<Ratio>,
     485              : }
     486              : 
     487              : pub mod defaults {
                            // Default values consumed by `ConfigToml::default()`, which pulls them in via
                            // `use defaults::*`. Duration defaults are kept as human-readable strings and are
                            // parsed with `humantime::parse_duration` when the default config is built.
     488              :     pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
     489              : 
     490              :     use crate::models::ImageCompressionAlgorithm;
     491              : 
     492              :     pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
     493              :     pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
     494              : 
     495              :     pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
     496              :     pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
     497              :         "C"
     498              :     } else {
     499              :         "C.UTF-8"
     500              :     };
     501              : 
     502              :     pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
     503              :     pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
     504              : 
     505              :     pub const DEFAULT_LOG_FORMAT: &str = "plain";
     506              : 
     507              :     pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
     508              : 
     509              :     pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
     510              : 
     511              :     pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
     512              :     pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
     513              :     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     514              :     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
     515              : 
     516              :     pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
     517              :     pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
     518              : 
     519              :     pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
     520              : 
     521              :     /// Soft limit for the maximum size of a vectored read.
     522              :     ///
     523              :     /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
     524              :     /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
     525              :     /// with 32k xids. That's the max number of XIDs on a single CLOG page. The size of such a record
     526              :     /// is `sizeof(TransactionId) * 32768 + (some fixed overhead from `timestamp`, the Vec length and whatever extra serde serialization adds)`.
     527              :     /// That is, slightly above 128 kB.
     528              :     pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
     529              : 
     530              :     pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
     531              :         ImageCompressionAlgorithm::Zstd { level: Some(1) };
     532              : 
     533              :     pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
     534              : 
     535              :     pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
     536              : 
     537              :     pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
     538              :         utils::postgres_client::PostgresClientProtocol::Vanilla;
     539              : 
     540              :     pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
     541              :     pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
     542              : }
     543              : 
     544              : impl Default for ConfigToml {
                            /// Builds the default configuration from the constants in [`defaults`], the
                            /// file-level listen-address constants, and a handful of inline literals.
                            ///
                            /// # Panics
                            ///
                            /// Panics if one of the default string constants fails to parse or a `NonZeroUsize`
                            /// default is zero (see the `expect`/`unwrap` calls below). Every input here is a
                            /// compile-time constant, so a panic would indicate a malformed constant.
     545         1500 :     fn default() -> Self {
     546              :         use defaults::*;
     547              : 
     548              :         Self {
     549         1500 :             listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
     550         1500 :             listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
     551         1500 :             listen_https_addr: (None),
     552         1500 :             ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
     553         1500 :             ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
     554         1500 :             ssl_cert_reload_period: Duration::from_secs(60),
     555         1500 :             ssl_ca_file: None,
     556         1500 :             availability_zone: (None),
     557         1500 :             wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
     558         1500 :                 .expect("cannot parse default wait lsn timeout")),
     559         1500 :             wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
     560         1500 :                 .expect("cannot parse default wal redo timeout")),
     561         1500 :             superuser: (DEFAULT_SUPERUSER.to_string()),
     562         1500 :             locale: DEFAULT_LOCALE.to_string(),
     563         1500 :             page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
     564         1500 :             max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
     565         1500 :             pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formerly, this was std::env::current_dir()
     566         1500 :             http_auth_type: (AuthType::Trust),
     567         1500 :             pg_auth_type: (AuthType::Trust),
     568         1500 :             auth_validation_public_key_path: (None),
     569         1500 :             remote_storage: None,
     570         1500 :             broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
     571         1500 :                 .parse()
     572         1500 :                 .expect("failed to parse default broker endpoint")),
     573         1500 :             broker_keepalive_interval: (humantime::parse_duration(
     574         1500 :                 storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
     575         1500 :             )
     576         1500 :             .expect("cannot parse default keepalive interval")),
     577         1500 :             log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
     578         1500 : 
     579         1500 :             concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
     580         1500 :                 .expect("Invalid default constant")),
     581         1500 :             concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
     582         1500 :                 DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
     583         1500 :             )
     584         1500 :             .unwrap(),
     585         1500 :             metric_collection_interval: (humantime::parse_duration(
     586         1500 :                 DEFAULT_METRIC_COLLECTION_INTERVAL,
     587         1500 :             )
     588         1500 :             .expect("cannot parse default metric collection interval")),
     589         1500 :             synthetic_size_calculation_interval: (humantime::parse_duration(
     590         1500 :                 DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
     591         1500 :             )
     592         1500 :             .expect("cannot parse default synthetic size calculation interval")),
     593         1500 :             metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
     594         1500 : 
     595         1500 :             metric_collection_bucket: (None),
     596         1500 : 
     597         1500 :             disk_usage_based_eviction: (None),
     598         1500 : 
     599         1500 :             test_remote_failures: (0),
     600         1500 : 
     601         1500 :             ondemand_download_behavior_treat_error_as_warn: (false),
     602         1500 : 
     603         1500 :             background_task_maximum_delay: (humantime::parse_duration(
     604         1500 :                 DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
     605         1500 :             )
     606         1500 :             .unwrap()),
     607         1500 : 
     608         1500 :             control_plane_api: (None),
     609         1500 :             control_plane_api_token: (None),
     610         1500 :             control_plane_emergency_mode: (false),
     611         1500 : 
     612         1500 :             import_pgdata_upcall_api: (None),
     613         1500 :             import_pgdata_upcall_api_token: (None),
     614         1500 :             import_pgdata_aws_endpoint_url: (None),
     615         1500 : 
     616         1500 :             heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
     617         1500 :             secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
     618         1500 : 
     619         1500 :             ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
     620         1500 : 
     621         1500 :             virtual_file_io_engine: None,
     622         1500 : 
     623         1500 :             max_vectored_read_bytes: (MaxVectoredReadBytes(
     624         1500 :                 NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
     625         1500 :             )),
     626         1500 :             image_compression: (DEFAULT_IMAGE_COMPRESSION),
     627         1500 :             timeline_offloading: true,
     628         1500 :             ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
     629         1500 :             l0_flush: None,
     630         1500 :             virtual_file_io_mode: None,
     631         1500 :             tenant_config: TenantConfigToml::default(),
     632         1500 :             no_sync: None,
     633         1500 :             wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
                                    // The default differs by build: `Serial` unless this crate is compiled with
                                    // `cfg(test)`, in which case the pipelined configuration below is used.
     634         1500 :             page_service_pipelining: if !cfg!(test) {
     635         1500 :                 PageServicePipeliningConfig::Serial
     636              :             } else {
     637              :                 // Do not turn this into the default until scattered reads have been
     638              :                 // validated and rolled-out fully.
     639            0 :                 PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
     640            0 :                     max_batch_size: NonZeroUsize::new(32).unwrap(),
     641            0 :                     execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
     642            0 :                     batching: PageServiceProtocolPipelinedBatchingStrategy::ScatteredLsn,
     643            0 :                 })
     644              :             },
                                    // Same build split: `Sequential` outside of `cfg(test)`, `SidecarTask` under it.
     645         1500 :             get_vectored_concurrent_io: if !cfg!(test) {
     646         1500 :                 GetVectoredConcurrentIo::Sequential
     647              :             } else {
     648            0 :                 GetVectoredConcurrentIo::SidecarTask
     649              :             },
                                    // Set to `Some(true)` only under `cfg(test)` or with the `testing` feature;
                                    // left unset (`None`) in all other builds.
     650         1500 :             enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
     651         1500 :                 Some(true)
     652              :             } else {
     653            0 :                 None
     654              :             },
     655         1500 :             validate_wal_contiguity: None,
     656         1500 :             load_previous_heatmap: None,
     657         1500 :             generate_unarchival_heatmap: None,
     658         1500 :             tracing: None,
     659         1500 :             enable_tls_page_service_api: false,
     660         1500 :         }
     661         1500 :     }
     662              : }
     663              : 
     664              : pub mod tenant_conf_defaults {
                            // Default values consumed by `TenantConfigToml::default()`. Duration defaults are kept
                            // as human-readable strings and parsed with `humantime::parse_duration` there.
     665              : 
     666              :     // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
     667              :     // would be more appropriate. But a low value forces the code to be exercised more,
     668              :     // which is good for now to trigger bugs.
     669              :     // This parameter actually determines L0 layer file size.
     670              :     pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
     671              :     pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
     672              : 
     673              :     // FIXME the below configs are only used by legacy algorithm. The new algorithm
     674              :     // has different parameters.
     675              : 
     676              :     // Target file size, when creating image and delta layers.
     677              :     // This parameter determines L1 layer file size.
     678              :     pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
     679              : 
     680              :     pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
     681              :     pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
     682              :     pub const DEFAULT_COMPACTION_SHARD_ANCESTOR: bool = true;
     683              : 
     684              :     // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
     685              :     // 3/4*8=6 on most of our pageservers. Compacting 10 layers requires a maximum of
     686              :     // DEFAULT_CHECKPOINT_DISTANCE*10 memory, that's 2560MB. So with this config, we can get a maximum peak
     687              :     // compaction usage of 15360MB.
     688              :     pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 10;
     689              :     // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
     690              :     // read amp.
     691              :     pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
     692              :     pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
     693              : 
     694              :     pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
     695              :         crate::models::CompactionAlgorithm::Legacy;
     696              : 
     697              :     pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
     698              : 
     699              :     // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
     700              :     // If there's a need to decrease this value, first make sure that GC
     701              :     // doesn't hold a layer map write lock for non-trivial operations.
     702              :     // Relevant: https://github.com/neondatabase/neon/issues/3394
     703              :     pub const DEFAULT_GC_PERIOD: &str = "1 hr";
     704              :     pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
     705              :     // Currently, any value other than 0 will trigger image layer creation preemption immediately with L0 backpressure
     706              :     // without looking at the exact number of L0 layers.
     707              :     // It was expected to have the following behavior:
     708              :     // > If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
     709              :     // > layer creation will end immediately. Set to 0 to disable.
     710              :     pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3;
     711              :     pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
     712              :     pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
     713              :     pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
     714              :     // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
     715              :     // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
     716              :     // throughputs up to 1GiB/s per timeline.
     717              :     pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
     718              :     pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
     719              :     // By default ingest enough WAL for two new L0 layers before checking if new
     720              :     // image layers should be created.
     721              :     pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
     722              :     pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
     723              :     pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
     724              :     pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB, expressed in KB
     725              :     pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
     726              : }
     727              : 
     728              : impl Default for TenantConfigToml {
                            /// Builds the default tenant configuration from the constants in
                            /// [`tenant_conf_defaults`], the `LsnLease` default lengths, and a few inline literals.
                            ///
                            /// # Panics
                            ///
                            /// Panics if a default duration string fails to parse or if
                            /// `DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG` is zero. All inputs are compile-time
                            /// constants, so a panic would indicate a malformed constant.
     729         1500 :     fn default() -> Self {
     730              :         use tenant_conf_defaults::*;
     731         1500 :         Self {
     732         1500 :             checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
     733         1500 :             checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
     734         1500 :                 .expect("cannot parse default checkpoint timeout"),
     735         1500 :             compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
     736         1500 :             compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
     737         1500 :                 .expect("cannot parse default compaction period"),
     738         1500 :             compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
     739         1500 :             compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
     740         1500 :             compaction_algorithm: crate::models::CompactionAlgorithmSettings {
     741         1500 :                 kind: DEFAULT_COMPACTION_ALGORITHM,
     742         1500 :             },
     743         1500 :             compaction_shard_ancestor: DEFAULT_COMPACTION_SHARD_ANCESTOR,
     744         1500 :             compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
     745         1500 :             compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
     746         1500 :             l0_flush_delay_threshold: None,
     747         1500 :             l0_flush_stall_threshold: None,
     748         1500 :             gc_horizon: DEFAULT_GC_HORIZON,
     749         1500 :             gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
     750         1500 :                 .expect("cannot parse default gc period"),
     751         1500 :             image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
     752         1500 :             pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
     753         1500 :                 .expect("cannot parse default PITR interval"),
     754         1500 :             walreceiver_connect_timeout: humantime::parse_duration(
     755         1500 :                 DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
     756         1500 :             )
     757         1500 :             .expect("cannot parse default walreceiver connect timeout"),
     758         1500 :             lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
     759         1500 :                 .expect("cannot parse default walreceiver lagging wal timeout"),
     760         1500 :             max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
     761         1500 :                 .expect("cannot parse default max walreceiver Lsn wal lag"),
     762         1500 :             eviction_policy: crate::models::EvictionPolicy::NoEviction,
     763         1500 :             min_resident_size_override: None,
     764         1500 :             evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
     765         1500 :                 DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
     766         1500 :             )
     767         1500 :             .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
                                    // Zero period; the field docs describe the upload period as applying only
                                    // "if non-zero", so this presumably leaves heatmap uploads off by default.
     768         1500 :             heatmap_period: Duration::ZERO,
     769         1500 :             lazy_slru_download: false,
     770         1500 :             timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
     771         1500 :             image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
     772         1500 :             image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
     773         1500 :             lsn_lease_length: LsnLease::DEFAULT_LENGTH,
     774         1500 :             lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
     775         1500 :             timeline_offloading: true,
     776         1500 :             wal_receiver_protocol_override: None,
     777         1500 :             rel_size_v2_enabled: false,
     778         1500 :             gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
     779         1500 :             gc_compaction_verification: DEFAULT_GC_COMPACTION_VERIFICATION,
     780         1500 :             gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
     781         1500 :             gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
     782         1500 :             sampling_ratio: None,
     783         1500 :         }
     784         1500 :     }
     785              : }
        

Generated by: LCOV version 2.1-beta