Line data Source code
1 : use camino::Utf8PathBuf;
2 :
3 : #[cfg(test)]
4 : mod tests;
5 :
6 : use const_format::formatcp;
7 : pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
8 : pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
9 : pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
10 : pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
11 :
12 : use std::collections::HashMap;
13 : use std::num::{NonZeroU64, NonZeroUsize};
14 : use std::str::FromStr;
15 : use std::time::Duration;
16 :
17 : use postgres_backend::AuthType;
18 : use remote_storage::RemoteStorageConfig;
19 : use serde_with::serde_as;
20 : use utils::logging::LogFormat;
21 : use utils::postgres_client::PostgresClientProtocol;
22 :
23 : use crate::models::{ImageCompressionAlgorithm, LsnLease};
24 :
25 : // Certain metadata (e.g. externally-addressable name, AZ) is delivered
26 : // as a separate structure. This information is not neeed by the pageserver
27 : // itself, it is only used for registering the pageserver with the control
28 : // plane and/or storage controller.
29 : //
30 9 : #[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
31 : pub struct NodeMetadata {
32 : #[serde(rename = "host")]
33 : pub postgres_host: String,
34 : #[serde(rename = "port")]
35 : pub postgres_port: u16,
36 : pub http_host: String,
37 : pub http_port: u16,
38 : pub https_port: Option<u16>,
39 :
40 : // Deployment tools may write fields to the metadata file beyond what we
41 : // use in this type: this type intentionally only names fields that require.
42 : #[serde(flatten)]
43 : pub other: HashMap<String, serde_json::Value>,
44 : }
45 :
46 : /// `pageserver.toml`
47 : ///
48 : /// We use serde derive with `#[serde(default)]` to generate a deserializer
49 : /// that fills in the default values for each config field.
50 : ///
51 : /// If there cannot be a static default value because we need to make runtime
52 : /// checks to determine the default, make it an `Option` (which defaults to None).
53 : /// The runtime check should be done in the consuming crate, i.e., `pageserver`.
54 : #[serde_as]
55 20 : #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
56 : #[serde(default, deny_unknown_fields)]
57 : pub struct ConfigToml {
58 : // types mapped 1:1 into the runtime PageServerConfig type
59 : pub listen_pg_addr: String,
60 : pub listen_http_addr: String,
61 : pub listen_https_addr: Option<String>,
62 : pub ssl_key_file: Utf8PathBuf,
63 : pub ssl_cert_file: Utf8PathBuf,
64 : pub availability_zone: Option<String>,
65 : #[serde(with = "humantime_serde")]
66 : pub wait_lsn_timeout: Duration,
67 : #[serde(with = "humantime_serde")]
68 : pub wal_redo_timeout: Duration,
69 : pub superuser: String,
70 : pub locale: String,
71 : pub page_cache_size: usize,
72 : pub max_file_descriptors: usize,
73 : pub pg_distrib_dir: Option<Utf8PathBuf>,
74 : #[serde_as(as = "serde_with::DisplayFromStr")]
75 : pub http_auth_type: AuthType,
76 : #[serde_as(as = "serde_with::DisplayFromStr")]
77 : pub pg_auth_type: AuthType,
78 : pub auth_validation_public_key_path: Option<Utf8PathBuf>,
79 : pub remote_storage: Option<RemoteStorageConfig>,
80 : pub tenant_config: TenantConfigToml,
81 : #[serde_as(as = "serde_with::DisplayFromStr")]
82 : pub broker_endpoint: storage_broker::Uri,
83 : #[serde(with = "humantime_serde")]
84 : pub broker_keepalive_interval: Duration,
85 : #[serde_as(as = "serde_with::DisplayFromStr")]
86 : pub log_format: LogFormat,
87 : pub concurrent_tenant_warmup: NonZeroUsize,
88 : pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
89 : #[serde(with = "humantime_serde")]
90 : pub metric_collection_interval: Duration,
91 : pub metric_collection_endpoint: Option<reqwest::Url>,
92 : pub metric_collection_bucket: Option<RemoteStorageConfig>,
93 : #[serde(with = "humantime_serde")]
94 : pub synthetic_size_calculation_interval: Duration,
95 : pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
96 : pub test_remote_failures: u64,
97 : pub ondemand_download_behavior_treat_error_as_warn: bool,
98 : #[serde(with = "humantime_serde")]
99 : pub background_task_maximum_delay: Duration,
100 : pub control_plane_api: Option<reqwest::Url>,
101 : pub control_plane_api_token: Option<String>,
102 : pub control_plane_emergency_mode: bool,
103 : /// Unstable feature: subject to change or removal without notice.
104 : /// See <https://github.com/neondatabase/neon/pull/9218>.
105 : pub import_pgdata_upcall_api: Option<reqwest::Url>,
106 : /// Unstable feature: subject to change or removal without notice.
107 : /// See <https://github.com/neondatabase/neon/pull/9218>.
108 : pub import_pgdata_upcall_api_token: Option<String>,
109 : /// Unstable feature: subject to change or removal without notice.
110 : /// See <https://github.com/neondatabase/neon/pull/9218>.
111 : pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
112 : pub heatmap_upload_concurrency: usize,
113 : pub secondary_download_concurrency: usize,
114 : pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
115 : pub ingest_batch_size: u64,
116 : pub max_vectored_read_bytes: MaxVectoredReadBytes,
117 : pub image_compression: ImageCompressionAlgorithm,
118 : pub timeline_offloading: bool,
119 : pub ephemeral_bytes_per_memory_kb: usize,
120 : pub l0_flush: Option<crate::models::L0FlushConfig>,
121 : pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
122 : #[serde(skip_serializing_if = "Option::is_none")]
123 : pub no_sync: Option<bool>,
124 : pub wal_receiver_protocol: PostgresClientProtocol,
125 : pub page_service_pipelining: PageServicePipeliningConfig,
126 : pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
127 : pub enable_read_path_debugging: Option<bool>,
128 : #[serde(skip_serializing_if = "Option::is_none")]
129 : pub validate_wal_contiguity: Option<bool>,
130 : #[serde(skip_serializing_if = "Option::is_none")]
131 : pub load_previous_heatmap: Option<bool>,
132 : #[serde(skip_serializing_if = "Option::is_none")]
133 : pub generate_unarchival_heatmap: Option<bool>,
134 : }
135 :
136 4 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
137 : #[serde(deny_unknown_fields)]
138 : pub struct DiskUsageEvictionTaskConfig {
139 : pub max_usage_pct: utils::serde_percent::Percent,
140 : pub min_avail_bytes: u64,
141 : #[serde(with = "humantime_serde")]
142 : pub period: Duration,
143 : #[cfg(feature = "testing")]
144 : pub mock_statvfs: Option<statvfs::mock::Behavior>,
145 : /// Select sorting for evicted layers
146 : #[serde(default)]
147 : pub eviction_order: EvictionOrder,
148 : }
149 :
150 0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
151 : #[serde(tag = "mode", rename_all = "kebab-case")]
152 : #[serde(deny_unknown_fields)]
153 : pub enum PageServicePipeliningConfig {
154 : Serial,
155 : Pipelined(PageServicePipeliningConfigPipelined),
156 : }
157 0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
158 : #[serde(deny_unknown_fields)]
159 : pub struct PageServicePipeliningConfigPipelined {
160 : /// Causes runtime errors if larger than max get_vectored batch size.
161 : pub max_batch_size: NonZeroUsize,
162 : pub execution: PageServiceProtocolPipelinedExecutionStrategy,
163 : }
164 :
165 0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
166 : #[serde(rename_all = "kebab-case")]
167 : pub enum PageServiceProtocolPipelinedExecutionStrategy {
168 : ConcurrentFutures,
169 : Tasks,
170 : }
171 :
172 0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
173 : #[serde(tag = "mode", rename_all = "kebab-case")]
174 : #[serde(deny_unknown_fields)]
175 : pub enum GetVectoredConcurrentIo {
176 : /// The read path is fully sequential: layers are visited
177 : /// one after the other and IOs are issued and waited upon
178 : /// from the same task that traverses the layers.
179 : Sequential,
180 : /// The read path still traverses layers sequentially, and
181 : /// index blocks will be read into the PS PageCache from
182 : /// that task, with waiting.
183 : /// But data IOs are dispatched and waited upon from a sidecar
184 : /// task so that the traversing task can continue to traverse
185 : /// layers while the IOs are in flight.
186 : /// If the PS PageCache miss rate is low, this improves
187 : /// throughput dramatically.
188 : SidecarTask,
189 : }
190 :
191 : pub mod statvfs {
192 : pub mod mock {
193 0 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
194 : #[serde(tag = "type")]
195 : pub enum Behavior {
196 : Success {
197 : blocksize: u64,
198 : total_blocks: u64,
199 : name_filter: Option<utils::serde_regex::Regex>,
200 : },
201 : #[cfg(feature = "testing")]
202 : Failure { mocked_error: MockedError },
203 : }
204 :
205 : #[cfg(feature = "testing")]
206 0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
207 : #[allow(clippy::upper_case_acronyms)]
208 : pub enum MockedError {
209 : EIO,
210 : }
211 :
212 : #[cfg(feature = "testing")]
213 : impl From<MockedError> for nix::Error {
214 0 : fn from(e: MockedError) -> Self {
215 0 : match e {
216 0 : MockedError::EIO => nix::Error::EIO,
217 0 : }
218 0 : }
219 : }
220 : }
221 : }
222 :
223 0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
224 : #[serde(tag = "type", content = "args")]
225 : pub enum EvictionOrder {
226 : RelativeAccessed {
227 : highest_layer_count_loses_first: bool,
228 : },
229 : }
230 :
231 : impl Default for EvictionOrder {
232 4 : fn default() -> Self {
233 4 : Self::RelativeAccessed {
234 4 : highest_layer_count_loses_first: true,
235 4 : }
236 4 : }
237 : }
238 :
239 0 : #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
240 : #[serde(transparent)]
241 : pub struct MaxVectoredReadBytes(pub NonZeroUsize);
242 :
243 : /// A tenant's calcuated configuration, which is the result of merging a
244 : /// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
245 : ///
246 : /// For storing and transmitting individual tenant's configuration, see
247 : /// TenantConfOpt.
248 4 : #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
249 : #[serde(deny_unknown_fields, default)]
250 : pub struct TenantConfigToml {
251 : // Flush out an inmemory layer, if it's holding WAL older than this
252 : // This puts a backstop on how much WAL needs to be re-digested if the
253 : // page server crashes.
254 : // This parameter actually determines L0 layer file size.
255 : pub checkpoint_distance: u64,
256 : // Inmemory layer is also flushed at least once in checkpoint_timeout to
257 : // eventually upload WAL after activity is stopped.
258 : #[serde(with = "humantime_serde")]
259 : pub checkpoint_timeout: Duration,
260 : // Target file size, when creating image and delta layers.
261 : // This parameter determines L1 layer file size.
262 : pub compaction_target_size: u64,
263 : // How often to check if there's compaction work to be done.
264 : // Duration::ZERO means automatic compaction is disabled.
265 : #[serde(with = "humantime_serde")]
266 : pub compaction_period: Duration,
267 : /// Level0 delta layer threshold for compaction.
268 : pub compaction_threshold: usize,
269 : /// Controls the amount of L0 included in a single compaction iteration.
270 : /// The unit is `checkpoint_distance`, i.e., a size.
271 : /// We add L0s to the set of layers to compact until their cumulative
272 : /// size exceeds `compaction_upper_limit * checkpoint_distance`.
273 : pub compaction_upper_limit: usize,
274 : pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
275 : /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0
276 : /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true.
277 : pub compaction_l0_first: bool,
278 : /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only
279 : /// has an effect if `compaction_l0_first` is true. Defaults to true.
280 : pub compaction_l0_semaphore: bool,
281 : /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
282 : /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
283 : /// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
284 : /// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
285 : pub l0_flush_delay_threshold: Option<usize>,
286 : /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
287 : /// to avoid deadlock. 0 to disable. Disabled by default.
288 : pub l0_flush_stall_threshold: Option<usize>,
289 : /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
290 : /// layer. This is a temporary backpressure mechanism which should be removed once
291 : /// l0_flush_{delay,stall}_threshold is fully enabled.
292 : pub l0_flush_wait_upload: bool,
293 : // Determines how much history is retained, to allow
294 : // branching and read replicas at an older point in time.
295 : // The unit is #of bytes of WAL.
296 : // Page versions older than this are garbage collected away.
297 : pub gc_horizon: u64,
298 : // Interval at which garbage collection is triggered.
299 : // Duration::ZERO means automatic GC is disabled
300 : #[serde(with = "humantime_serde")]
301 : pub gc_period: Duration,
302 : // Delta layer churn threshold to create L1 image layers.
303 : pub image_creation_threshold: usize,
304 : // Determines how much history is retained, to allow
305 : // branching and read replicas at an older point in time.
306 : // The unit is time.
307 : // Page versions older than this are garbage collected away.
308 : #[serde(with = "humantime_serde")]
309 : pub pitr_interval: Duration,
310 : /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
311 : #[serde(with = "humantime_serde")]
312 : pub walreceiver_connect_timeout: Duration,
313 : /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
314 : /// A stalled safekeeper will be changed to a newer one when it appears.
315 : #[serde(with = "humantime_serde")]
316 : pub lagging_wal_timeout: Duration,
317 : /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
318 : /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
319 : /// to avoid eager reconnects.
320 : pub max_lsn_wal_lag: NonZeroU64,
321 : pub eviction_policy: crate::models::EvictionPolicy,
322 : pub min_resident_size_override: Option<u64>,
323 : // See the corresponding metric's help string.
324 : #[serde(with = "humantime_serde")]
325 : pub evictions_low_residence_duration_metric_threshold: Duration,
326 :
327 : /// If non-zero, the period between uploads of a heatmap from attached tenants. This
328 : /// may be disabled if a Tenant will not have secondary locations: only secondary
329 : /// locations will use the heatmap uploaded by attached locations.
330 : #[serde(with = "humantime_serde")]
331 : pub heatmap_period: Duration,
332 :
333 : /// If true then SLRU segments are dowloaded on demand, if false SLRU segments are included in basebackup
334 : pub lazy_slru_download: bool,
335 :
336 : pub timeline_get_throttle: crate::models::ThrottleConfig,
337 :
338 : // How much WAL must be ingested before checking again whether a new image layer is required.
339 : // Expresed in multiples of checkpoint distance.
340 : pub image_layer_creation_check_threshold: u8,
341 :
342 : // How many multiples of L0 `compaction_threshold` will preempt image layer creation and do L0 compaction.
343 : // Set to 0 to disable preemption.
344 : pub image_creation_preempt_threshold: usize,
345 :
346 : /// The length for an explicit LSN lease request.
347 : /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
348 : #[serde(with = "humantime_serde")]
349 : pub lsn_lease_length: Duration,
350 :
351 : /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
352 : /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
353 : #[serde(with = "humantime_serde")]
354 : pub lsn_lease_length_for_ts: Duration,
355 :
356 : /// Enable auto-offloading of timelines.
357 : /// (either this flag or the pageserver-global one need to be set)
358 : pub timeline_offloading: bool,
359 :
360 : pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
361 :
362 : /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
363 : /// `index_part.json`, and it cannot be reversed.
364 : pub rel_size_v2_enabled: bool,
365 :
366 : // gc-compaction related configs
367 : /// Enable automatic gc-compaction trigger on this tenant.
368 : pub gc_compaction_enabled: bool,
369 : /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
370 : /// gc-compaction will be triggered.
371 : pub gc_compaction_initial_threshold_kb: u64,
372 : /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
373 : /// is above this ratio, gc-compaction will be triggered.
374 : pub gc_compaction_ratio_percent: u64,
375 : }
376 :
377 : pub mod defaults {
378 : pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;
379 :
380 : use crate::models::ImageCompressionAlgorithm;
381 :
382 : pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
383 : pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
384 :
385 : pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
386 : pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
387 : "C"
388 : } else {
389 : "C.UTF-8"
390 : };
391 :
392 : pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
393 : pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
394 :
395 : pub const DEFAULT_LOG_FORMAT: &str = "plain";
396 :
397 : pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
398 :
399 : pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;
400 :
401 : pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
402 : pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
403 : pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
404 : pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
405 :
406 : pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
407 : pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;
408 :
409 : pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
410 :
411 : /// Soft limit for the maximum size of a vectored read.
412 : ///
413 : /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
414 : /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
415 : /// with 32k xids. That's the max number of XIDS on a single CLOG page. The size of such a record
416 : /// is `sizeof(Transactionid) * 32768 + (some fixed overhead from 'timestamp`, the Vec length and whatever extra serde serialization adds)`.
417 : /// That is, slightly above 128 kB.
418 : pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB
419 :
420 : pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
421 : ImageCompressionAlgorithm::Zstd { level: Some(1) };
422 :
423 : pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
424 :
425 : pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
426 :
427 : pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
428 : utils::postgres_client::PostgresClientProtocol::Vanilla;
429 :
430 : pub const DEFAULT_SSL_KEY_FILE: &str = "server.key";
431 : pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt";
432 : }
433 :
434 : impl Default for ConfigToml {
435 490 : fn default() -> Self {
436 : use defaults::*;
437 :
438 : Self {
439 490 : listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
440 490 : listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
441 490 : listen_https_addr: (None),
442 490 : ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE),
443 490 : ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE),
444 490 : availability_zone: (None),
445 490 : wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
446 490 : .expect("cannot parse default wait lsn timeout")),
447 490 : wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
448 490 : .expect("cannot parse default wal redo timeout")),
449 490 : superuser: (DEFAULT_SUPERUSER.to_string()),
450 490 : locale: DEFAULT_LOCALE.to_string(),
451 490 : page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
452 490 : max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
453 490 : pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()
454 490 : http_auth_type: (AuthType::Trust),
455 490 : pg_auth_type: (AuthType::Trust),
456 490 : auth_validation_public_key_path: (None),
457 490 : remote_storage: None,
458 490 : broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
459 490 : .parse()
460 490 : .expect("failed to parse default broker endpoint")),
461 490 : broker_keepalive_interval: (humantime::parse_duration(
462 490 : storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
463 490 : )
464 490 : .expect("cannot parse default keepalive interval")),
465 490 : log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
466 490 :
467 490 : concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
468 490 : .expect("Invalid default constant")),
469 490 : concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
470 490 : DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
471 490 : )
472 490 : .unwrap(),
473 490 : metric_collection_interval: (humantime::parse_duration(
474 490 : DEFAULT_METRIC_COLLECTION_INTERVAL,
475 490 : )
476 490 : .expect("cannot parse default metric collection interval")),
477 490 : synthetic_size_calculation_interval: (humantime::parse_duration(
478 490 : DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
479 490 : )
480 490 : .expect("cannot parse default synthetic size calculation interval")),
481 490 : metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
482 490 :
483 490 : metric_collection_bucket: (None),
484 490 :
485 490 : disk_usage_based_eviction: (None),
486 490 :
487 490 : test_remote_failures: (0),
488 490 :
489 490 : ondemand_download_behavior_treat_error_as_warn: (false),
490 490 :
491 490 : background_task_maximum_delay: (humantime::parse_duration(
492 490 : DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
493 490 : )
494 490 : .unwrap()),
495 490 :
496 490 : control_plane_api: (None),
497 490 : control_plane_api_token: (None),
498 490 : control_plane_emergency_mode: (false),
499 490 :
500 490 : import_pgdata_upcall_api: (None),
501 490 : import_pgdata_upcall_api_token: (None),
502 490 : import_pgdata_aws_endpoint_url: (None),
503 490 :
504 490 : heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
505 490 : secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
506 490 :
507 490 : ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
508 490 :
509 490 : virtual_file_io_engine: None,
510 490 :
511 490 : max_vectored_read_bytes: (MaxVectoredReadBytes(
512 490 : NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
513 490 : )),
514 490 : image_compression: (DEFAULT_IMAGE_COMPRESSION),
515 490 : timeline_offloading: true,
516 490 : ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
517 490 : l0_flush: None,
518 490 : virtual_file_io_mode: None,
519 490 : tenant_config: TenantConfigToml::default(),
520 490 : no_sync: None,
521 490 : wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
522 490 : page_service_pipelining: if !cfg!(test) {
523 490 : PageServicePipeliningConfig::Serial
524 : } else {
525 0 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
526 0 : max_batch_size: NonZeroUsize::new(32).unwrap(),
527 0 : execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
528 0 : })
529 : },
530 490 : get_vectored_concurrent_io: if !cfg!(test) {
531 490 : GetVectoredConcurrentIo::Sequential
532 : } else {
533 0 : GetVectoredConcurrentIo::SidecarTask
534 : },
535 490 : enable_read_path_debugging: if cfg!(test) || cfg!(feature = "testing") {
536 490 : Some(true)
537 : } else {
538 0 : None
539 : },
540 490 : validate_wal_contiguity: None,
541 490 : load_previous_heatmap: None,
542 490 : generate_unarchival_heatmap: None,
543 490 : }
544 490 : }
545 : }
546 :
547 : pub mod tenant_conf_defaults {
548 :
549 : // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
550 : // would be more appropriate. But a low value forces the code to be exercised more,
551 : // which is good for now to trigger bugs.
552 : // This parameter actually determines L0 layer file size.
553 : pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
554 : pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";
555 :
556 : // FIXME the below configs are only used by legacy algorithm. The new algorithm
557 : // has different parameters.
558 :
559 : // Target file size, when creating image and delta layers.
560 : // This parameter determines L1 layer file size.
561 : pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
562 :
563 : pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
564 : pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
565 :
566 : // This value needs to be tuned to avoid OOM. We have 3/4*CPUs threads for L0 compaction, that's
567 : // 3/4*16=9 on most of our pageservers. Compacting 20 layers requires about 1 GB memory (could
568 : // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So
569 : // with this config, we can get a maximum peak compaction usage of 9 GB.
570 : pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20;
571 : // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid
572 : // read amp.
573 : pub const DEFAULT_COMPACTION_L0_FIRST: bool = true;
574 : pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true;
575 :
576 : pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
577 : crate::models::CompactionAlgorithm::Legacy;
578 :
579 : pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;
580 :
581 : pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
582 :
583 : // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
584 : // If there's a need to decrease this value, first make sure that GC
585 : // doesn't hold a layer map write lock for non-trivial operations.
586 : // Relevant: https://github.com/neondatabase/neon/issues/3394
587 : pub const DEFAULT_GC_PERIOD: &str = "1 hr";
588 : pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
589 : // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image
590 : // layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we
591 : // want to enable this feature.
592 : pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0;
593 : pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
594 : pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
595 : pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
596 : // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
597 : // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
598 : // throughputs up to 1GiB/s per timeline.
599 : pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
600 : pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
601 : // By default ingest enough WAL for two new L0 layers before checking if new image
602 : // image layers should be created.
603 : pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
604 : pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
605 : pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
606 : pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
607 : }
608 :
609 : impl Default for TenantConfigToml {
610 928 : fn default() -> Self {
611 : use tenant_conf_defaults::*;
612 928 : Self {
613 928 : checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
614 928 : checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
615 928 : .expect("cannot parse default checkpoint timeout"),
616 928 : compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
617 928 : compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
618 928 : .expect("cannot parse default compaction period"),
619 928 : compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
620 928 : compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
621 928 : compaction_algorithm: crate::models::CompactionAlgorithmSettings {
622 928 : kind: DEFAULT_COMPACTION_ALGORITHM,
623 928 : },
624 928 : compaction_l0_first: DEFAULT_COMPACTION_L0_FIRST,
625 928 : compaction_l0_semaphore: DEFAULT_COMPACTION_L0_SEMAPHORE,
626 928 : l0_flush_delay_threshold: None,
627 928 : l0_flush_stall_threshold: None,
628 928 : l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
629 928 : gc_horizon: DEFAULT_GC_HORIZON,
630 928 : gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
631 928 : .expect("cannot parse default gc period"),
632 928 : image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
633 928 : pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
634 928 : .expect("cannot parse default PITR interval"),
635 928 : walreceiver_connect_timeout: humantime::parse_duration(
636 928 : DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
637 928 : )
638 928 : .expect("cannot parse default walreceiver connect timeout"),
639 928 : lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
640 928 : .expect("cannot parse default walreceiver lagging wal timeout"),
641 928 : max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
642 928 : .expect("cannot parse default max walreceiver Lsn wal lag"),
643 928 : eviction_policy: crate::models::EvictionPolicy::NoEviction,
644 928 : min_resident_size_override: None,
645 928 : evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
646 928 : DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
647 928 : )
648 928 : .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
649 928 : heatmap_period: Duration::ZERO,
650 928 : lazy_slru_download: false,
651 928 : timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
652 928 : image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
653 928 : image_creation_preempt_threshold: DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD,
654 928 : lsn_lease_length: LsnLease::DEFAULT_LENGTH,
655 928 : lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
656 928 : timeline_offloading: true,
657 928 : wal_receiver_protocol_override: None,
658 928 : rel_size_v2_enabled: false,
659 928 : gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
660 928 : gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
661 928 : gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
662 928 : }
663 928 : }
664 : }
|