Line data Source code
1 : use camino::Utf8PathBuf;
2 :
3 : #[cfg(test)]
4 : mod tests;
5 :
6 : use const_format::formatcp;
/// Port component of [`DEFAULT_PG_LISTEN_ADDR`].
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
/// Default for `ConfigToml::listen_pg_addr`: the loopback address combined with
/// [`DEFAULT_PG_LISTEN_PORT`], assembled at compile time by `formatcp!`.
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
/// Port component of [`DEFAULT_HTTP_LISTEN_ADDR`].
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
/// Default for `ConfigToml::listen_http_addr`: the loopback address combined with
/// [`DEFAULT_HTTP_LISTEN_PORT`], assembled at compile time by `formatcp!`.
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
11 :
12 : use postgres_backend::AuthType;
13 : use remote_storage::RemoteStorageConfig;
14 : use serde_with::serde_as;
15 : use std::{
16 : collections::HashMap,
17 : num::{NonZeroU64, NonZeroUsize},
18 : str::FromStr,
19 : time::Duration,
20 : };
21 : use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol};
22 :
23 : use crate::models::ImageCompressionAlgorithm;
24 : use crate::models::LsnLease;
25 :
/// Certain metadata (e.g. externally-addressable name, AZ) is delivered
/// as a separate structure. This information is not needed by the pageserver
/// itself, it is only used for registering the pageserver with the control
/// plane and/or storage controller.
#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
pub struct NodeMetadata {
    /// Stored under the key `host` in the serialized form.
    #[serde(rename = "host")]
    pub postgres_host: String,
    /// Stored under the key `port` in the serialized form.
    #[serde(rename = "port")]
    pub postgres_port: u16,
    pub http_host: String,
    pub http_port: u16,

    /// Deployment tools may write fields to the metadata file beyond what we
    /// use in this type: this type intentionally only names the fields that we require.
    /// Because of `#[serde(flatten)]`, every key that is not one of the named
    /// fields above is collected into this map instead of being rejected.
    #[serde(flatten)]
    pub other: HashMap<String, serde_json::Value>,
}
45 :
/// `pageserver.toml`
///
/// We use serde derive with `#[serde(default)]` to generate a deserializer
/// that fills in the default values for each config field.
///
/// If there cannot be a static default value because we need to make runtime
/// checks to determine the default, make it an `Option` (which defaults to None).
/// The runtime check should be done in the consuming crate, i.e., `pageserver`.
///
/// `deny_unknown_fields` is set, so a key that does not correspond to one of the
/// fields below is a deserialization error rather than being silently ignored.
///
/// Fields annotated with `humantime_serde` are read and written as human-readable
/// duration strings (the defaults in [`defaults`] are spelled the same way, e.g. `"300 s"`).
/// Fields annotated with `serde_with::DisplayFromStr` are (de)serialized through the
/// type's `Display` / `FromStr` implementations.
#[serde_as]
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(default, deny_unknown_fields)]
pub struct ConfigToml {
    // types mapped 1:1 into the runtime PageServerConfig type
    /// Defaults to [`DEFAULT_PG_LISTEN_ADDR`].
    pub listen_pg_addr: String,
    /// Defaults to [`DEFAULT_HTTP_LISTEN_ADDR`].
    pub listen_http_addr: String,
    pub availability_zone: Option<String>,
    #[serde(with = "humantime_serde")]
    pub wait_lsn_timeout: Duration,
    #[serde(with = "humantime_serde")]
    pub wal_redo_timeout: Duration,
    pub superuser: String,
    pub locale: String,
    pub page_cache_size: usize,
    pub max_file_descriptors: usize,
    pub pg_distrib_dir: Option<Utf8PathBuf>,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub http_auth_type: AuthType,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub pg_auth_type: AuthType,
    pub auth_validation_public_key_path: Option<Utf8PathBuf>,
    pub remote_storage: Option<RemoteStorageConfig>,
    /// Tenant-level settings; see [`TenantConfigToml`].
    pub tenant_config: TenantConfigToml,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub broker_endpoint: storage_broker::Uri,
    #[serde(with = "humantime_serde")]
    pub broker_keepalive_interval: Duration,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    pub log_format: LogFormat,
    pub concurrent_tenant_warmup: NonZeroUsize,
    pub concurrent_tenant_size_logical_size_queries: NonZeroUsize,
    #[serde(with = "humantime_serde")]
    pub metric_collection_interval: Duration,
    pub metric_collection_endpoint: Option<reqwest::Url>,
    pub metric_collection_bucket: Option<RemoteStorageConfig>,
    #[serde(with = "humantime_serde")]
    pub synthetic_size_calculation_interval: Duration,
    pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
    pub test_remote_failures: u64,
    pub ondemand_download_behavior_treat_error_as_warn: bool,
    #[serde(with = "humantime_serde")]
    pub background_task_maximum_delay: Duration,
    pub control_plane_api: Option<reqwest::Url>,
    pub control_plane_api_token: Option<String>,
    pub control_plane_emergency_mode: bool,
    /// Unstable feature: subject to change or removal without notice.
    /// See <https://github.com/neondatabase/neon/pull/9218>.
    pub import_pgdata_upcall_api: Option<reqwest::Url>,
    /// Unstable feature: subject to change or removal without notice.
    /// See <https://github.com/neondatabase/neon/pull/9218>.
    pub import_pgdata_upcall_api_token: Option<String>,
    /// Unstable feature: subject to change or removal without notice.
    /// See <https://github.com/neondatabase/neon/pull/9218>.
    pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
    pub heatmap_upload_concurrency: usize,
    pub secondary_download_concurrency: usize,
    pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
    pub ingest_batch_size: u64,
    pub max_vectored_read_bytes: MaxVectoredReadBytes,
    pub image_compression: ImageCompressionAlgorithm,
    /// Pageserver-global counterpart of `TenantConfigToml::timeline_offloading`.
    pub timeline_offloading: bool,
    pub ephemeral_bytes_per_memory_kb: usize,
    pub l0_flush: Option<crate::models::L0FlushConfig>,
    pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
    /// Left out of the serialized output entirely while it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub no_sync: Option<bool>,
    pub wal_receiver_protocol: PostgresClientProtocol,
    /// The built-in default differs between test and non-test builds of this crate;
    /// see the `Default` impl.
    pub page_service_pipelining: PageServicePipeliningConfig,
    /// The built-in default differs between test and non-test builds of this crate;
    /// see the `Default` impl.
    pub get_vectored_concurrent_io: GetVectoredConcurrentIo,
}
125 :
/// Settings for the disk-usage-based eviction task
/// (the `disk_usage_based_eviction` entry of [`ConfigToml`]).
///
/// Unknown keys are rejected (`deny_unknown_fields`). There is no container-level
/// `#[serde(default)]` here, so only `eviction_order` has an explicit fallback.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct DiskUsageEvictionTaskConfig {
    pub max_usage_pct: utils::serde_percent::Percent,
    pub min_avail_bytes: u64,
    /// Read and written as a human-readable duration string.
    #[serde(with = "humantime_serde")]
    pub period: Duration,
    /// Only present when the crate is built with the `testing` feature.
    #[cfg(feature = "testing")]
    pub mock_statvfs: Option<statvfs::mock::Behavior>,
    /// Select sorting for evicted layers
    /// (falls back to `EvictionOrder::default()` when the key is absent).
    #[serde(default)]
    pub eviction_order: EvictionOrder,
}
139 :
/// Value of `ConfigToml::page_service_pipelining`.
///
/// Internally tagged: the variant is chosen by a `mode` key whose value is the
/// kebab-case variant name (`serial` or `pipelined`).
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
#[serde(deny_unknown_fields)]
pub enum PageServicePipeliningConfig {
    Serial,
    /// Carries the extra settings defined by [`PageServicePipeliningConfigPipelined`].
    Pipelined(PageServicePipeliningConfigPipelined),
}
/// Payload of `PageServicePipeliningConfig::Pipelined`. Unknown keys are rejected.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields)]
pub struct PageServicePipeliningConfigPipelined {
    /// Causes runtime errors if larger than max get_vectored batch size.
    pub max_batch_size: NonZeroUsize,
    /// Which execution strategy to use; see
    /// [`PageServiceProtocolPipelinedExecutionStrategy`].
    pub execution: PageServiceProtocolPipelinedExecutionStrategy,
}
154 :
/// Value of `PageServicePipeliningConfigPipelined::execution`.
///
/// Serialized as the kebab-case variant name (`concurrent-futures` or `tasks`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum PageServiceProtocolPipelinedExecutionStrategy {
    ConcurrentFutures,
    Tasks,
}
161 :
/// Value of `ConfigToml::get_vectored_concurrent_io`.
///
/// Internally tagged: the variant is chosen by a `mode` key whose value is the
/// kebab-case variant name (`sequential` or `sidecar-task`).
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "mode", rename_all = "kebab-case")]
#[serde(deny_unknown_fields)]
pub enum GetVectoredConcurrentIo {
    /// The read path is fully sequential: layers are visited
    /// one after the other and IOs are issued and waited upon
    /// from the same task that traverses the layers.
    Sequential,
    /// The read path still traverses layers sequentially, and
    /// index blocks will be read into the PS PageCache from
    /// that task, with waiting.
    /// But data IOs are dispatched and waited upon from a sidecar
    /// task so that the traversing task can continue to traverse
    /// layers while the IOs are in flight.
    /// If the PS PageCache miss rate is low, this improves
    /// throughput dramatically.
    SidecarTask,
}
180 :
/// Configuration types for mocking `statvfs` results; referenced by
/// `DiskUsageEvictionTaskConfig::mock_statvfs`.
pub mod statvfs {
    pub mod mock {
        /// What the mocked `statvfs` should report.
        ///
        /// Internally tagged by a `type` key. The type itself is always compiled, but
        /// the `Failure` variant only exists with the `testing` feature.
        #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
        #[serde(tag = "type")]
        pub enum Behavior {
            Success {
                blocksize: u64,
                total_blocks: u64,
                name_filter: Option<utils::serde_regex::Regex>,
            },
            #[cfg(feature = "testing")]
            Failure { mocked_error: MockedError },
        }

        /// Errors that a mocked failure can report (`testing` feature only).
        #[cfg(feature = "testing")]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
        // The variant deliberately mirrors the errno spelling, hence the lint exemption.
        #[allow(clippy::upper_case_acronyms)]
        pub enum MockedError {
            EIO,
        }

        /// Maps each mocked error onto the `nix::Error` constant of the same name.
        #[cfg(feature = "testing")]
        impl From<MockedError> for nix::Error {
            fn from(e: MockedError) -> Self {
                // Exhaustive on purpose: adding a variant must fail to compile here.
                match e {
                    MockedError::EIO => nix::Error::EIO,
                }
            }
        }
    }
}
212 :
/// Value of `DiskUsageEvictionTaskConfig::eviction_order`.
///
/// Adjacently tagged: the variant name is stored under `type` and the variant's
/// fields under `args`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(tag = "type", content = "args")]
pub enum EvictionOrder {
    RelativeAccessed {
        highest_layer_count_loses_first: bool,
    },
}
220 :
221 : impl Default for EvictionOrder {
222 4 : fn default() -> Self {
223 4 : Self::RelativeAccessed {
224 4 : highest_layer_count_loses_first: true,
225 4 : }
226 4 : }
227 : }
228 :
/// Newtype for `ConfigToml::max_vectored_read_bytes`.
///
/// `#[serde(transparent)]` makes it (de)serialize exactly like the wrapped
/// `NonZeroUsize`, i.e. as a bare non-zero integer.
#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(transparent)]
pub struct MaxVectoredReadBytes(pub NonZeroUsize);
232 :
/// A tenant's calculated configuration, which is the result of merging a
/// tenant's TenantConfOpt with the global TenantConf from PageServerConf.
///
/// For storing and transmitting individual tenant's configuration, see
/// TenantConfOpt.
///
/// Unknown keys are rejected (`deny_unknown_fields`); absent keys take the value
/// from this type's `Default` impl (`default`). Fields annotated with
/// `humantime_serde` are read and written as human-readable duration strings.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(deny_unknown_fields, default)]
pub struct TenantConfigToml {
    // Flush out an inmemory layer, if it's holding WAL older than this
    // This puts a backstop on how much WAL needs to be re-digested if the
    // page server crashes.
    // This parameter actually determines L0 layer file size.
    pub checkpoint_distance: u64,
    // Inmemory layer is also flushed at least once in checkpoint_timeout to
    // eventually upload WAL after activity is stopped.
    #[serde(with = "humantime_serde")]
    pub checkpoint_timeout: Duration,
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub compaction_target_size: u64,
    // How often to check if there's compaction work to be done.
    // Duration::ZERO means automatic compaction is disabled.
    #[serde(with = "humantime_serde")]
    pub compaction_period: Duration,
    /// Level0 delta layer threshold for compaction.
    pub compaction_threshold: usize,
    /// Controls the amount of L0 included in a single compaction iteration.
    /// The unit is `checkpoint_distance`, i.e., a size.
    /// We add L0s to the set of layers to compact until their cumulative
    /// size exceeds `compaction_upper_limit * checkpoint_distance`.
    pub compaction_upper_limit: usize,
    pub compaction_algorithm: crate::models::CompactionAlgorithmSettings,
    /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure,
    /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer
    /// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification
    /// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default.
    pub l0_flush_delay_threshold: Option<usize>,
    /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold
    /// to avoid deadlock. 0 to disable. Disabled by default.
    pub l0_flush_stall_threshold: Option<usize>,
    /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next
    /// layer. This is a temporary backpressure mechanism which should be removed once
    /// l0_flush_{delay,stall}_threshold is fully enabled.
    pub l0_flush_wait_upload: bool,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is #of bytes of WAL.
    // Page versions older than this are garbage collected away.
    pub gc_horizon: u64,
    // Interval at which garbage collection is triggered.
    // Duration::ZERO means automatic GC is disabled
    #[serde(with = "humantime_serde")]
    pub gc_period: Duration,
    // Delta layer churn threshold to create L1 image layers.
    pub image_creation_threshold: usize,
    // Determines how much history is retained, to allow
    // branching and read replicas at an older point in time.
    // The unit is time.
    // Page versions older than this are garbage collected away.
    #[serde(with = "humantime_serde")]
    pub pitr_interval: Duration,
    /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
    #[serde(with = "humantime_serde")]
    pub walreceiver_connect_timeout: Duration,
    /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
    /// A stalled safekeeper will be changed to a newer one when it appears.
    #[serde(with = "humantime_serde")]
    pub lagging_wal_timeout: Duration,
    /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
    /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
    /// to avoid eager reconnects.
    pub max_lsn_wal_lag: NonZeroU64,
    pub eviction_policy: crate::models::EvictionPolicy,
    pub min_resident_size_override: Option<u64>,
    // See the corresponding metric's help string.
    #[serde(with = "humantime_serde")]
    pub evictions_low_residence_duration_metric_threshold: Duration,

    /// If non-zero, the period between uploads of a heatmap from attached tenants. This
    /// may be disabled if a Tenant will not have secondary locations: only secondary
    /// locations will use the heatmap uploaded by attached locations.
    #[serde(with = "humantime_serde")]
    pub heatmap_period: Duration,

    /// If true then SLRU segments are downloaded on demand, if false SLRU segments are included in basebackup
    pub lazy_slru_download: bool,

    pub timeline_get_throttle: crate::models::ThrottleConfig,

    // How much WAL must be ingested before checking again whether a new image layer is required.
    // Expressed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,

    /// The length for an explicit LSN lease request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length: Duration,

    /// The length for an implicit LSN lease granted as part of `get_lsn_by_timestamp` request.
    /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval.
    #[serde(with = "humantime_serde")]
    pub lsn_lease_length_for_ts: Duration,

    /// Enable auto-offloading of timelines.
    /// (either this flag or the pageserver-global one need to be set)
    pub timeline_offloading: bool,

    pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,

    /// Enable rel_size_v2 for this tenant. Once enabled, the tenant will persist this information into
    /// `index_part.json`, and it cannot be reversed.
    pub rel_size_v2_enabled: Option<bool>,

    // gc-compaction related configs
    /// Enable automatic gc-compaction trigger on this tenant.
    pub gc_compaction_enabled: bool,
    /// The initial threshold for gc-compaction in KB. Once the total size of layers below the gc-horizon is above this threshold,
    /// gc-compaction will be triggered.
    pub gc_compaction_initial_threshold_kb: u64,
    /// The ratio that triggers the auto gc-compaction. If (the total size of layers between L2 LSN and gc-horizon) / (size below the L2 LSN)
    /// is above this ratio, gc-compaction will be triggered.
    pub gc_compaction_ratio_percent: u64,
}
356 :
/// Built-in default values consumed by `ConfigToml`'s `Default` impl.
///
/// Durations are kept as humantime strings and the log format as its textual
/// name; the `Default` impl parses them when it runs.
pub mod defaults {
    use crate::models::ImageCompressionAlgorithm;

    pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT;

    pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s";
    pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";

    pub const DEFAULT_SUPERUSER: &str = "cloud_admin";
    // macOS builds use the plain "C" locale; every other target uses "C.UTF-8".
    pub const DEFAULT_LOCALE: &str = if cfg!(target_os = "macos") {
        "C"
    } else {
        "C.UTF-8"
    };

    pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
    pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;

    pub const DEFAULT_LOG_FORMAT: &str = "plain";

    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;

    pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize = 1;

    pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
    pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
    pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";

    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
    pub const DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY: usize = 1;

    pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;

    /// Soft limit for the maximum size of a vectored read.
    ///
    /// This is determined by the largest NeonWalRecord that can exist (minus dbdir and reldir keys
    /// which are bounded by the blob io limits only). As of this writing, that is a `NeonWalRecord::ClogSetCommitted` record,
    /// with 32k xids. That's the max number of XIDS on a single CLOG page. The size of such a record
    /// is `sizeof(Transactionid) * 32768 + (some fixed overhead from `timestamp`, the Vec length and whatever extra serde serialization adds)`.
    /// That is, slightly above 128 kB.
    pub const DEFAULT_MAX_VECTORED_READ_BYTES: usize = 130 * 1024; // 130 KiB

    pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
        ImageCompressionAlgorithm::Zstd { level: Some(1) };

    pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;

    // NOTE(review): not referenced anywhere in the visible part of this file;
    // presumably consumed by another crate. Confirm before removing.
    pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;

    pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol =
        utils::postgres_client::PostgresClientProtocol::Vanilla;
}
410 :
411 : impl Default for ConfigToml {
412 476 : fn default() -> Self {
413 : use defaults::*;
414 :
415 : Self {
416 476 : listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()),
417 476 : listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()),
418 476 : availability_zone: (None),
419 476 : wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
420 476 : .expect("cannot parse default wait lsn timeout")),
421 476 : wal_redo_timeout: (humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
422 476 : .expect("cannot parse default wal redo timeout")),
423 476 : superuser: (DEFAULT_SUPERUSER.to_string()),
424 476 : locale: DEFAULT_LOCALE.to_string(),
425 476 : page_cache_size: (DEFAULT_PAGE_CACHE_SIZE),
426 476 : max_file_descriptors: (DEFAULT_MAX_FILE_DESCRIPTORS),
427 476 : pg_distrib_dir: None, // Utf8PathBuf::from("./pg_install"), // TODO: formely, this was std::env::current_dir()
428 476 : http_auth_type: (AuthType::Trust),
429 476 : pg_auth_type: (AuthType::Trust),
430 476 : auth_validation_public_key_path: (None),
431 476 : remote_storage: None,
432 476 : broker_endpoint: (storage_broker::DEFAULT_ENDPOINT
433 476 : .parse()
434 476 : .expect("failed to parse default broker endpoint")),
435 476 : broker_keepalive_interval: (humantime::parse_duration(
436 476 : storage_broker::DEFAULT_KEEPALIVE_INTERVAL,
437 476 : )
438 476 : .expect("cannot parse default keepalive interval")),
439 476 : log_format: (LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
440 476 :
441 476 : concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
442 476 : .expect("Invalid default constant")),
443 476 : concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(
444 476 : DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
445 476 : )
446 476 : .unwrap(),
447 476 : metric_collection_interval: (humantime::parse_duration(
448 476 : DEFAULT_METRIC_COLLECTION_INTERVAL,
449 476 : )
450 476 : .expect("cannot parse default metric collection interval")),
451 476 : synthetic_size_calculation_interval: (humantime::parse_duration(
452 476 : DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
453 476 : )
454 476 : .expect("cannot parse default synthetic size calculation interval")),
455 476 : metric_collection_endpoint: (DEFAULT_METRIC_COLLECTION_ENDPOINT),
456 476 :
457 476 : metric_collection_bucket: (None),
458 476 :
459 476 : disk_usage_based_eviction: (None),
460 476 :
461 476 : test_remote_failures: (0),
462 476 :
463 476 : ondemand_download_behavior_treat_error_as_warn: (false),
464 476 :
465 476 : background_task_maximum_delay: (humantime::parse_duration(
466 476 : DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY,
467 476 : )
468 476 : .unwrap()),
469 476 :
470 476 : control_plane_api: (None),
471 476 : control_plane_api_token: (None),
472 476 : control_plane_emergency_mode: (false),
473 476 :
474 476 : import_pgdata_upcall_api: (None),
475 476 : import_pgdata_upcall_api_token: (None),
476 476 : import_pgdata_aws_endpoint_url: (None),
477 476 :
478 476 : heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
479 476 : secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
480 476 :
481 476 : ingest_batch_size: (DEFAULT_INGEST_BATCH_SIZE),
482 476 :
483 476 : virtual_file_io_engine: None,
484 476 :
485 476 : max_vectored_read_bytes: (MaxVectoredReadBytes(
486 476 : NonZeroUsize::new(DEFAULT_MAX_VECTORED_READ_BYTES).unwrap(),
487 476 : )),
488 476 : image_compression: (DEFAULT_IMAGE_COMPRESSION),
489 476 : timeline_offloading: false,
490 476 : ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
491 476 : l0_flush: None,
492 476 : virtual_file_io_mode: None,
493 476 : tenant_config: TenantConfigToml::default(),
494 476 : no_sync: None,
495 476 : wal_receiver_protocol: DEFAULT_WAL_RECEIVER_PROTOCOL,
496 476 : page_service_pipelining: if !cfg!(test) {
497 476 : PageServicePipeliningConfig::Serial
498 : } else {
499 0 : PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
500 0 : max_batch_size: NonZeroUsize::new(32).unwrap(),
501 0 : execution: PageServiceProtocolPipelinedExecutionStrategy::ConcurrentFutures,
502 0 : })
503 : },
504 476 : get_vectored_concurrent_io: if !cfg!(test) {
505 476 : GetVectoredConcurrentIo::Sequential
506 : } else {
507 0 : GetVectoredConcurrentIo::SidecarTask
508 : },
509 : }
510 476 : }
511 : }
512 :
/// Built-in default values consumed by `TenantConfigToml`'s `Default` impl.
///
/// Durations are kept as humantime strings and parsed by that impl.
pub mod tenant_conf_defaults {

    // FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
    // would be more appropriate. But a low value forces the code to be exercised more,
    // which is good for now to trigger bugs.
    // This parameter actually determines L0 layer file size.
    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";

    // FIXME the below configs are only used by legacy algorithm. The new algorithm
    // has different parameters.

    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;

    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;

    // This value needs to be tuned to avoid OOM. We have 3/4 of the total CPU threads to do background works, that's 16*3/4=9 on
    // most of our pageservers. Compaction ~50 layers requires about 2GB memory (could be reduced later by optimizing L0 hole
    // calculation to avoid loading all keys into the memory). So with this config, we can get a maximum peak compaction usage of 18GB.
    // NOTE(review): 16*3/4 is 12, not 9. The 18GB figure matches 9 concurrent compactions at 2GB
    // each (12 would give 24GB) -- confirm which number is intended.
    pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 50;

    pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm =
        crate::models::CompactionAlgorithm::Legacy;

    pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true;

    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;

    // Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
    // If there's a need to decrease this value, first make sure that GC
    // doesn't hold a layer map write lock for non-trivial operations.
    // Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
    // The default limit on WAL lag should be set to avoid causing disconnects under high throughput
    // scenarios: since the broker stats are updated ~1/s, a value of 1GiB should be sufficient for
    // throughputs up to 1GiB/s per timeline.
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 1024 * 1024 * 1024;
    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
    // By default ingest enough WAL for two new L0 layers before checking if new
    // image layers should be created.
    pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
    pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
    pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000;
    pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
}
565 :
566 : impl Default for TenantConfigToml {
567 900 : fn default() -> Self {
568 : use tenant_conf_defaults::*;
569 900 : Self {
570 900 : checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
571 900 : checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
572 900 : .expect("cannot parse default checkpoint timeout"),
573 900 : compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
574 900 : compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
575 900 : .expect("cannot parse default compaction period"),
576 900 : compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
577 900 : compaction_upper_limit: DEFAULT_COMPACTION_UPPER_LIMIT,
578 900 : compaction_algorithm: crate::models::CompactionAlgorithmSettings {
579 900 : kind: DEFAULT_COMPACTION_ALGORITHM,
580 900 : },
581 900 : l0_flush_delay_threshold: None,
582 900 : l0_flush_stall_threshold: None,
583 900 : l0_flush_wait_upload: DEFAULT_L0_FLUSH_WAIT_UPLOAD,
584 900 : gc_horizon: DEFAULT_GC_HORIZON,
585 900 : gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
586 900 : .expect("cannot parse default gc period"),
587 900 : image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
588 900 : pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
589 900 : .expect("cannot parse default PITR interval"),
590 900 : walreceiver_connect_timeout: humantime::parse_duration(
591 900 : DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
592 900 : )
593 900 : .expect("cannot parse default walreceiver connect timeout"),
594 900 : lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
595 900 : .expect("cannot parse default walreceiver lagging wal timeout"),
596 900 : max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
597 900 : .expect("cannot parse default max walreceiver Lsn wal lag"),
598 900 : eviction_policy: crate::models::EvictionPolicy::NoEviction,
599 900 : min_resident_size_override: None,
600 900 : evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
601 900 : DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
602 900 : )
603 900 : .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
604 900 : heatmap_period: Duration::ZERO,
605 900 : lazy_slru_download: false,
606 900 : timeline_get_throttle: crate::models::ThrottleConfig::disabled(),
607 900 : image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
608 900 : lsn_lease_length: LsnLease::DEFAULT_LENGTH,
609 900 : lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS,
610 900 : timeline_offloading: false,
611 900 : wal_receiver_protocol_override: None,
612 900 : rel_size_v2_enabled: None,
613 900 : gc_compaction_enabled: DEFAULT_GC_COMPACTION_ENABLED,
614 900 : gc_compaction_initial_threshold_kb: DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB,
615 900 : gc_compaction_ratio_percent: DEFAULT_GC_COMPACTION_RATIO_PERCENT,
616 900 : }
617 900 : }
618 : }
|