Line data Source code
1 : //! Functions for handling page server configuration options
2 : //!
3 : //! Configuration options can be set in the pageserver.toml configuration
4 : //! file, or on the command line.
//! See also `settings.md` for a more detailed description of every parameter.
6 :
7 : pub mod ignored_fields;
8 :
9 : use std::env;
10 : use std::num::NonZeroUsize;
11 : use std::sync::Arc;
12 : use std::time::Duration;
13 :
14 : use anyhow::{Context, ensure};
15 : use camino::{Utf8Path, Utf8PathBuf};
16 : use once_cell::sync::OnceCell;
17 : use pageserver_api::config::{
18 : DiskUsageEvictionTaskConfig, MaxGetVectoredKeys, MaxVectoredReadBytes,
19 : PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PostHogConfig,
20 : };
21 : use pageserver_api::models::ImageCompressionAlgorithm;
22 : use pageserver_api::shard::TenantShardId;
23 : use pem::Pem;
24 : use postgres_backend::AuthType;
25 : use postgres_ffi::PgMajorVersion;
26 : use remote_storage::{RemotePath, RemoteStorageConfig};
27 : use reqwest::Url;
28 : use storage_broker::Uri;
29 : use utils::id::{NodeId, TimelineId};
30 : use utils::logging::{LogFormat, SecretString};
31 : use utils::serde_percent::Percent;
32 :
33 : use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
34 : use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
35 : use crate::virtual_file::io_engine;
36 : use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};
37 :
/// Global state of pageserver.
///
/// It's mostly immutable configuration, but some semaphores and the
/// like crept in over time and the name stuck.
///
/// Instantiated by deserializing `pageserver.toml` into [`pageserver_api::config::ConfigToml`]
/// and passing that to [`PageServerConf::parse_and_validate`].
///
/// # Adding a New Field
///
/// 1. Add the field to `pageserver_api::config::ConfigToml`.
/// 2. Fix compiler errors (exhaustive destructuring will guide you).
///
/// For fields that require additional validation or filling in of defaults at runtime,
/// check for examples in the [`PageServerConf::parse_and_validate`] method.
#[derive(Debug, Clone)]
pub struct PageServerConf {
    /// Identifier of this particular pageserver, so that e.g. safekeepers
    /// can safely distinguish different pageservers.
    pub id: NodeId,

    /// Example (default): 127.0.0.1:64000
    pub listen_pg_addr: String,
    /// Example (default): 127.0.0.1:9898
    pub listen_http_addr: String,
    /// Example: 127.0.0.1:9899
    pub listen_https_addr: Option<String>,
    /// If set, expose a gRPC API on this address.
    /// Example: 127.0.0.1:51051
    ///
    /// EXPERIMENTAL: this protocol is unstable and under active development.
    pub listen_grpc_addr: Option<String>,

    /// Path to a file with the certificate's private key for the https and gRPC APIs.
    /// Default: server.key
    pub ssl_key_file: Utf8PathBuf,
    /// Path to a file with an X509 certificate for the https and gRPC APIs.
    /// Default: server.crt
    pub ssl_cert_file: Utf8PathBuf,
    /// Period to reload certificate and private key from files.
    /// Default: 60s.
    pub ssl_cert_reload_period: Duration,
    /// Trusted root CA certificates to use in https APIs in PEM format.
    pub ssl_ca_certs: Vec<Pem>,

    /// Current availability zone. Used for traffic metrics.
    pub availability_zone: Option<String>,

    /// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
    pub wait_lsn_timeout: Duration,
    /// How long to wait for WAL redo to complete.
    pub wal_redo_timeout: Duration,

    pub superuser: String,
    pub locale: String,

    pub page_cache_size: usize,
    pub max_file_descriptors: usize,

    /// Repository directory, relative to current working directory.
    /// Normally, the page server changes the current working directory
    /// to the repository, and 'workdir' is always '.'. But we don't do
    /// that during unit testing, because the current directory is global
    /// to the process but different unit tests work on different
    /// repositories.
    pub workdir: Utf8PathBuf,

    /// Base directory of the Postgres installations; the per-version directory is derived
    /// from it by [`PageServerConf::pg_distrib_dir`].
    pub pg_distrib_dir: Utf8PathBuf,

    // Authentication
    /// authentication method for the HTTP mgmt API
    pub http_auth_type: AuthType,
    /// authentication method for libpq connections from compute
    pub pg_auth_type: AuthType,
    /// authentication method for gRPC connections from compute
    pub grpc_auth_type: AuthType,
    /// Path to a file or directory containing public key(s) for verifying JWT tokens.
    /// Used for both mgmt and compute auth, if enabled.
    pub auth_validation_public_key_path: Option<Utf8PathBuf>,

    pub remote_storage_config: Option<RemoteStorageConfig>,

    pub default_tenant_conf: pageserver_api::config::TenantConfigToml,

    /// Storage broker endpoint to connect to.
    pub broker_endpoint: Uri,
    pub broker_keepalive_interval: Duration,

    pub log_format: LogFormat,

    /// Number of tenants which will be concurrently loaded from remote storage proactively on startup or attach.
    ///
    /// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
    pub concurrent_tenant_warmup: ConfigurableSemaphore,

    /// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
    pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
    /// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
    /// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
    /// See the comment in `eviction_task` for details.
    ///
    /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
    pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,

    /// How often to collect metrics and send them to the metrics endpoint.
    pub metric_collection_interval: Duration,
    /// Metrics endpoint URL, if configured.
    pub metric_collection_endpoint: Option<Url>,
    pub metric_collection_bucket: Option<RemoteStorageConfig>,
    pub synthetic_size_calculation_interval: Duration,

    pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,

    pub test_remote_failures: u64,

    pub ondemand_download_behavior_treat_error_as_warn: bool,

    /// How long will background tasks be delayed at most after initial load of tenants.
    ///
    /// Our largest initialization completions are in the range of 100-200s, so perhaps 10s works
    /// as we now isolate initial loading, initial logical size calculation and background tasks.
    /// Smaller nodes will have background tasks "not running" for this long unless every timeline
    /// has its initial logical size calculated. Not running background tasks for some seconds is
    /// not terrible.
    pub background_task_maximum_delay: Duration,

    pub control_plane_api: Url,

    /// JWT token for use with the control plane API.
    pub control_plane_api_token: Option<SecretString>,

    pub import_pgdata_upcall_api: Option<Url>,
    pub import_pgdata_upcall_api_token: Option<SecretString>,
    pub import_pgdata_aws_endpoint_url: Option<Url>,

    /// If true, pageserver will make best-effort to operate without a control plane: only
    /// for use in major incidents.
    pub control_plane_emergency_mode: bool,

    /// How many heatmap uploads may be done concurrently: lower values implicitly deprioritize
    /// heatmap uploads vs. other remote storage operations.
    pub heatmap_upload_concurrency: usize,

    /// How many remote storage downloads may be done for secondary tenants concurrently. Implicitly
    /// deprioritises secondary downloads vs. remote storage operations for attached tenants.
    pub secondary_download_concurrency: usize,

    /// Maximum number of WAL records to be ingested and committed at the same time
    pub ingest_batch_size: u64,

    pub virtual_file_io_engine: virtual_file::IoEngineKind,

    pub max_vectored_read_bytes: MaxVectoredReadBytes,

    /// Maximum number of keys to be read in a single get_vectored call.
    pub max_get_vectored_keys: MaxGetVectoredKeys,

    pub image_compression: ImageCompressionAlgorithm,

    /// Whether to offload archived timelines automatically
    pub timeline_offloading: bool,

    /// How many bytes of ephemeral layer content will we allow per kilobyte of RAM. When this
    /// is exceeded, we start proactively closing ephemeral layers to limit the total amount
    /// of ephemeral data.
    ///
    /// Setting this to zero disables limits on total ephemeral layer size.
    pub ephemeral_bytes_per_memory_kb: usize,

    pub l0_flush: crate::l0_flush::L0FlushConfig,

    /// Direct IO settings
    pub virtual_file_io_mode: virtual_file::IoMode,

    /// Optionally disable disk syncs (unsafe!)
    pub no_sync: bool,

    pub page_service_pipelining: pageserver_api::config::PageServicePipeliningConfig,

    pub get_vectored_concurrent_io: pageserver_api::config::GetVectoredConcurrentIo,

    /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer
    /// files read.
    pub enable_read_path_debugging: bool,

    /// Interpreted protocol feature: if enabled, validate that the logical WAL received from
    /// safekeepers does not have gaps.
    pub validate_wal_contiguity: bool,

    /// When set, the previously written to disk heatmap is loaded on tenant attach and used
    /// to avoid clobbering the heatmap from new, cold, attached locations.
    pub load_previous_heatmap: bool,

    /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
    pub generate_unarchival_heatmap: bool,

    pub tracing: Option<pageserver_api::config::Tracing>,

    /// Enable TLS in page service API.
    /// Does not force TLS: the client negotiates TLS usage during the handshake.
    /// Uses key and certificate from ssl_key_file/ssl_cert_file.
    pub enable_tls_page_service_api: bool,

    /// Run in development mode, which disables certain safety checks
    /// such as authentication requirements for HTTP and PostgreSQL APIs.
    /// This is insecure and should only be used in development environments.
    pub dev_mode: bool,

    /// PostHog integration config.
    pub posthog_config: Option<PostHogConfig>,

    pub timeline_import_config: pageserver_api::config::TimelineImportConfig,

    pub basebackup_cache_config: Option<pageserver_api::config::BasebackupCacheConfig>,
}
253 :
/// Token for authentication to safekeepers.
///
/// We do not want to store this in a [`PageServerConf`] because the latter may be logged
/// and/or serialized at a whim, while the token is secret. Currently this token is the
/// same for accessing all tenants/timelines, but it may become per-tenant/per-timeline in
/// the future. More tokens and auth may also arrive for the storage broker, completely
/// changing the logic.
/// Hence, we resort to a global variable for now instead of passing the token from the
/// startup code to the connection code through a dozen layers.
pub static SAFEKEEPER_AUTH_TOKEN: OnceCell<Arc<String>> = OnceCell::new();
263 :
264 : impl PageServerConf {
265 : //
266 : // Repository paths, relative to workdir.
267 : //
268 :
269 3982 : pub fn tenants_path(&self) -> Utf8PathBuf {
270 3982 : self.workdir.join(TENANTS_SEGMENT_NAME)
271 3982 : }
272 :
273 36 : pub fn deletion_prefix(&self) -> Utf8PathBuf {
274 36 : self.workdir.join("deletion")
275 36 : }
276 :
277 0 : pub fn metadata_path(&self) -> Utf8PathBuf {
278 0 : self.workdir.join("metadata.json")
279 0 : }
280 :
281 0 : pub fn basebackup_cache_dir(&self) -> Utf8PathBuf {
282 0 : self.workdir.join("basebackup_cache")
283 0 : }
284 :
285 14 : pub fn deletion_list_path(&self, sequence: u64) -> Utf8PathBuf {
286 : // Encode a version in the filename, so that if we ever switch away from JSON we can
287 : // increment this.
288 : const VERSION: u8 = 1;
289 :
290 14 : self.deletion_prefix()
291 14 : .join(format!("{sequence:016x}-{VERSION:02x}.list"))
292 14 : }
293 :
294 12 : pub fn deletion_header_path(&self) -> Utf8PathBuf {
295 : // Encode a version in the filename, so that if we ever switch away from JSON we can
296 : // increment this.
297 : const VERSION: u8 = 1;
298 :
299 12 : self.deletion_prefix().join(format!("header-{VERSION:02x}"))
300 12 : }
301 :
302 3955 : pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
303 3955 : self.tenants_path().join(tenant_shard_id.to_string())
304 3955 : }
305 :
306 : /// Points to a place in pageserver's local directory,
307 : /// where certain tenant's LocationConf be stored.
308 0 : pub(crate) fn tenant_location_config_path(
309 0 : &self,
310 0 : tenant_shard_id: &TenantShardId,
311 0 : ) -> Utf8PathBuf {
312 0 : self.tenant_path(tenant_shard_id)
313 0 : .join(TENANT_LOCATION_CONFIG_NAME)
314 0 : }
315 :
316 118 : pub(crate) fn tenant_heatmap_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
317 118 : self.tenant_path(tenant_shard_id)
318 118 : .join(TENANT_HEATMAP_BASENAME)
319 118 : }
320 :
321 3716 : pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
322 3716 : self.tenant_path(tenant_shard_id)
323 3716 : .join(TIMELINES_SEGMENT_NAME)
324 3716 : }
325 :
326 3477 : pub fn timeline_path(
327 3477 : &self,
328 3477 : tenant_shard_id: &TenantShardId,
329 3477 : timeline_id: &TimelineId,
330 3477 : ) -> Utf8PathBuf {
331 3477 : self.timelines_path(tenant_shard_id)
332 3477 : .join(timeline_id.to_string())
333 3477 : }
334 :
335 : /// Turns storage remote path of a file into its local path.
336 0 : pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
337 0 : remote_path.with_base(&self.workdir)
338 0 : }
339 :
340 : //
341 : // Postgres distribution paths
342 : //
343 12 : pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<Utf8PathBuf> {
344 12 : let path = self.pg_distrib_dir.clone();
345 :
346 12 : Ok(path.join(pg_version.v_str()))
347 12 : }
348 :
349 6 : pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<Utf8PathBuf> {
350 6 : Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
351 6 : }
352 6 : pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<Utf8PathBuf> {
353 6 : Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
354 6 : }
355 :
356 : /// Parse a configuration file (pageserver.toml) into a PageServerConf struct,
357 : /// validating the input and failing on errors.
358 : ///
359 : /// This leaves any options not present in the file in the built-in defaults.
360 135 : pub fn parse_and_validate(
361 135 : id: NodeId,
362 135 : config_toml: pageserver_api::config::ConfigToml,
363 135 : workdir: &Utf8Path,
364 135 : ) -> anyhow::Result<Self> {
365 : let pageserver_api::config::ConfigToml {
366 135 : listen_pg_addr,
367 135 : listen_http_addr,
368 135 : listen_https_addr,
369 135 : listen_grpc_addr,
370 135 : ssl_key_file,
371 135 : ssl_cert_file,
372 135 : ssl_cert_reload_period,
373 135 : ssl_ca_file,
374 135 : availability_zone,
375 135 : wait_lsn_timeout,
376 135 : wal_redo_timeout,
377 135 : superuser,
378 135 : locale,
379 135 : page_cache_size,
380 135 : max_file_descriptors,
381 135 : pg_distrib_dir,
382 135 : http_auth_type,
383 135 : pg_auth_type,
384 135 : grpc_auth_type,
385 135 : auth_validation_public_key_path,
386 135 : remote_storage,
387 135 : broker_endpoint,
388 135 : broker_keepalive_interval,
389 135 : log_format,
390 135 : metric_collection_interval,
391 135 : metric_collection_endpoint,
392 135 : metric_collection_bucket,
393 135 : synthetic_size_calculation_interval,
394 135 : disk_usage_based_eviction,
395 135 : test_remote_failures,
396 135 : ondemand_download_behavior_treat_error_as_warn,
397 135 : background_task_maximum_delay,
398 135 : control_plane_api,
399 135 : control_plane_api_token,
400 135 : control_plane_emergency_mode,
401 135 : import_pgdata_upcall_api,
402 135 : import_pgdata_upcall_api_token,
403 135 : import_pgdata_aws_endpoint_url,
404 135 : heatmap_upload_concurrency,
405 135 : secondary_download_concurrency,
406 135 : ingest_batch_size,
407 135 : max_vectored_read_bytes,
408 135 : max_get_vectored_keys,
409 135 : image_compression,
410 135 : timeline_offloading,
411 135 : ephemeral_bytes_per_memory_kb,
412 135 : l0_flush,
413 135 : virtual_file_io_mode,
414 135 : concurrent_tenant_warmup,
415 135 : concurrent_tenant_size_logical_size_queries,
416 135 : virtual_file_io_engine,
417 135 : tenant_config,
418 135 : no_sync,
419 135 : page_service_pipelining,
420 135 : get_vectored_concurrent_io,
421 135 : enable_read_path_debugging,
422 135 : validate_wal_contiguity,
423 135 : load_previous_heatmap,
424 135 : generate_unarchival_heatmap,
425 135 : tracing,
426 135 : enable_tls_page_service_api,
427 135 : dev_mode,
428 135 : posthog_config,
429 135 : timeline_import_config,
430 135 : basebackup_cache_config,
431 135 : } = config_toml;
432 :
433 135 : let mut conf = PageServerConf {
434 : // ------------------------------------------------------------
435 : // fields that are already fully validated by the ConfigToml Deserialize impl
436 : // ------------------------------------------------------------
437 135 : listen_pg_addr,
438 135 : listen_http_addr,
439 135 : listen_https_addr,
440 135 : listen_grpc_addr,
441 135 : ssl_key_file,
442 135 : ssl_cert_file,
443 135 : ssl_cert_reload_period,
444 135 : availability_zone,
445 135 : wait_lsn_timeout,
446 135 : wal_redo_timeout,
447 135 : superuser,
448 135 : locale,
449 135 : page_cache_size,
450 135 : max_file_descriptors,
451 135 : http_auth_type,
452 135 : pg_auth_type,
453 135 : grpc_auth_type,
454 135 : auth_validation_public_key_path,
455 135 : remote_storage_config: remote_storage,
456 135 : broker_endpoint,
457 135 : broker_keepalive_interval,
458 135 : log_format,
459 135 : metric_collection_interval,
460 135 : metric_collection_endpoint,
461 135 : metric_collection_bucket,
462 135 : synthetic_size_calculation_interval,
463 135 : disk_usage_based_eviction: Some(disk_usage_based_eviction.unwrap_or(
464 135 : DiskUsageEvictionTaskConfig {
465 135 : max_usage_pct: Percent::new(80).unwrap(),
466 135 : min_avail_bytes: 2_000_000_000,
467 135 : period: Duration::from_secs(60),
468 135 : #[cfg(feature = "testing")]
469 135 : mock_statvfs: None,
470 135 : eviction_order: Default::default(),
471 135 : },
472 135 : )),
473 135 : test_remote_failures,
474 135 : ondemand_download_behavior_treat_error_as_warn,
475 135 : background_task_maximum_delay,
476 135 : control_plane_api: control_plane_api
477 135 : .ok_or_else(|| anyhow::anyhow!("`control_plane_api` must be set"))?,
478 135 : control_plane_emergency_mode,
479 135 : heatmap_upload_concurrency,
480 135 : secondary_download_concurrency,
481 135 : ingest_batch_size,
482 135 : max_vectored_read_bytes,
483 135 : max_get_vectored_keys,
484 135 : image_compression,
485 135 : timeline_offloading,
486 135 : ephemeral_bytes_per_memory_kb,
487 135 : import_pgdata_upcall_api,
488 135 : import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),
489 135 : import_pgdata_aws_endpoint_url,
490 135 : page_service_pipelining,
491 135 : get_vectored_concurrent_io,
492 135 : tracing,
493 135 : enable_tls_page_service_api,
494 135 : dev_mode,
495 135 : timeline_import_config,
496 135 : basebackup_cache_config,
497 :
498 : // ------------------------------------------------------------
499 : // fields that require additional validation or custom handling
500 : // ------------------------------------------------------------
501 135 : workdir: workdir.to_owned(),
502 135 : pg_distrib_dir: pg_distrib_dir.unwrap_or_else(|| {
503 9 : std::env::current_dir()
504 9 : .expect("current_dir() failed")
505 9 : .try_into()
506 9 : .expect("current_dir() is not a valid Utf8Path")
507 9 : }),
508 135 : control_plane_api_token: control_plane_api_token.map(SecretString::from),
509 135 : id,
510 135 : default_tenant_conf: tenant_config,
511 135 : concurrent_tenant_warmup: ConfigurableSemaphore::new(concurrent_tenant_warmup),
512 135 : concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(
513 135 : concurrent_tenant_size_logical_size_queries,
514 : ),
515 135 : eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::new(
516 : // re-use `concurrent_tenant_size_logical_size_queries`
517 135 : concurrent_tenant_size_logical_size_queries,
518 : ),
519 135 : virtual_file_io_engine: match virtual_file_io_engine {
520 0 : Some(v) => v,
521 135 : None => match crate::virtual_file::io_engine_feature_test()
522 135 : .context("auto-detect virtual_file_io_engine")?
523 : {
524 135 : io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise
525 0 : io_engine::FeatureTestResult::Worse { engine, remark } => {
526 : // TODO: bubble this up to the caller so we can tracing::warn! it.
527 0 : eprintln!(
528 0 : "auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"
529 : );
530 0 : engine
531 : }
532 : },
533 : },
534 135 : l0_flush: l0_flush
535 135 : .map(crate::l0_flush::L0FlushConfig::from)
536 135 : .unwrap_or_default(),
537 135 : virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
538 135 : no_sync: no_sync.unwrap_or(false),
539 135 : enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
540 135 : validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
541 135 : load_previous_heatmap: load_previous_heatmap.unwrap_or(true),
542 135 : generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true),
543 135 : ssl_ca_certs: match ssl_ca_file {
544 0 : Some(ssl_ca_file) => {
545 0 : let buf = std::fs::read(ssl_ca_file)?;
546 0 : pem::parse_many(&buf)?
547 0 : .into_iter()
548 0 : .filter(|pem| pem.tag() == "CERTIFICATE")
549 0 : .collect()
550 : }
551 135 : None => Vec::new(),
552 : },
553 135 : posthog_config,
554 : };
555 :
556 : // ------------------------------------------------------------
557 : // custom validation code that covers more than one field in isolation
558 : // ------------------------------------------------------------
559 :
560 135 : if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]
561 135 : .contains(&AuthType::NeonJWT)
562 : {
563 0 : let auth_validation_public_key_path = conf
564 0 : .auth_validation_public_key_path
565 0 : .get_or_insert_with(|| workdir.join("auth_public_key.pem"));
566 0 : ensure!(
567 0 : auth_validation_public_key_path.exists(),
568 0 : format!(
569 0 : "Can't find auth_validation_public_key at '{auth_validation_public_key_path}'",
570 : )
571 : );
572 135 : }
573 :
574 135 : if let Some(tracing_config) = conf.tracing.as_ref() {
575 1 : let ratio = &tracing_config.sampling_ratio;
576 1 : ensure!(
577 1 : ratio.denominator != 0 && ratio.denominator >= ratio.numerator,
578 1 : format!(
579 1 : "Invalid sampling ratio: {}/{}",
580 : ratio.numerator, ratio.denominator
581 : )
582 : );
583 :
584 0 : let url = Url::parse(&tracing_config.export_config.endpoint)
585 0 : .map_err(anyhow::Error::msg)
586 0 : .with_context(|| {
587 0 : format!(
588 0 : "tracing endpoint URL is invalid : {}",
589 : tracing_config.export_config.endpoint
590 : )
591 0 : })?;
592 :
593 0 : ensure!(
594 0 : url.scheme() == "http" || url.scheme() == "https",
595 0 : format!(
596 0 : "tracing endpoint URL must start with http:// or https://: {}",
597 : tracing_config.export_config.endpoint
598 : )
599 : );
600 134 : }
601 :
602 134 : IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)
603 134 : .map_err(anyhow::Error::msg)
604 134 : .with_context(|| {
605 0 : format!(
606 0 : "effective checkpoint distance is unsupported: {}",
607 : conf.default_tenant_conf.checkpoint_distance
608 : )
609 0 : })?;
610 :
611 : if let PageServicePipeliningConfig::Pipelined(PageServicePipeliningConfigPipelined {
612 134 : max_batch_size,
613 : ..
614 134 : }) = conf.page_service_pipelining
615 : {
616 134 : if max_batch_size.get() > conf.max_get_vectored_keys.get() {
617 1 : return Err(anyhow::anyhow!(
618 1 : "`max_batch_size` ({max_batch_size}) must be less than or equal to `max_get_vectored_keys` ({})",
619 1 : conf.max_get_vectored_keys.get()
620 1 : ));
621 133 : }
622 0 : };
623 :
624 133 : Ok(conf)
625 135 : }
626 :
/// Builds a unique repository directory path for the unit test `test_name`.
///
/// The base directory is taken from the `TEST_OUTPUT` environment variable and falls
/// back to `../tmp_check`; a fresh random UUID is appended so that each call yields a
/// distinct path. The directory itself is not created here.
#[cfg(test)]
pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf {
    // Build the fallback lazily so that nothing is allocated when TEST_OUTPUT is set.
    let test_output_dir =
        std::env::var("TEST_OUTPUT").unwrap_or_else(|_| "../tmp_check".into());

    let test_id = uuid::Uuid::new_v4();
    Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}_{test_id}"))
}
634 :
635 126 : pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self {
636 126 : let pg_distrib_dir = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
637 :
638 126 : let config_toml = pageserver_api::config::ConfigToml {
639 126 : wait_lsn_timeout: Duration::from_secs(60),
640 126 : wal_redo_timeout: Duration::from_secs(60),
641 126 : pg_distrib_dir: Some(pg_distrib_dir),
642 126 : metric_collection_interval: Duration::from_secs(60),
643 126 : synthetic_size_calculation_interval: Duration::from_secs(60),
644 126 : background_task_maximum_delay: Duration::ZERO,
645 126 : load_previous_heatmap: Some(true),
646 126 : generate_unarchival_heatmap: Some(true),
647 126 : control_plane_api: Some(Url::parse("http://localhost:6666").unwrap()),
648 126 : ..Default::default()
649 126 : };
650 126 : PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
651 126 : }
652 : }
653 :
/// Serializable wrapper around the pageserver's [`NodeId`].
///
/// NOTE(review): presumably this is what the on-disk identity file deserializes into;
/// the loading code is not in this module, so confirm against the caller.
#[derive(serde::Deserialize, serde::Serialize)]
pub struct PageserverIdentity {
    /// Node id of this pageserver.
    pub id: NodeId,
}
658 :
/// Configurable semaphore permits setting.
///
/// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty
/// semaphore cannot be distinguished, leading any feature using these to await forever (or until
/// new permits are added).
#[derive(Debug, Clone)]
pub struct ConfigurableSemaphore {
    /// The permit count this semaphore was created with.
    initial_permits: NonZeroUsize,
    /// The semaphore itself; it sits behind an `Arc`, so clones of this struct share it.
    inner: std::sync::Arc<tokio::sync::Semaphore>,
}
669 :
670 : impl ConfigurableSemaphore {
671 : /// Initializse using a non-zero amount of permits.
672 : ///
673 : /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
674 : /// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
675 : /// behave like [`futures::future::pending`], just waiting until new permits are added.
676 : ///
677 : /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
678 405 : pub fn new(initial_permits: NonZeroUsize) -> Self {
679 405 : ConfigurableSemaphore {
680 405 : initial_permits,
681 405 : inner: std::sync::Arc::new(tokio::sync::Semaphore::new(initial_permits.get())),
682 405 : }
683 405 : }
684 :
685 : /// Returns the configured amount of permits.
686 0 : pub fn initial_permits(&self) -> NonZeroUsize {
687 0 : self.initial_permits
688 0 : }
689 : }
690 :
691 : impl PartialEq for ConfigurableSemaphore {
692 0 : fn eq(&self, other: &Self) -> bool {
693 : // the number of permits can be increased at runtime, so we cannot really fulfill the
694 : // PartialEq value equality otherwise
695 0 : self.initial_permits == other.initial_permits
696 0 : }
697 : }
698 :
// Equality only compares the `NonZeroUsize` initial permit count (see `PartialEq` impl).
impl Eq for ConfigurableSemaphore {}
700 :
701 : impl ConfigurableSemaphore {
702 0 : pub fn inner(&self) -> &std::sync::Arc<tokio::sync::Semaphore> {
703 0 : &self.inner
704 0 : }
705 : }
706 :
#[cfg(test)]
mod tests {

    use std::time::Duration;

    use camino::Utf8PathBuf;
    use rstest::rstest;
    use utils::id::NodeId;

    use super::PageServerConf;

    /// Deserializes `input` into a `ConfigToml` (panicking if that fails) and runs it
    /// through [`PageServerConf::parse_and_validate`] with a dummy node id and workdir.
    fn parse_and_validate(input: &str) -> anyhow::Result<PageServerConf> {
        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
            .expect("config has valid fields");
        let workdir = Utf8PathBuf::from("/nonexistent");
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
    }

    #[test]
    fn test_minimal_config_toml_is_valid() {
        // The minimal valid config for running a pageserver:
        // - control_plane_api is mandatory, as pageservers cannot run in isolation
        // - we use Default impl of everything else in this situation
        let input = r#"
            control_plane_api = "http://localhost:6666"
        "#;
        parse_and_validate(input).expect("parse_and_validate");
    }

    #[test]
    fn test_config_tracing_endpoint_is_invalid() {
        // The sampling ratio must be valid here; otherwise validation bails out on
        // the ratio and the endpoint check under test is never reached.
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [tracing]

            sampling_ratio = { numerator = 1, denominator = 1 }

            [tracing.export_config]
            endpoint = "localhost:4317"
            protocol = "http-binary"
            timeout = "1ms"
        "#;
        let err = parse_and_validate(input)
            .expect_err("parse_and_validate should fail for endpoint without scheme");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("tracing endpoint URL"),
            "unexpected error: {msg}"
        );
    }

    #[test]
    fn test_config_tracing_sampling_ratio_is_invalid() {
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [tracing]

            sampling_ratio = { numerator = 1, denominator = 0 }

            [tracing.export_config]
            endpoint = "http://localhost:4317"
            protocol = "http-binary"
            timeout = "1ms"
        "#;
        let err = parse_and_validate(input)
            .expect_err("parse_and_validate should fail for a zero denominator");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("Invalid sampling ratio"),
            "unexpected error: {msg}"
        );
    }

    #[rstest]
    #[case(32, 32, true)]
    #[case(64, 32, false)]
    #[case(64, 64, true)]
    #[case(128, 128, true)]
    fn test_config_max_batch_size_is_valid(
        #[case] max_batch_size: usize,
        #[case] max_get_vectored_keys: usize,
        #[case] is_valid: bool,
    ) {
        let input = format!(
            r#"
            control_plane_api = "http://localhost:6666"
            max_get_vectored_keys = {max_get_vectored_keys}
            page_service_pipelining = {{ mode="pipelined", execution="concurrent-futures", max_batch_size={max_batch_size}, batching="uniform-lsn" }}
        "#,
        );
        let result = parse_and_validate(&input);
        assert_eq!(result.is_ok(), is_valid);
    }

    #[test]
    fn test_config_posthog_config_is_valid() {
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [posthog_config]
            server_api_key = "phs_AAA"
            client_api_key = "phc_BBB"
            project_id = "000"
            private_api_url = "https://us.posthog.com"
            public_api_url = "https://us.i.posthog.com"
        "#;
        parse_and_validate(input).expect("parse_and_validate");
    }

    #[test]
    fn test_config_posthog_incomplete_config_is_valid() {
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [posthog_config]
            server_api_key = "phs_AAA"
            private_api_url = "https://us.posthog.com"
            public_api_url = "https://us.i.posthog.com"
        "#;
        parse_and_validate(input).expect("parse_and_validate");
    }

    #[test]
    fn test_config_disk_usage_based_eviction_is_valid() {
        // No eviction section is given, so the built-in defaults must be filled in.
        let input = r#"
            control_plane_api = "http://localhost:6666"
        "#;
        let config = parse_and_validate(input).unwrap();
        let disk_usage_based_eviction = config.disk_usage_based_eviction.unwrap();
        assert_eq!(disk_usage_based_eviction.max_usage_pct.get(), 80);
        assert_eq!(disk_usage_based_eviction.min_avail_bytes, 2_000_000_000);
        assert_eq!(disk_usage_based_eviction.period, Duration::from_secs(60));
        assert_eq!(disk_usage_based_eviction.eviction_order, Default::default());
    }
}
|