Line data Source code
1 : //! Functions for handling page server configuration options
2 : //!
3 : //! Configuration options can be set in the pageserver.toml configuration
4 : //! file, or on the command line.
5 : //! See also `settings.md` for a more detailed description of every parameter.
6 :
7 : pub mod ignored_fields;
8 :
9 : use std::env;
10 : use std::num::NonZeroUsize;
11 : use std::sync::Arc;
12 : use std::time::Duration;
13 :
14 : use anyhow::{Context, bail, ensure};
15 : use camino::{Utf8Path, Utf8PathBuf};
16 : use once_cell::sync::OnceCell;
17 : use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes};
18 : use pageserver_api::models::ImageCompressionAlgorithm;
19 : use pageserver_api::shard::TenantShardId;
20 : use pem::Pem;
21 : use postgres_backend::AuthType;
22 : use remote_storage::{RemotePath, RemoteStorageConfig};
23 : use reqwest::Url;
24 : use storage_broker::Uri;
25 : use utils::id::{NodeId, TimelineId};
26 : use utils::logging::{LogFormat, SecretString};
27 : use utils::postgres_client::PostgresClientProtocol;
28 :
29 : use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
30 : use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
31 : use crate::virtual_file::io_engine;
32 : use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file};
33 :
34 : /// Global state of pageserver.
35 : ///
36 : /// It's mostly immutable configuration, but some semaphores and the
37 : /// like crept in over time and the name stuck.
38 : ///
39 : /// Instantiated by deserializing `pageserver.toml` into [`pageserver_api::config::ConfigToml`]
40 : /// and passing that to [`PageServerConf::parse_and_validate`].
41 : ///
42 : /// # Adding a New Field
43 : ///
44 : /// 1. Add the field to `pageserver_api::config::ConfigToml`.
45 : /// 2. Fix compiler errors (exhaustive destructuring will guide you).
46 : ///
47 : /// For fields that require additional validation or filling in of defaults at runtime,
48 : /// check for examples in the [`PageServerConf::parse_and_validate`] method.
49 : #[derive(Debug, Clone)]
50 : pub struct PageServerConf {
51 : // Identifier of that particular pageserver so e g safekeepers
52 : // can safely distinguish different pageservers
53 : pub id: NodeId,
54 :
55 : /// Example (default): 127.0.0.1:64000
56 : pub listen_pg_addr: String,
57 : /// Example (default): 127.0.0.1:9898
58 : pub listen_http_addr: String,
59 : /// Example: 127.0.0.1:9899
60 : pub listen_https_addr: Option<String>,
61 : /// If set, expose a gRPC API on this address.
62 : /// Example: 127.0.0.1:51051
63 : ///
64 : /// EXPERIMENTAL: this protocol is unstable and under active development.
65 : pub listen_grpc_addr: Option<String>,
66 : /// If true, enable TLS for the gRPC server, using ssl_key_file and ssl_cert_file.
67 : pub listen_grpc_tls: bool,
68 :
69 : /// Path to a file with certificate's private key for https and gRPC API.
70 : /// Default: server.key
71 : pub ssl_key_file: Utf8PathBuf,
72 : /// Path to a file with a X509 certificate for https and gRPC API.
73 : /// Default: server.crt
74 : pub ssl_cert_file: Utf8PathBuf,
75 : /// Period to reload certificate and private key from files.
76 : /// Default: 60s.
77 : pub ssl_cert_reload_period: Duration,
78 : /// Trusted root CA certificates to use in https APIs in PEM format.
79 : pub ssl_ca_certs: Vec<Pem>,
80 :
81 : /// Current availability zone. Used for traffic metrics.
82 : pub availability_zone: Option<String>,
83 :
84 : // Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
85 : pub wait_lsn_timeout: Duration,
86 : // How long to wait for WAL redo to complete.
87 : pub wal_redo_timeout: Duration,
88 :
89 : pub superuser: String,
90 : pub locale: String,
91 :
92 : pub page_cache_size: usize,
93 : pub max_file_descriptors: usize,
94 :
95 : // Repository directory, relative to current working directory.
96 : // Normally, the page server changes the current working directory
97 : // to the repository, and 'workdir' is always '.'. But we don't do
98 : // that during unit testing, because the current directory is global
99 : // to the process but different unit tests work on different
100 : // repositories.
101 : pub workdir: Utf8PathBuf,
102 :
103 : pub pg_distrib_dir: Utf8PathBuf,
104 :
105 : // Authentication
106 : /// authentication method for the HTTP mgmt API
107 : pub http_auth_type: AuthType,
108 : /// authentication method for libpq connections from compute
109 : pub pg_auth_type: AuthType,
110 : /// authentication method for gRPC connections from compute
111 : pub grpc_auth_type: AuthType,
112 : /// Path to a file or directory containing public key(s) for verifying JWT tokens.
113 : /// Used for both mgmt and compute auth, if enabled.
114 : pub auth_validation_public_key_path: Option<Utf8PathBuf>,
115 :
116 : pub remote_storage_config: Option<RemoteStorageConfig>,
117 :
118 : pub default_tenant_conf: pageserver_api::config::TenantConfigToml,
119 :
120 : /// Storage broker endpoints to connect to.
121 : pub broker_endpoint: Uri,
122 : pub broker_keepalive_interval: Duration,
123 :
124 : pub log_format: LogFormat,
125 :
126 : /// Number of tenants which will be concurrently loaded from remote storage proactively on startup or attach.
127 : ///
128 : /// A lower value implicitly deprioritizes loading such tenants, vs. other work in the system.
129 : pub concurrent_tenant_warmup: ConfigurableSemaphore,
130 :
131 : /// Number of concurrent [`TenantShard::gather_size_inputs`](crate::tenant::TenantShard::gather_size_inputs) allowed.
132 : pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
133 : /// Limit of concurrent [`TenantShard::gather_size_inputs`] issued by module `eviction_task`.
134 : /// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
135 : /// See the comment in `eviction_task` for details.
136 : ///
137 : /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
138 : pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,
139 :
140 : // How often to collect metrics and send them to the metrics endpoint.
141 : pub metric_collection_interval: Duration,
142 : // How often to send unchanged cached metrics to the metrics endpoint.
143 : pub metric_collection_endpoint: Option<Url>,
144 : pub metric_collection_bucket: Option<RemoteStorageConfig>,
145 : pub synthetic_size_calculation_interval: Duration,
146 :
147 : pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
148 :
149 : pub test_remote_failures: u64,
150 :
151 : pub ondemand_download_behavior_treat_error_as_warn: bool,
152 :
153 : /// How long will background tasks be delayed at most after initial load of tenants.
154 : ///
155 : /// Our largest initialization completions are in the range of 100-200s, so perhaps 10s works
156 : /// as we now isolate initial loading, initial logical size calculation and background tasks.
157 : /// Smaller nodes will have background tasks "not running" for this long unless every timeline
158 : /// has it's initial logical size calculated. Not running background tasks for some seconds is
159 : /// not terrible.
160 : pub background_task_maximum_delay: Duration,
161 :
162 : pub control_plane_api: Url,
163 :
164 : /// JWT token for use with the control plane API.
165 : pub control_plane_api_token: Option<SecretString>,
166 :
167 : pub import_pgdata_upcall_api: Option<Url>,
168 : pub import_pgdata_upcall_api_token: Option<SecretString>,
169 : pub import_pgdata_aws_endpoint_url: Option<Url>,
170 :
171 : /// If true, pageserver will make best-effort to operate without a control plane: only
172 : /// for use in major incidents.
173 : pub control_plane_emergency_mode: bool,
174 :
175 : /// How many heatmap uploads may be done concurrency: lower values implicitly deprioritize
176 : /// heatmap uploads vs. other remote storage operations.
177 : pub heatmap_upload_concurrency: usize,
178 :
179 : /// How many remote storage downloads may be done for secondary tenants concurrently. Implicitly
180 : /// deprioritises secondary downloads vs. remote storage operations for attached tenants.
181 : pub secondary_download_concurrency: usize,
182 :
183 : /// Maximum number of WAL records to be ingested and committed at the same time
184 : pub ingest_batch_size: u64,
185 :
186 : pub virtual_file_io_engine: virtual_file::IoEngineKind,
187 :
188 : pub max_vectored_read_bytes: MaxVectoredReadBytes,
189 :
190 : pub image_compression: ImageCompressionAlgorithm,
191 :
192 : /// Whether to offload archived timelines automatically
193 : pub timeline_offloading: bool,
194 :
195 : /// How many bytes of ephemeral layer content will we allow per kilobyte of RAM. When this
196 : /// is exceeded, we start proactively closing ephemeral layers to limit the total amount
197 : /// of ephemeral data.
198 : ///
199 : /// Setting this to zero disables limits on total ephemeral layer size.
200 : pub ephemeral_bytes_per_memory_kb: usize,
201 :
202 : pub l0_flush: crate::l0_flush::L0FlushConfig,
203 :
204 : /// Direct IO settings
205 : pub virtual_file_io_mode: virtual_file::IoMode,
206 :
207 : /// Optionally disable disk syncs (unsafe!)
208 : pub no_sync: bool,
209 :
210 : pub wal_receiver_protocol: PostgresClientProtocol,
211 :
212 : pub page_service_pipelining: pageserver_api::config::PageServicePipeliningConfig,
213 :
214 : pub get_vectored_concurrent_io: pageserver_api::config::GetVectoredConcurrentIo,
215 :
216 : /// Enable read path debugging. If enabled, read key errors will print a backtrace of the layer
217 : /// files read.
218 : pub enable_read_path_debugging: bool,
219 :
220 : /// Interpreted protocol feature: if enabled, validate that the logical WAL received from
221 : /// safekeepers does not have gaps.
222 : pub validate_wal_contiguity: bool,
223 :
224 : /// When set, the previously written to disk heatmap is loaded on tenant attach and used
225 : /// to avoid clobbering the heatmap from new, cold, attached locations.
226 : pub load_previous_heatmap: bool,
227 :
228 : /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline.
229 : pub generate_unarchival_heatmap: bool,
230 :
231 : pub tracing: Option<pageserver_api::config::Tracing>,
232 :
233 : /// Enable TLS in the libpq page service API.
234 : /// Does not force TLS: the client negotiates TLS usage during the handshake.
235 : /// Uses key and certificate from ssl_key_file/ssl_cert_file.
236 : pub enable_tls_page_service_api: bool,
237 :
238 : /// Run in development mode, which disables certain safety checks
239 : /// such as authentication requirements for HTTP and PostgreSQL APIs.
240 : /// This is insecure and should only be used in development environments.
241 : pub dev_mode: bool,
242 :
243 : pub timeline_import_config: pageserver_api::config::TimelineImportConfig,
244 :
245 : pub basebackup_cache_config: Option<pageserver_api::config::BasebackupCacheConfig>,
246 : }
247 :
248 : /// Token for authentication to safekeepers
249 : ///
250 : /// We do not want to store this in a PageServerConf because the latter may be logged
251 : /// and/or serialized at a whim, while the token is secret. Currently this token is the
252 : /// same for accessing all tenants/timelines, but may become per-tenant/per-timeline in
253 : /// the future, more tokens and auth may arrive for storage broker, completely changing the logic.
254 : /// Hence, we resort to a global variable for now instead of passing the token from the
255 : /// startup code to the connection code through a dozen layers.
256 : pub static SAFEKEEPER_AUTH_TOKEN: OnceCell<Arc<String>> = OnceCell::new();
257 :
258 : impl PageServerConf {
259 : //
260 : // Repository paths, relative to workdir.
261 : //
262 :
263 3938 : pub fn tenants_path(&self) -> Utf8PathBuf {
264 3938 : self.workdir.join(TENANTS_SEGMENT_NAME)
265 3938 : }
266 :
267 36 : pub fn deletion_prefix(&self) -> Utf8PathBuf {
268 36 : self.workdir.join("deletion")
269 36 : }
270 :
271 0 : pub fn metadata_path(&self) -> Utf8PathBuf {
272 0 : self.workdir.join("metadata.json")
273 0 : }
274 :
275 0 : pub fn basebackup_cache_dir(&self) -> Utf8PathBuf {
276 0 : self.workdir.join("basebackup_cache")
277 0 : }
278 :
279 14 : pub fn deletion_list_path(&self, sequence: u64) -> Utf8PathBuf {
280 : // Encode a version in the filename, so that if we ever switch away from JSON we can
281 : // increment this.
282 : const VERSION: u8 = 1;
283 :
284 14 : self.deletion_prefix()
285 14 : .join(format!("{sequence:016x}-{VERSION:02x}.list"))
286 14 : }
287 :
288 12 : pub fn deletion_header_path(&self) -> Utf8PathBuf {
289 : // Encode a version in the filename, so that if we ever switch away from JSON we can
290 : // increment this.
291 : const VERSION: u8 = 1;
292 :
293 12 : self.deletion_prefix().join(format!("header-{VERSION:02x}"))
294 12 : }
295 :
296 3911 : pub fn tenant_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
297 3911 : self.tenants_path().join(tenant_shard_id.to_string())
298 3911 : }
299 :
300 : /// Points to a place in pageserver's local directory,
301 : /// where certain tenant's LocationConf be stored.
302 0 : pub(crate) fn tenant_location_config_path(
303 0 : &self,
304 0 : tenant_shard_id: &TenantShardId,
305 0 : ) -> Utf8PathBuf {
306 0 : self.tenant_path(tenant_shard_id)
307 0 : .join(TENANT_LOCATION_CONFIG_NAME)
308 0 : }
309 :
310 117 : pub(crate) fn tenant_heatmap_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
311 117 : self.tenant_path(tenant_shard_id)
312 117 : .join(TENANT_HEATMAP_BASENAME)
313 117 : }
314 :
315 3674 : pub fn timelines_path(&self, tenant_shard_id: &TenantShardId) -> Utf8PathBuf {
316 3674 : self.tenant_path(tenant_shard_id)
317 3674 : .join(TIMELINES_SEGMENT_NAME)
318 3674 : }
319 :
320 3437 : pub fn timeline_path(
321 3437 : &self,
322 3437 : tenant_shard_id: &TenantShardId,
323 3437 : timeline_id: &TimelineId,
324 3437 : ) -> Utf8PathBuf {
325 3437 : self.timelines_path(tenant_shard_id)
326 3437 : .join(timeline_id.to_string())
327 3437 : }
328 :
329 : /// Turns storage remote path of a file into its local path.
330 0 : pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
331 0 : remote_path.with_base(&self.workdir)
332 0 : }
333 :
334 : //
335 : // Postgres distribution paths
336 : //
337 12 : pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<Utf8PathBuf> {
338 12 : let path = self.pg_distrib_dir.clone();
339 12 :
340 12 : #[allow(clippy::manual_range_patterns)]
341 12 : match pg_version {
342 12 : 14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
343 0 : _ => bail!("Unsupported postgres version: {}", pg_version),
344 : }
345 12 : }
346 :
347 6 : pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<Utf8PathBuf> {
348 6 : Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
349 6 : }
350 6 : pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<Utf8PathBuf> {
351 6 : Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
352 6 : }
353 :
354 : /// Parse a configuration file (pageserver.toml) into a PageServerConf struct,
355 : /// validating the input and failing on errors.
356 : ///
357 : /// This leaves any options not present in the file in the built-in defaults.
358 127 : pub fn parse_and_validate(
359 127 : id: NodeId,
360 127 : config_toml: pageserver_api::config::ConfigToml,
361 127 : workdir: &Utf8Path,
362 127 : ) -> anyhow::Result<Self> {
363 127 : let pageserver_api::config::ConfigToml {
364 127 : listen_pg_addr,
365 127 : listen_http_addr,
366 127 : listen_https_addr,
367 127 : listen_grpc_addr,
368 127 : listen_grpc_tls,
369 127 : ssl_key_file,
370 127 : ssl_cert_file,
371 127 : ssl_cert_reload_period,
372 127 : ssl_ca_file,
373 127 : availability_zone,
374 127 : wait_lsn_timeout,
375 127 : wal_redo_timeout,
376 127 : superuser,
377 127 : locale,
378 127 : page_cache_size,
379 127 : max_file_descriptors,
380 127 : pg_distrib_dir,
381 127 : http_auth_type,
382 127 : pg_auth_type,
383 127 : grpc_auth_type,
384 127 : auth_validation_public_key_path,
385 127 : remote_storage,
386 127 : broker_endpoint,
387 127 : broker_keepalive_interval,
388 127 : log_format,
389 127 : metric_collection_interval,
390 127 : metric_collection_endpoint,
391 127 : metric_collection_bucket,
392 127 : synthetic_size_calculation_interval,
393 127 : disk_usage_based_eviction,
394 127 : test_remote_failures,
395 127 : ondemand_download_behavior_treat_error_as_warn,
396 127 : background_task_maximum_delay,
397 127 : control_plane_api,
398 127 : control_plane_api_token,
399 127 : control_plane_emergency_mode,
400 127 : import_pgdata_upcall_api,
401 127 : import_pgdata_upcall_api_token,
402 127 : import_pgdata_aws_endpoint_url,
403 127 : heatmap_upload_concurrency,
404 127 : secondary_download_concurrency,
405 127 : ingest_batch_size,
406 127 : max_vectored_read_bytes,
407 127 : image_compression,
408 127 : timeline_offloading,
409 127 : ephemeral_bytes_per_memory_kb,
410 127 : l0_flush,
411 127 : virtual_file_io_mode,
412 127 : concurrent_tenant_warmup,
413 127 : concurrent_tenant_size_logical_size_queries,
414 127 : virtual_file_io_engine,
415 127 : tenant_config,
416 127 : no_sync,
417 127 : wal_receiver_protocol,
418 127 : page_service_pipelining,
419 127 : get_vectored_concurrent_io,
420 127 : enable_read_path_debugging,
421 127 : validate_wal_contiguity,
422 127 : load_previous_heatmap,
423 127 : generate_unarchival_heatmap,
424 127 : tracing,
425 127 : enable_tls_page_service_api,
426 127 : dev_mode,
427 127 : timeline_import_config,
428 127 : basebackup_cache_config,
429 127 : } = config_toml;
430 :
431 127 : let mut conf = PageServerConf {
432 : // ------------------------------------------------------------
433 : // fields that are already fully validated by the ConfigToml Deserialize impl
434 : // ------------------------------------------------------------
435 127 : listen_pg_addr,
436 127 : listen_http_addr,
437 127 : listen_https_addr,
438 127 : listen_grpc_addr,
439 127 : listen_grpc_tls,
440 127 : ssl_key_file,
441 127 : ssl_cert_file,
442 127 : ssl_cert_reload_period,
443 127 : availability_zone,
444 127 : wait_lsn_timeout,
445 127 : wal_redo_timeout,
446 127 : superuser,
447 127 : locale,
448 127 : page_cache_size,
449 127 : max_file_descriptors,
450 127 : http_auth_type,
451 127 : pg_auth_type,
452 127 : grpc_auth_type,
453 127 : auth_validation_public_key_path,
454 127 : remote_storage_config: remote_storage,
455 127 : broker_endpoint,
456 127 : broker_keepalive_interval,
457 127 : log_format,
458 127 : metric_collection_interval,
459 127 : metric_collection_endpoint,
460 127 : metric_collection_bucket,
461 127 : synthetic_size_calculation_interval,
462 127 : disk_usage_based_eviction,
463 127 : test_remote_failures,
464 127 : ondemand_download_behavior_treat_error_as_warn,
465 127 : background_task_maximum_delay,
466 127 : control_plane_api: control_plane_api
467 127 : .ok_or_else(|| anyhow::anyhow!("`control_plane_api` must be set"))?,
468 127 : control_plane_emergency_mode,
469 127 : heatmap_upload_concurrency,
470 127 : secondary_download_concurrency,
471 127 : ingest_batch_size,
472 127 : max_vectored_read_bytes,
473 127 : image_compression,
474 127 : timeline_offloading,
475 127 : ephemeral_bytes_per_memory_kb,
476 127 : import_pgdata_upcall_api,
477 127 : import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),
478 127 : import_pgdata_aws_endpoint_url,
479 127 : wal_receiver_protocol,
480 127 : page_service_pipelining,
481 127 : get_vectored_concurrent_io,
482 127 : tracing,
483 127 : enable_tls_page_service_api,
484 127 : dev_mode,
485 127 : timeline_import_config,
486 127 : basebackup_cache_config,
487 127 :
488 127 : // ------------------------------------------------------------
489 127 : // fields that require additional validation or custom handling
490 127 : // ------------------------------------------------------------
491 127 : workdir: workdir.to_owned(),
492 127 : pg_distrib_dir: pg_distrib_dir.unwrap_or_else(|| {
493 2 : std::env::current_dir()
494 2 : .expect("current_dir() failed")
495 2 : .try_into()
496 2 : .expect("current_dir() is not a valid Utf8Path")
497 127 : }),
498 127 : control_plane_api_token: control_plane_api_token.map(SecretString::from),
499 127 : id,
500 127 : default_tenant_conf: tenant_config,
501 127 : concurrent_tenant_warmup: ConfigurableSemaphore::new(concurrent_tenant_warmup),
502 127 : concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(
503 127 : concurrent_tenant_size_logical_size_queries,
504 127 : ),
505 127 : eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::new(
506 127 : // re-use `concurrent_tenant_size_logical_size_queries`
507 127 : concurrent_tenant_size_logical_size_queries,
508 127 : ),
509 127 : virtual_file_io_engine: match virtual_file_io_engine {
510 0 : Some(v) => v,
511 127 : None => match crate::virtual_file::io_engine_feature_test()
512 127 : .context("auto-detect virtual_file_io_engine")?
513 : {
514 127 : io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise
515 0 : io_engine::FeatureTestResult::Worse { engine, remark } => {
516 0 : // TODO: bubble this up to the caller so we can tracing::warn! it.
517 0 : eprintln!(
518 0 : "auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"
519 0 : );
520 0 : engine
521 : }
522 : },
523 : },
524 127 : l0_flush: l0_flush
525 127 : .map(crate::l0_flush::L0FlushConfig::from)
526 127 : .unwrap_or_default(),
527 127 : virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
528 127 : no_sync: no_sync.unwrap_or(false),
529 127 : enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false),
530 127 : validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false),
531 127 : load_previous_heatmap: load_previous_heatmap.unwrap_or(true),
532 127 : generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true),
533 127 : ssl_ca_certs: match ssl_ca_file {
534 0 : Some(ssl_ca_file) => {
535 0 : let buf = std::fs::read(ssl_ca_file)?;
536 0 : pem::parse_many(&buf)?
537 0 : .into_iter()
538 0 : .filter(|pem| pem.tag() == "CERTIFICATE")
539 0 : .collect()
540 : }
541 127 : None => Vec::new(),
542 : },
543 : };
544 :
545 : // ------------------------------------------------------------
546 : // custom validation code that covers more than one field in isolation
547 : // ------------------------------------------------------------
548 :
549 127 : if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]
550 127 : .contains(&AuthType::NeonJWT)
551 : {
552 0 : let auth_validation_public_key_path = conf
553 0 : .auth_validation_public_key_path
554 0 : .get_or_insert_with(|| workdir.join("auth_public_key.pem"));
555 0 : ensure!(
556 0 : auth_validation_public_key_path.exists(),
557 0 : format!(
558 0 : "Can't find auth_validation_public_key at '{auth_validation_public_key_path}'",
559 0 : )
560 : );
561 127 : }
562 :
563 127 : if let Some(tracing_config) = conf.tracing.as_ref() {
564 1 : let ratio = &tracing_config.sampling_ratio;
565 1 : ensure!(
566 1 : ratio.denominator != 0 && ratio.denominator >= ratio.numerator,
567 1 : format!(
568 1 : "Invalid sampling ratio: {}/{}",
569 1 : ratio.numerator, ratio.denominator
570 1 : )
571 : );
572 :
573 0 : let url = Url::parse(&tracing_config.export_config.endpoint)
574 0 : .map_err(anyhow::Error::msg)
575 0 : .with_context(|| {
576 0 : format!(
577 0 : "tracing endpoint URL is invalid : {}",
578 0 : tracing_config.export_config.endpoint
579 0 : )
580 0 : })?;
581 :
582 0 : ensure!(
583 0 : url.scheme() == "http" || url.scheme() == "https",
584 0 : format!(
585 0 : "tracing endpoint URL must start with http:// or https://: {}",
586 0 : tracing_config.export_config.endpoint
587 0 : )
588 : );
589 126 : }
590 :
591 126 : IndexEntry::validate_checkpoint_distance(conf.default_tenant_conf.checkpoint_distance)
592 126 : .map_err(anyhow::Error::msg)
593 126 : .with_context(|| {
594 0 : format!(
595 0 : "effective checkpoint distance is unsupported: {}",
596 0 : conf.default_tenant_conf.checkpoint_distance
597 0 : )
598 126 : })?;
599 :
600 126 : Ok(conf)
601 127 : }
602 :
/// Builds a unique repository directory path for the named test:
/// `<TEST_OUTPUT>/test_<test_name>_<random uuid>`, where `TEST_OUTPUT` falls back
/// to `../tmp_check` when the environment variable cannot be read.
#[cfg(test)]
pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf {
    let test_output_dir = match std::env::var("TEST_OUTPUT") {
        Ok(dir) => dir,
        Err(_) => String::from("../tmp_check"),
    };

    let test_id = uuid::Uuid::new_v4();
    format!("{test_output_dir}/test_{test_name}_{test_id}").into()
}
610 :
611 125 : pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self {
612 125 : let pg_distrib_dir = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../pg_install");
613 125 :
614 125 : let config_toml = pageserver_api::config::ConfigToml {
615 125 : wait_lsn_timeout: Duration::from_secs(60),
616 125 : wal_redo_timeout: Duration::from_secs(60),
617 125 : pg_distrib_dir: Some(pg_distrib_dir),
618 125 : metric_collection_interval: Duration::from_secs(60),
619 125 : synthetic_size_calculation_interval: Duration::from_secs(60),
620 125 : background_task_maximum_delay: Duration::ZERO,
621 125 : load_previous_heatmap: Some(true),
622 125 : generate_unarchival_heatmap: Some(true),
623 125 : control_plane_api: Some(Url::parse("http://localhost:6666").unwrap()),
624 125 : ..Default::default()
625 125 : };
626 125 : PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
627 125 : }
628 : }
629 :
630 0 : #[derive(serde::Deserialize, serde::Serialize)]
631 : pub struct PageserverIdentity {
632 : pub id: NodeId,
633 : }
634 :
635 : /// Configurable semaphore permits setting.
636 : ///
637 : /// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty
638 : /// semaphore cannot be distinguished, leading any feature using these to await forever (or until
639 : /// new permits are added).
640 : #[derive(Debug, Clone)]
641 : pub struct ConfigurableSemaphore {
642 : initial_permits: NonZeroUsize,
643 : inner: std::sync::Arc<tokio::sync::Semaphore>,
644 : }
645 :
646 : impl ConfigurableSemaphore {
647 : /// Initializse using a non-zero amount of permits.
648 : ///
649 : /// Require a non-zero initial permits, because using permits == 0 is a crude way to disable a
650 : /// feature such as [`TenantShard::gather_size_inputs`]. Otherwise any semaphore using future will
651 : /// behave like [`futures::future::pending`], just waiting until new permits are added.
652 : ///
653 : /// [`TenantShard::gather_size_inputs`]: crate::tenant::TenantShard::gather_size_inputs
654 381 : pub fn new(initial_permits: NonZeroUsize) -> Self {
655 381 : ConfigurableSemaphore {
656 381 : initial_permits,
657 381 : inner: std::sync::Arc::new(tokio::sync::Semaphore::new(initial_permits.get())),
658 381 : }
659 381 : }
660 :
661 : /// Returns the configured amount of permits.
662 0 : pub fn initial_permits(&self) -> NonZeroUsize {
663 0 : self.initial_permits
664 0 : }
665 : }
666 :
667 : impl PartialEq for ConfigurableSemaphore {
668 0 : fn eq(&self, other: &Self) -> bool {
669 0 : // the number of permits can be increased at runtime, so we cannot really fulfill the
670 0 : // PartialEq value equality otherwise
671 0 : self.initial_permits == other.initial_permits
672 0 : }
673 : }
674 :
675 : impl Eq for ConfigurableSemaphore {}
676 :
677 : impl ConfigurableSemaphore {
678 0 : pub fn inner(&self) -> &std::sync::Arc<tokio::sync::Semaphore> {
679 0 : &self.inner
680 0 : }
681 : }
682 :
#[cfg(test)]
mod tests {

    use camino::Utf8PathBuf;
    use utils::id::NodeId;

    use super::PageServerConf;

    /// Deserializes `input` as a `pageserver.toml` and runs it through
    /// [`PageServerConf::parse_and_validate`] with a dummy node id and workdir.
    fn parse_and_validate(input: &str) -> anyhow::Result<PageServerConf> {
        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
            .expect("config has valid fields");
        let workdir = Utf8PathBuf::from("/nonexistent");
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
    }

    #[test]
    fn test_minimal_config_toml_is_valid() {
        // The minimal valid config for running a pageserver:
        // - control_plane_api is mandatory, as pageservers cannot run in isolation
        // - we use Default impl of everything else in this situation
        let input = r#"
            control_plane_api = "http://localhost:6666"
        "#;
        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
            .expect("minimal config is valid");
        let workdir = Utf8PathBuf::from("/nonexistent");
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect("parse_and_validate");
    }

    #[test]
    fn test_config_tracing_sampling_ratio_is_invalid() {
        // The endpoint is valid here, so the zero denominator is the only defect.
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [tracing]

            sampling_ratio = { numerator = 1, denominator = 0 }

            [tracing.export_config]
            endpoint = "http://localhost:4317"
            protocol = "http-binary"
            timeout = "1ms"
        "#;
        let err = parse_and_validate(input)
            .expect_err("parse_and_validate should fail for a zero denominator");
        assert!(
            format!("{err:#}").contains("Invalid sampling ratio"),
            "unexpected error: {err:#}"
        );
    }

    #[test]
    fn test_config_tracing_endpoint_is_invalid() {
        // The sampling ratio must be valid here; otherwise validation bails out on
        // the ratio and the endpoint check is never reached.
        let input = r#"
            control_plane_api = "http://localhost:6666"

            [tracing]

            sampling_ratio = { numerator = 1, denominator = 100 }

            [tracing.export_config]
            endpoint = "localhost:4317"
            protocol = "http-binary"
            timeout = "1ms"
        "#;
        let err = parse_and_validate(input)
            .expect_err("parse_and_validate should fail for endpoint without scheme");
        assert!(
            format!("{err:#}").contains("tracing endpoint URL"),
            "unexpected error: {err:#}"
        );
    }
}
|