Line data Source code
1 : //! Functions for handling per-tenant configuration options
2 : //!
3 : //! If a tenant is created with the --config option,
4 : //! the tenant-specific config will be stored in the tenant's directory.
5 : //! Otherwise, the global pageserver config is used.
6 : //!
7 : //! If the tenant config file is corrupted, the tenant will be disabled.
8 : //! We cannot fall back to the global or default config, because wrong settings
9 : //! may lead to data loss.
10 : //!
11 : use anyhow::Context;
12 : use pageserver_api::models;
13 : use serde::{Deserialize, Serialize};
14 : use std::num::NonZeroU64;
15 : use std::time::Duration;
16 :
/// Built-in fallback values used by `TenantConf::default()`.
///
/// Durations are kept as human-readable strings and parsed with `humantime`
/// when the default configuration is built.
pub mod defaults {
    /// Number of bytes in one mebibyte; the size defaults below are multiples of it.
    const MIB: u64 = 1024 * 1024;

    /// Default `checkpoint_distance`. This parameter actually determines L0 layer file size.
    ///
    /// FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
    /// would be more appropriate. But a low value forces the code to be exercised more,
    /// which is good for now to trigger bugs.
    pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * MIB;
    /// Default `checkpoint_timeout`.
    pub const DEFAULT_CHECKPOINT_TIMEOUT: &str = "10 m";

    /// Target file size, when creating image and delta layers.
    /// This parameter determines L1 layer file size.
    pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * MIB;

    /// Default `compaction_period`.
    pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
    /// Default `compaction_threshold`.
    pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;

    /// Default `gc_horizon`.
    pub const DEFAULT_GC_HORIZON: u64 = 64 * MIB;

    /// Default `gc_period`.
    ///
    /// Large DEFAULT_GC_PERIOD is fine as long as PITR_INTERVAL is larger.
    /// If there's a need to decrease this value, first make sure that GC
    /// doesn't hold a layer map write lock for non-trivial operations.
    /// Relevant: https://github.com/neondatabase/neon/issues/3394
    pub const DEFAULT_GC_PERIOD: &str = "1 hr";
    /// Default `image_creation_threshold`.
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
    /// Default `pitr_interval`.
    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    /// Default `walreceiver_connect_timeout`.
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds";
    /// Default `lagging_wal_timeout`.
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
    /// Default `max_lsn_wal_lag`; must stay non-zero because it is stored as a `NonZeroU64`.
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * MIB;
    /// Default `evictions_low_residence_duration_metric_threshold`.
    pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
}
46 :
47 : /// Per-tenant configuration options
48 40 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
49 : pub struct TenantConf {
50 : // Flush out an inmemory layer, if it's holding WAL older than this
51 : // This puts a backstop on how much WAL needs to be re-digested if the
52 : // page server crashes.
53 : // This parameter actually determines L0 layer file size.
54 : pub checkpoint_distance: u64,
55 : // Inmemory layer is also flushed at least once in checkpoint_timeout to
56 : // eventually upload WAL after activity is stopped.
57 : #[serde(with = "humantime_serde")]
58 : pub checkpoint_timeout: Duration,
59 : // Target file size, when creating image and delta layers.
60 : // This parameter determines L1 layer file size.
61 : pub compaction_target_size: u64,
62 : // How often to check if there's compaction work to be done.
63 : // Duration::ZERO means automatic compaction is disabled.
64 : #[serde(with = "humantime_serde")]
65 : pub compaction_period: Duration,
66 : // Level0 delta layer threshold for compaction.
67 : pub compaction_threshold: usize,
68 : // Determines how much history is retained, to allow
69 : // branching and read replicas at an older point in time.
70 : // The unit is #of bytes of WAL.
71 : // Page versions older than this are garbage collected away.
72 : pub gc_horizon: u64,
73 : // Interval at which garbage collection is triggered.
74 : // Duration::ZERO means automatic GC is disabled
75 : #[serde(with = "humantime_serde")]
76 : pub gc_period: Duration,
77 : // Delta layer churn threshold to create L1 image layers.
78 : pub image_creation_threshold: usize,
79 : // Determines how much history is retained, to allow
80 : // branching and read replicas at an older point in time.
81 : // The unit is time.
82 : // Page versions older than this are garbage collected away.
83 : #[serde(with = "humantime_serde")]
84 : pub pitr_interval: Duration,
85 : /// Maximum amount of time to wait while opening a connection to receive wal, before erroring.
86 : #[serde(with = "humantime_serde")]
87 : pub walreceiver_connect_timeout: Duration,
88 : /// Considers safekeepers stalled after no WAL updates were received longer than this threshold.
89 : /// A stalled safekeeper will be changed to a newer one when it appears.
90 : #[serde(with = "humantime_serde")]
91 : pub lagging_wal_timeout: Duration,
92 : /// Considers safekeepers lagging when their WAL is behind another safekeeper for more than this threshold.
93 : /// A lagging safekeeper will be changed after `lagging_wal_timeout` time elapses since the last WAL update,
94 : /// to avoid eager reconnects.
95 : pub max_lsn_wal_lag: NonZeroU64,
96 : pub trace_read_requests: bool,
97 : pub eviction_policy: EvictionPolicy,
98 : pub min_resident_size_override: Option<u64>,
99 : // See the corresponding metric's help string.
100 : #[serde(with = "humantime_serde")]
101 : pub evictions_low_residence_duration_metric_threshold: Duration,
102 : pub gc_feedback: bool,
103 : }
104 :
105 : /// Same as TenantConf, but this struct preserves the information about
106 : /// which parameters are set and which are not.
107 3930 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
108 : pub struct TenantConfOpt {
109 : #[serde(skip_serializing_if = "Option::is_none")]
110 : #[serde(default)]
111 : pub checkpoint_distance: Option<u64>,
112 :
113 : #[serde(skip_serializing_if = "Option::is_none")]
114 : #[serde(with = "humantime_serde")]
115 : #[serde(default)]
116 : pub checkpoint_timeout: Option<Duration>,
117 :
118 : #[serde(skip_serializing_if = "Option::is_none")]
119 : #[serde(default)]
120 : pub compaction_target_size: Option<u64>,
121 :
122 : #[serde(skip_serializing_if = "Option::is_none")]
123 : #[serde(with = "humantime_serde")]
124 : #[serde(default)]
125 : pub compaction_period: Option<Duration>,
126 :
127 : #[serde(skip_serializing_if = "Option::is_none")]
128 : #[serde(default)]
129 : pub compaction_threshold: Option<usize>,
130 :
131 : #[serde(skip_serializing_if = "Option::is_none")]
132 : #[serde(default)]
133 : pub gc_horizon: Option<u64>,
134 :
135 : #[serde(skip_serializing_if = "Option::is_none")]
136 : #[serde(with = "humantime_serde")]
137 : #[serde(default)]
138 : pub gc_period: Option<Duration>,
139 :
140 : #[serde(skip_serializing_if = "Option::is_none")]
141 : #[serde(default)]
142 : pub image_creation_threshold: Option<usize>,
143 :
144 : #[serde(skip_serializing_if = "Option::is_none")]
145 : #[serde(with = "humantime_serde")]
146 : #[serde(default)]
147 : pub pitr_interval: Option<Duration>,
148 :
149 : #[serde(skip_serializing_if = "Option::is_none")]
150 : #[serde(with = "humantime_serde")]
151 : #[serde(default)]
152 : pub walreceiver_connect_timeout: Option<Duration>,
153 :
154 : #[serde(skip_serializing_if = "Option::is_none")]
155 : #[serde(with = "humantime_serde")]
156 : #[serde(default)]
157 : pub lagging_wal_timeout: Option<Duration>,
158 :
159 : #[serde(skip_serializing_if = "Option::is_none")]
160 : #[serde(default)]
161 : pub max_lsn_wal_lag: Option<NonZeroU64>,
162 :
163 : #[serde(skip_serializing_if = "Option::is_none")]
164 : #[serde(default)]
165 : pub trace_read_requests: Option<bool>,
166 :
167 : #[serde(skip_serializing_if = "Option::is_none")]
168 : #[serde(default)]
169 : pub eviction_policy: Option<EvictionPolicy>,
170 :
171 : #[serde(skip_serializing_if = "Option::is_none")]
172 : #[serde(default)]
173 : pub min_resident_size_override: Option<u64>,
174 :
175 : #[serde(skip_serializing_if = "Option::is_none")]
176 : #[serde(with = "humantime_serde")]
177 : #[serde(default)]
178 : pub evictions_low_residence_duration_metric_threshold: Option<Duration>,
179 :
180 : #[serde(skip_serializing_if = "Option::is_none")]
181 : #[serde(default)]
182 : pub gc_feedback: Option<bool>,
183 : }
184 :
185 55 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
186 : #[serde(tag = "kind")]
187 : pub enum EvictionPolicy {
188 : NoEviction,
189 : LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
190 : }
191 :
192 : impl EvictionPolicy {
193 965 : pub fn discriminant_str(&self) -> &'static str {
194 965 : match self {
195 938 : EvictionPolicy::NoEviction => "NoEviction",
196 27 : EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
197 : }
198 965 : }
199 : }
200 :
201 91 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
202 : pub struct EvictionPolicyLayerAccessThreshold {
203 : #[serde(with = "humantime_serde")]
204 : pub period: Duration,
205 : #[serde(with = "humantime_serde")]
206 : pub threshold: Duration,
207 : }
208 :
209 : impl TenantConfOpt {
210 993 : pub fn merge(&self, global_conf: TenantConf) -> TenantConf {
211 993 : TenantConf {
212 993 : checkpoint_distance: self
213 993 : .checkpoint_distance
214 993 : .unwrap_or(global_conf.checkpoint_distance),
215 993 : checkpoint_timeout: self
216 993 : .checkpoint_timeout
217 993 : .unwrap_or(global_conf.checkpoint_timeout),
218 993 : compaction_target_size: self
219 993 : .compaction_target_size
220 993 : .unwrap_or(global_conf.compaction_target_size),
221 993 : compaction_period: self
222 993 : .compaction_period
223 993 : .unwrap_or(global_conf.compaction_period),
224 993 : compaction_threshold: self
225 993 : .compaction_threshold
226 993 : .unwrap_or(global_conf.compaction_threshold),
227 993 : gc_horizon: self.gc_horizon.unwrap_or(global_conf.gc_horizon),
228 993 : gc_period: self.gc_period.unwrap_or(global_conf.gc_period),
229 993 : image_creation_threshold: self
230 993 : .image_creation_threshold
231 993 : .unwrap_or(global_conf.image_creation_threshold),
232 993 : pitr_interval: self.pitr_interval.unwrap_or(global_conf.pitr_interval),
233 993 : walreceiver_connect_timeout: self
234 993 : .walreceiver_connect_timeout
235 993 : .unwrap_or(global_conf.walreceiver_connect_timeout),
236 993 : lagging_wal_timeout: self
237 993 : .lagging_wal_timeout
238 993 : .unwrap_or(global_conf.lagging_wal_timeout),
239 993 : max_lsn_wal_lag: self.max_lsn_wal_lag.unwrap_or(global_conf.max_lsn_wal_lag),
240 993 : trace_read_requests: self
241 993 : .trace_read_requests
242 993 : .unwrap_or(global_conf.trace_read_requests),
243 993 : eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
244 993 : min_resident_size_override: self
245 993 : .min_resident_size_override
246 993 : .or(global_conf.min_resident_size_override),
247 993 : evictions_low_residence_duration_metric_threshold: self
248 993 : .evictions_low_residence_duration_metric_threshold
249 993 : .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
250 993 : gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback),
251 993 : }
252 993 : }
253 : }
254 :
255 : impl Default for TenantConf {
256 1983 : fn default() -> Self {
257 1983 : use defaults::*;
258 1983 : Self {
259 1983 : checkpoint_distance: DEFAULT_CHECKPOINT_DISTANCE,
260 1983 : checkpoint_timeout: humantime::parse_duration(DEFAULT_CHECKPOINT_TIMEOUT)
261 1983 : .expect("cannot parse default checkpoint timeout"),
262 1983 : compaction_target_size: DEFAULT_COMPACTION_TARGET_SIZE,
263 1983 : compaction_period: humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
264 1983 : .expect("cannot parse default compaction period"),
265 1983 : compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
266 1983 : gc_horizon: DEFAULT_GC_HORIZON,
267 1983 : gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
268 1983 : .expect("cannot parse default gc period"),
269 1983 : image_creation_threshold: DEFAULT_IMAGE_CREATION_THRESHOLD,
270 1983 : pitr_interval: humantime::parse_duration(DEFAULT_PITR_INTERVAL)
271 1983 : .expect("cannot parse default PITR interval"),
272 1983 : walreceiver_connect_timeout: humantime::parse_duration(
273 1983 : DEFAULT_WALRECEIVER_CONNECT_TIMEOUT,
274 1983 : )
275 1983 : .expect("cannot parse default walreceiver connect timeout"),
276 1983 : lagging_wal_timeout: humantime::parse_duration(DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT)
277 1983 : .expect("cannot parse default walreceiver lagging wal timeout"),
278 1983 : max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
279 1983 : .expect("cannot parse default max walreceiver Lsn wal lag"),
280 1983 : trace_read_requests: false,
281 1983 : eviction_policy: EvictionPolicy::NoEviction,
282 1983 : min_resident_size_override: None,
283 1983 : evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
284 1983 : DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
285 1983 : )
286 1983 : .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
287 1983 : gc_feedback: false,
288 1983 : }
289 1983 : }
290 : }
291 :
/// Helper function to standardize the error messages we produce on bad durations.
///
/// Returns a closure that lazily builds the message, so nothing is formatted
/// unless an error actually occurs. Intended to be used with anyhow's
/// `with_context`, e.g.:
///
/// ```ignore
/// let value = result.with_context(bad_duration("name", &value))?;
/// ```
fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn() -> String {
    let describe = move || format!("Cannot parse `{field_name}` duration {value:?}");
    describe
}
301 :
302 : impl TryFrom<&'_ models::TenantConfig> for TenantConfOpt {
303 : type Error = anyhow::Error;
304 :
305 553 : fn try_from(request_data: &'_ models::TenantConfig) -> Result<Self, Self::Error> {
306 553 : let mut tenant_conf = TenantConfOpt::default();
307 :
308 553 : if let Some(gc_period) = &request_data.gc_period {
309 : tenant_conf.gc_period = Some(
310 186 : humantime::parse_duration(gc_period)
311 186 : .with_context(bad_duration("gc_period", gc_period))?,
312 : );
313 367 : }
314 553 : tenant_conf.gc_horizon = request_data.gc_horizon;
315 553 : tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
316 :
317 553 : if let Some(pitr_interval) = &request_data.pitr_interval {
318 : tenant_conf.pitr_interval = Some(
319 17 : humantime::parse_duration(pitr_interval)
320 17 : .with_context(bad_duration("pitr_interval", pitr_interval))?,
321 : );
322 536 : }
323 :
324 553 : if let Some(walreceiver_connect_timeout) = &request_data.walreceiver_connect_timeout {
325 : tenant_conf.walreceiver_connect_timeout = Some(
326 2 : humantime::parse_duration(walreceiver_connect_timeout).with_context(
327 2 : bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout),
328 2 : )?,
329 : );
330 551 : }
331 553 : if let Some(lagging_wal_timeout) = &request_data.lagging_wal_timeout {
332 : tenant_conf.lagging_wal_timeout = Some(
333 2 : humantime::parse_duration(lagging_wal_timeout)
334 2 : .with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?,
335 : );
336 551 : }
337 553 : if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
338 3 : tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
339 550 : }
340 553 : if let Some(trace_read_requests) = request_data.trace_read_requests {
341 3 : tenant_conf.trace_read_requests = Some(trace_read_requests);
342 550 : }
343 :
344 553 : tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
345 553 : if let Some(checkpoint_timeout) = &request_data.checkpoint_timeout {
346 : tenant_conf.checkpoint_timeout = Some(
347 7 : humantime::parse_duration(checkpoint_timeout)
348 7 : .with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?,
349 : );
350 546 : }
351 :
352 553 : tenant_conf.compaction_target_size = request_data.compaction_target_size;
353 553 : tenant_conf.compaction_threshold = request_data.compaction_threshold;
354 :
355 553 : if let Some(compaction_period) = &request_data.compaction_period {
356 : tenant_conf.compaction_period = Some(
357 182 : humantime::parse_duration(compaction_period)
358 182 : .with_context(bad_duration("compaction_period", compaction_period))?,
359 : );
360 371 : }
361 :
362 553 : if let Some(eviction_policy) = &request_data.eviction_policy {
363 : tenant_conf.eviction_policy = Some(
364 6 : serde::Deserialize::deserialize(eviction_policy)
365 6 : .context("parse field `eviction_policy`")?,
366 : );
367 547 : }
368 :
369 553 : tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
370 :
371 7 : if let Some(evictions_low_residence_duration_metric_threshold) =
372 553 : &request_data.evictions_low_residence_duration_metric_threshold
373 : {
374 : tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
375 7 : humantime::parse_duration(evictions_low_residence_duration_metric_threshold)
376 7 : .with_context(bad_duration(
377 7 : "evictions_low_residence_duration_metric_threshold",
378 7 : evictions_low_residence_duration_metric_threshold,
379 7 : ))?,
380 : );
381 546 : }
382 553 : tenant_conf.gc_feedback = request_data.gc_feedback;
383 553 :
384 553 : Ok(tenant_conf)
385 553 : }
386 : }
387 :
#[cfg(test)]
mod tests {
    use super::*;

    /// A config with a single option set must serialize to just that option,
    /// in both TOML and JSON, and must round-trip back to an equal value.
    #[test]
    fn de_serializing_pageserver_config_omits_empty_values() {
        let sparse = TenantConfOpt {
            gc_horizon: Some(42),
            ..Default::default()
        };

        let as_toml = toml_edit::ser::to_string(&sparse).unwrap();
        assert_eq!(as_toml, "gc_horizon = 42\n");
        let from_toml: TenantConfOpt = toml_edit::de::from_str(&as_toml).unwrap();
        assert_eq!(sparse, from_toml);

        let as_json = serde_json::to_string(&sparse).unwrap();
        assert_eq!(as_json, "{\"gc_horizon\":42}");
        let from_json: TenantConfOpt = serde_json::from_str(&as_json).unwrap();
        assert_eq!(sparse, from_json);
    }
}
|