Line data Source code
1 : use crate::{
2 : auth::{self, backend::AuthRateLimiter},
3 : console::locks::ApiLocks,
4 : rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
5 : scram::threadpool::ThreadPool,
6 : serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
7 : Host,
8 : };
9 : use anyhow::{bail, ensure, Context, Ok};
10 : use itertools::Itertools;
11 : use remote_storage::RemoteStorageConfig;
12 : use rustls::{
13 : crypto::ring::sign,
14 : pki_types::{CertificateDer, PrivateKeyDer},
15 : };
16 : use sha2::{Digest, Sha256};
17 : use std::{
18 : collections::{HashMap, HashSet},
19 : str::FromStr,
20 : sync::Arc,
21 : time::Duration,
22 : };
23 : use tracing::{error, info};
24 : use x509_parser::oid_registry;
25 :
26 : pub struct ProxyConfig {
27 : pub tls_config: Option<TlsConfig>,
28 : pub auth_backend: auth::Backend<'static, (), ()>,
29 : pub metric_collection: Option<MetricCollectionConfig>,
30 : pub allow_self_signed_compute: bool,
31 : pub http_config: HttpConfig,
32 : pub authentication_config: AuthenticationConfig,
33 : pub require_client_ip: bool,
34 : pub region: String,
35 : pub handshake_timeout: Duration,
36 : pub wake_compute_retry_config: RetryConfig,
37 : pub connect_compute_locks: ApiLocks<Host>,
38 : pub connect_to_compute_retry_config: RetryConfig,
39 : }
40 :
41 : #[derive(Debug)]
42 : pub struct MetricCollectionConfig {
43 : pub endpoint: reqwest::Url,
44 : pub interval: Duration,
45 : pub backup_metric_collection_config: MetricBackupCollectionConfig,
46 : }
47 :
48 : pub struct TlsConfig {
49 : pub config: Arc<rustls::ServerConfig>,
50 : pub common_names: HashSet<String>,
51 : pub cert_resolver: Arc<CertResolver>,
52 : }
53 :
54 : pub struct HttpConfig {
55 : pub accept_websockets: bool,
56 : pub pool_options: GlobalConnPoolOptions,
57 : pub cancel_set: CancelSet,
58 : pub client_conn_threshold: u64,
59 : }
60 :
61 : pub struct AuthenticationConfig {
62 : pub thread_pool: Arc<ThreadPool>,
63 : pub scram_protocol_timeout: tokio::time::Duration,
64 : pub rate_limiter_enabled: bool,
65 : pub rate_limiter: AuthRateLimiter,
66 : pub rate_limit_ip_subnet: u8,
67 : pub ip_allowlist_check_enabled: bool,
68 : }
69 :
70 : impl TlsConfig {
71 20 : pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
72 20 : self.config.clone()
73 20 : }
74 : }
75 :
/// ALPN protocol identifier advertised for PostgreSQL connections.
///
/// <https://github.com/postgres/postgres/blob/ca481d3c9ab7bf69ff0c8d71ad3951d407f6a33c/src/include/libpq/pqcomm.h#L159>
pub const PG_ALPN_PROTOCOL: &[u8] = "postgresql".as_bytes();
78 :
79 : /// Configure TLS for the main endpoint.
80 0 : pub fn configure_tls(
81 0 : key_path: &str,
82 0 : cert_path: &str,
83 0 : certs_dir: Option<&String>,
84 0 : ) -> anyhow::Result<TlsConfig> {
85 0 : let mut cert_resolver = CertResolver::new();
86 0 :
87 0 : // add default certificate
88 0 : cert_resolver.add_cert_path(key_path, cert_path, true)?;
89 :
90 : // add extra certificates
91 0 : if let Some(certs_dir) = certs_dir {
92 0 : for entry in std::fs::read_dir(certs_dir)? {
93 0 : let entry = entry?;
94 0 : let path = entry.path();
95 0 : if path.is_dir() {
96 : // file names aligned with default cert-manager names
97 0 : let key_path = path.join("tls.key");
98 0 : let cert_path = path.join("tls.crt");
99 0 : if key_path.exists() && cert_path.exists() {
100 0 : cert_resolver.add_cert_path(
101 0 : &key_path.to_string_lossy(),
102 0 : &cert_path.to_string_lossy(),
103 0 : false,
104 0 : )?;
105 0 : }
106 0 : }
107 : }
108 0 : }
109 :
110 0 : let common_names = cert_resolver.get_common_names();
111 0 :
112 0 : let cert_resolver = Arc::new(cert_resolver);
113 0 :
114 0 : // allow TLS 1.2 to be compatible with older client libraries
115 0 : let mut config = rustls::ServerConfig::builder_with_protocol_versions(&[
116 0 : &rustls::version::TLS13,
117 0 : &rustls::version::TLS12,
118 0 : ])
119 0 : .with_no_client_auth()
120 0 : .with_cert_resolver(cert_resolver.clone());
121 0 :
122 0 : config.alpn_protocols = vec![PG_ALPN_PROTOCOL.to_vec()];
123 0 :
124 0 : Ok(TlsConfig {
125 0 : config: Arc::new(config),
126 0 : common_names,
127 0 : cert_resolver,
128 0 : })
129 0 : }
130 :
/// Channel binding parameter (`tls-server-end-point`).
///
/// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
///
/// Description from the RFC: the hash of the TLS server's certificate as it
/// appears, octet for octet, in the server's Certificate message. Note
/// that the Certificate message contains a certificate_list, in which
/// the first element is the server's certificate.
///
/// The hash function is to be selected as follows:
///
/// * if the certificate's signatureAlgorithm uses a single hash
///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;
///
/// * if the certificate's signatureAlgorithm uses a single hash
///   function and that hash function is neither MD5 nor SHA-1, then use
///   the hash function associated with the certificate's
///   signatureAlgorithm;
///
/// * if the certificate's signatureAlgorithm uses no hash functions or
///   uses multiple hash functions, then this channel binding type's
///   channel bindings are undefined at this time (updates to this channel
///   binding type may occur to address this issue if it ever arises).
#[derive(Debug, Clone, Copy)]
pub enum TlsServerEndPoint {
    /// SHA-256 digest of the certificate.
    Sha256([u8; 32]),
    /// No channel binding could be determined for the certificate.
    Undefined,
}
158 :
159 : impl TlsServerEndPoint {
160 21 : pub fn new(cert: &CertificateDer<'_>) -> anyhow::Result<Self> {
161 21 : let sha256_oids = [
162 21 : // I'm explicitly not adding MD5 or SHA1 here... They're bad.
163 21 : oid_registry::OID_SIG_ECDSA_WITH_SHA256,
164 21 : oid_registry::OID_PKCS1_SHA256WITHRSA,
165 21 : ];
166 :
167 21 : let pem = x509_parser::parse_x509_certificate(cert)
168 21 : .context("Failed to parse PEM object from cerficiate")?
169 : .1;
170 :
171 21 : info!(subject = %pem.subject, "parsing TLS certificate");
172 :
173 21 : let reg = oid_registry::OidRegistry::default().with_all_crypto();
174 21 : let oid = pem.signature_algorithm.oid();
175 21 : let alg = reg.get(oid);
176 21 : if sha256_oids.contains(oid) {
177 21 : let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
178 21 : info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
179 21 : Ok(Self::Sha256(tls_server_end_point))
180 : } else {
181 0 : error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
182 0 : Ok(Self::Undefined)
183 : }
184 21 : }
185 :
186 16 : pub fn supported(&self) -> bool {
187 16 : !matches!(self, TlsServerEndPoint::Undefined)
188 16 : }
189 : }
190 :
191 : #[derive(Default, Debug)]
192 : pub struct CertResolver {
193 : certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
194 : default: Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
195 : }
196 :
197 : impl CertResolver {
198 21 : pub fn new() -> Self {
199 21 : Self::default()
200 21 : }
201 :
202 0 : fn add_cert_path(
203 0 : &mut self,
204 0 : key_path: &str,
205 0 : cert_path: &str,
206 0 : is_default: bool,
207 0 : ) -> anyhow::Result<()> {
208 0 : let priv_key = {
209 0 : let key_bytes = std::fs::read(key_path)
210 0 : .context(format!("Failed to read TLS keys at '{key_path}'"))?;
211 0 : let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec();
212 0 :
213 0 : ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
214 : PrivateKeyDer::Pkcs8(
215 0 : keys.pop()
216 0 : .unwrap()
217 0 : .context(format!("Failed to parse TLS keys at '{key_path}'"))?,
218 : )
219 : };
220 :
221 0 : let cert_chain_bytes = std::fs::read(cert_path)
222 0 : .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;
223 :
224 0 : let cert_chain = {
225 0 : rustls_pemfile::certs(&mut &cert_chain_bytes[..])
226 0 : .try_collect()
227 0 : .with_context(|| {
228 0 : format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.")
229 0 : })?
230 : };
231 :
232 0 : self.add_cert(priv_key, cert_chain, is_default)
233 0 : }
234 :
235 21 : pub fn add_cert(
236 21 : &mut self,
237 21 : priv_key: PrivateKeyDer<'static>,
238 21 : cert_chain: Vec<CertificateDer<'static>>,
239 21 : is_default: bool,
240 21 : ) -> anyhow::Result<()> {
241 21 : let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
242 :
243 21 : let first_cert = &cert_chain[0];
244 21 : let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
245 21 : let pem = x509_parser::parse_x509_certificate(first_cert)
246 21 : .context("Failed to parse PEM object from cerficiate")?
247 : .1;
248 :
249 21 : let common_name = pem.subject().to_string();
250 :
251 : // We only use non-wildcard certificates in web auth proxy so it seems okay to treat them the same as
252 : // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
253 : // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
254 : // and passed None instead, which blows up number of cases downstream code should handle. Proper coding
255 : // here should better avoid Option for common_names, and do wildcard-based certificate selection instead
256 : // of cutting off '*.' parts.
257 21 : let common_name = if common_name.starts_with("CN=*.") {
258 0 : common_name.strip_prefix("CN=*.").map(|s| s.to_string())
259 : } else {
260 21 : common_name.strip_prefix("CN=").map(|s| s.to_string())
261 : }
262 21 : .context("Failed to parse common name from certificate")?;
263 :
264 21 : let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
265 21 :
266 21 : if is_default {
267 21 : self.default = Some((cert.clone(), tls_server_end_point));
268 21 : }
269 :
270 21 : self.certs.insert(common_name, (cert, tls_server_end_point));
271 21 :
272 21 : Ok(())
273 21 : }
274 :
275 21 : pub fn get_common_names(&self) -> HashSet<String> {
276 21 : self.certs.keys().map(|s| s.to_string()).collect()
277 21 : }
278 : }
279 :
280 : impl rustls::server::ResolvesServerCert for CertResolver {
281 0 : fn resolve(
282 0 : &self,
283 0 : client_hello: rustls::server::ClientHello<'_>,
284 0 : ) -> Option<Arc<rustls::sign::CertifiedKey>> {
285 0 : self.resolve(client_hello.server_name()).map(|x| x.0)
286 0 : }
287 : }
288 :
289 : impl CertResolver {
290 20 : pub fn resolve(
291 20 : &self,
292 20 : server_name: Option<&str>,
293 20 : ) -> Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)> {
294 : // loop here and cut off more and more subdomains until we find
295 : // a match to get a proper wildcard support. OTOH, we now do not
296 : // use nested domains, so keep this simple for now.
297 : //
298 : // With the current coding foo.com will match *.foo.com and that
299 : // repeats behavior of the old code.
300 20 : if let Some(mut sni_name) = server_name {
301 : loop {
302 40 : if let Some(cert) = self.certs.get(sni_name) {
303 20 : return Some(cert.clone());
304 20 : }
305 20 : if let Some((_, rest)) = sni_name.split_once('.') {
306 20 : sni_name = rest;
307 20 : } else {
308 0 : return None;
309 : }
310 : }
311 : } else {
312 : // No SNI, use the default certificate, otherwise we can't get to
313 : // options parameter which can be used to set endpoint name too.
314 : // That means that non-SNI flow will not work for CNAME domains in
315 : // verify-full mode.
316 : //
317 : // If that will be a problem we can:
318 : //
319 : // a) Instead of multi-cert approach use single cert with extra
320 : // domains listed in Subject Alternative Name (SAN).
321 : // b) Deploy separate proxy instances for extra domains.
322 0 : self.default.clone()
323 : }
324 20 : }
325 : }
326 :
327 : #[derive(Debug)]
328 : pub struct EndpointCacheConfig {
329 : /// Batch size to receive all endpoints on the startup.
330 : pub initial_batch_size: usize,
331 : /// Batch size to receive endpoints.
332 : pub default_batch_size: usize,
333 : /// Timeouts for the stream read operation.
334 : pub xread_timeout: Duration,
335 : /// Stream name to read from.
336 : pub stream_name: String,
337 : /// Limiter info (to distinguish when to enable cache).
338 : pub limiter_info: Vec<RateBucketInfo>,
339 : /// Disable cache.
340 : /// If true, cache is ignored, but reports all statistics.
341 : pub disable_cache: bool,
342 : /// Retry interval for the stream read operation.
343 : pub retry_interval: Duration,
344 : }
345 :
346 : impl EndpointCacheConfig {
347 : /// Default options for [`crate::console::provider::NodeInfoCache`].
348 : /// Notice that by default the limiter is empty, which means that cache is disabled.
349 : pub const CACHE_DEFAULT_OPTIONS: &'static str =
350 : "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
351 :
352 : /// Parse cache options passed via cmdline.
353 : /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
354 0 : fn parse(options: &str) -> anyhow::Result<Self> {
355 0 : let mut initial_batch_size = None;
356 0 : let mut default_batch_size = None;
357 0 : let mut xread_timeout = None;
358 0 : let mut stream_name = None;
359 0 : let mut limiter_info = vec![];
360 0 : let mut disable_cache = false;
361 0 : let mut retry_interval = None;
362 :
363 0 : for option in options.split(',') {
364 0 : let (key, value) = option
365 0 : .split_once('=')
366 0 : .with_context(|| format!("bad key-value pair: {option}"))?;
367 :
368 0 : match key {
369 0 : "initial_batch_size" => initial_batch_size = Some(value.parse()?),
370 0 : "default_batch_size" => default_batch_size = Some(value.parse()?),
371 0 : "xread_timeout" => xread_timeout = Some(humantime::parse_duration(value)?),
372 0 : "stream_name" => stream_name = Some(value.to_string()),
373 0 : "limiter_info" => limiter_info.push(RateBucketInfo::from_str(value)?),
374 0 : "disable_cache" => disable_cache = value.parse()?,
375 0 : "retry_interval" => retry_interval = Some(humantime::parse_duration(value)?),
376 0 : unknown => bail!("unknown key: {unknown}"),
377 : }
378 : }
379 0 : RateBucketInfo::validate(&mut limiter_info)?;
380 :
381 : Ok(Self {
382 0 : initial_batch_size: initial_batch_size.context("missing `initial_batch_size`")?,
383 0 : default_batch_size: default_batch_size.context("missing `default_batch_size`")?,
384 0 : xread_timeout: xread_timeout.context("missing `xread_timeout`")?,
385 0 : stream_name: stream_name.context("missing `stream_name`")?,
386 0 : disable_cache,
387 0 : limiter_info,
388 0 : retry_interval: retry_interval.context("missing `retry_interval`")?,
389 : })
390 0 : }
391 : }
392 :
393 : impl FromStr for EndpointCacheConfig {
394 : type Err = anyhow::Error;
395 :
396 0 : fn from_str(options: &str) -> Result<Self, Self::Err> {
397 0 : let error = || format!("failed to parse endpoint cache options '{options}'");
398 0 : Self::parse(options).with_context(error)
399 0 : }
400 : }
401 : #[derive(Debug)]
402 : pub struct MetricBackupCollectionConfig {
403 : pub interval: Duration,
404 : pub remote_storage_config: Option<RemoteStorageConfig>,
405 : pub chunk_size: usize,
406 : }
407 :
408 1 : pub fn remote_storage_from_toml(s: &str) -> anyhow::Result<RemoteStorageConfig> {
409 1 : RemoteStorageConfig::from_toml(&s.parse()?)
410 1 : }
411 :
/// Cache options as parsed from the command line.
#[derive(Debug)]
pub struct CacheOptions {
    /// Maximum number of entries.
    pub size: usize,
    /// Time-to-live of an entry.
    pub ttl: Duration,
}
420 :
421 : impl CacheOptions {
422 : /// Default options for [`crate::console::provider::NodeInfoCache`].
423 : pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
424 :
425 : /// Parse cache options passed via cmdline.
426 : /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
427 4 : fn parse(options: &str) -> anyhow::Result<Self> {
428 4 : let mut size = None;
429 4 : let mut ttl = None;
430 :
431 7 : for option in options.split(',') {
432 7 : let (key, value) = option
433 7 : .split_once('=')
434 7 : .with_context(|| format!("bad key-value pair: {option}"))?;
435 :
436 7 : match key {
437 7 : "size" => size = Some(value.parse()?),
438 3 : "ttl" => ttl = Some(humantime::parse_duration(value)?),
439 0 : unknown => bail!("unknown key: {unknown}"),
440 : }
441 : }
442 :
443 : // TTL doesn't matter if cache is always empty.
444 4 : if let Some(0) = size {
445 2 : ttl.get_or_insert(Duration::default());
446 2 : }
447 :
448 : Ok(Self {
449 4 : size: size.context("missing `size`")?,
450 4 : ttl: ttl.context("missing `ttl`")?,
451 : })
452 4 : }
453 : }
454 :
455 : impl FromStr for CacheOptions {
456 : type Err = anyhow::Error;
457 :
458 4 : fn from_str(options: &str) -> Result<Self, Self::Err> {
459 4 : let error = || format!("failed to parse cache options '{options}'");
460 4 : Self::parse(options).with_context(error)
461 4 : }
462 : }
463 :
/// Project info cache options as parsed from the command line.
#[derive(Debug)]
pub struct ProjectInfoCacheOptions {
    /// Maximum number of entries.
    pub size: usize,
    /// Time-to-live of an entry.
    pub ttl: Duration,
    /// Maximum number of roles per endpoint.
    pub max_roles: usize,
    /// Interval between garbage collection runs.
    pub gc_interval: Duration,
}
476 :
477 : impl ProjectInfoCacheOptions {
478 : /// Default options for [`crate::console::provider::NodeInfoCache`].
479 : pub const CACHE_DEFAULT_OPTIONS: &'static str =
480 : "size=10000,ttl=4m,max_roles=10,gc_interval=60m";
481 :
482 : /// Parse cache options passed via cmdline.
483 : /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
484 0 : fn parse(options: &str) -> anyhow::Result<Self> {
485 0 : let mut size = None;
486 0 : let mut ttl = None;
487 0 : let mut max_roles = None;
488 0 : let mut gc_interval = None;
489 :
490 0 : for option in options.split(',') {
491 0 : let (key, value) = option
492 0 : .split_once('=')
493 0 : .with_context(|| format!("bad key-value pair: {option}"))?;
494 :
495 0 : match key {
496 0 : "size" => size = Some(value.parse()?),
497 0 : "ttl" => ttl = Some(humantime::parse_duration(value)?),
498 0 : "max_roles" => max_roles = Some(value.parse()?),
499 0 : "gc_interval" => gc_interval = Some(humantime::parse_duration(value)?),
500 0 : unknown => bail!("unknown key: {unknown}"),
501 : }
502 : }
503 :
504 : // TTL doesn't matter if cache is always empty.
505 0 : if let Some(0) = size {
506 0 : ttl.get_or_insert(Duration::default());
507 0 : }
508 :
509 : Ok(Self {
510 0 : size: size.context("missing `size`")?,
511 0 : ttl: ttl.context("missing `ttl`")?,
512 0 : max_roles: max_roles.context("missing `max_roles`")?,
513 0 : gc_interval: gc_interval.context("missing `gc_interval`")?,
514 : })
515 0 : }
516 : }
517 :
518 : impl FromStr for ProjectInfoCacheOptions {
519 : type Err = anyhow::Error;
520 :
521 0 : fn from_str(options: &str) -> Result<Self, Self::Err> {
522 0 : let error = || format!("failed to parse cache options '{options}'");
523 0 : Self::parse(options).with_context(error)
524 0 : }
525 : }
526 :
527 : /// This is a config for connect to compute and wake compute.
528 : #[derive(Clone, Copy, Debug)]
529 : pub struct RetryConfig {
530 : /// Number of times we should retry.
531 : pub max_retries: u32,
532 : /// Retry duration is base_delay * backoff_factor ^ n, where n starts at 0
533 : pub base_delay: tokio::time::Duration,
534 : /// Exponential base for retry wait duration
535 : pub backoff_factor: f64,
536 : }
537 :
538 : impl RetryConfig {
539 : /// Default options for RetryConfig.
540 :
541 : /// Total delay for 5 retries with 200ms base delay and 2 backoff factor is about 6s.
542 : pub const CONNECT_TO_COMPUTE_DEFAULT_VALUES: &'static str =
543 : "num_retries=5,base_retry_wait_duration=200ms,retry_wait_exponent_base=2";
544 : /// Total delay for 8 retries with 100ms base delay and 1.6 backoff factor is about 7s.
545 : /// Cplane has timeout of 60s on each request. 8m7s in total.
546 : pub const WAKE_COMPUTE_DEFAULT_VALUES: &'static str =
547 : "num_retries=8,base_retry_wait_duration=100ms,retry_wait_exponent_base=1.6";
548 :
549 : /// Parse retry options passed via cmdline.
550 : /// Example: [`Self::CONNECT_TO_COMPUTE_DEFAULT_VALUES`].
551 0 : pub fn parse(options: &str) -> anyhow::Result<Self> {
552 0 : let mut num_retries = None;
553 0 : let mut base_retry_wait_duration = None;
554 0 : let mut retry_wait_exponent_base = None;
555 :
556 0 : for option in options.split(',') {
557 0 : let (key, value) = option
558 0 : .split_once('=')
559 0 : .with_context(|| format!("bad key-value pair: {option}"))?;
560 :
561 0 : match key {
562 0 : "num_retries" => num_retries = Some(value.parse()?),
563 0 : "base_retry_wait_duration" => {
564 0 : base_retry_wait_duration = Some(humantime::parse_duration(value)?);
565 : }
566 0 : "retry_wait_exponent_base" => retry_wait_exponent_base = Some(value.parse()?),
567 0 : unknown => bail!("unknown key: {unknown}"),
568 : }
569 : }
570 :
571 : Ok(Self {
572 0 : max_retries: num_retries.context("missing `num_retries`")?,
573 0 : base_delay: base_retry_wait_duration.context("missing `base_retry_wait_duration`")?,
574 0 : backoff_factor: retry_wait_exponent_base
575 0 : .context("missing `retry_wait_exponent_base`")?,
576 : })
577 0 : }
578 : }
579 :
580 : /// Helper for cmdline cache options parsing.
581 8 : #[derive(serde::Deserialize)]
582 : pub struct ConcurrencyLockOptions {
583 : /// The number of shards the lock map should have
584 : pub shards: usize,
585 : /// The number of allowed concurrent requests for each endpoitn
586 : #[serde(flatten)]
587 : pub limiter: RateLimiterConfig,
588 : /// Garbage collection epoch
589 : #[serde(deserialize_with = "humantime_serde::deserialize")]
590 : pub epoch: Duration,
591 : /// Lock timeout
592 : #[serde(deserialize_with = "humantime_serde::deserialize")]
593 : pub timeout: Duration,
594 : }
595 :
596 : impl ConcurrencyLockOptions {
597 : /// Default options for [`crate::console::provider::ApiLocks`].
598 : pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
599 : /// Default options for [`crate::console::provider::ApiLocks`].
600 : pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
601 : "shards=64,permits=100,epoch=10m,timeout=10ms";
602 :
603 : // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "shards=32,permits=4,epoch=10m,timeout=1s";
604 :
605 : /// Parse lock options passed via cmdline.
606 : /// Example: [`Self::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK`].
607 4 : fn parse(options: &str) -> anyhow::Result<Self> {
608 4 : let options = options.trim();
609 4 : if options.starts_with('{') && options.ends_with('}') {
610 1 : return Ok(serde_json::from_str(options)?);
611 3 : }
612 3 :
613 3 : let mut shards = None;
614 3 : let mut permits = None;
615 3 : let mut epoch = None;
616 3 : let mut timeout = None;
617 :
618 9 : for option in options.split(',') {
619 9 : let (key, value) = option
620 9 : .split_once('=')
621 9 : .with_context(|| format!("bad key-value pair: {option}"))?;
622 :
623 9 : match key {
624 9 : "shards" => shards = Some(value.parse()?),
625 7 : "permits" => permits = Some(value.parse()?),
626 4 : "epoch" => epoch = Some(humantime::parse_duration(value)?),
627 2 : "timeout" => timeout = Some(humantime::parse_duration(value)?),
628 0 : unknown => bail!("unknown key: {unknown}"),
629 : }
630 : }
631 :
632 : // these dont matter if lock is disabled
633 3 : if let Some(0) = permits {
634 1 : timeout = Some(Duration::default());
635 1 : epoch = Some(Duration::default());
636 1 : shards = Some(2);
637 2 : }
638 :
639 3 : let permits = permits.context("missing `permits`")?;
640 3 : let out = Self {
641 3 : shards: shards.context("missing `shards`")?,
642 3 : limiter: RateLimiterConfig {
643 3 : algorithm: RateLimitAlgorithm::Fixed,
644 3 : initial_limit: permits,
645 3 : },
646 3 : epoch: epoch.context("missing `epoch`")?,
647 3 : timeout: timeout.context("missing `timeout`")?,
648 : };
649 :
650 3 : ensure!(out.shards > 1, "shard count must be > 1");
651 3 : ensure!(
652 3 : out.shards.is_power_of_two(),
653 0 : "shard count must be a power of two"
654 : );
655 :
656 3 : Ok(out)
657 4 : }
658 : }
659 :
660 : impl FromStr for ConcurrencyLockOptions {
661 : type Err = anyhow::Error;
662 :
663 4 : fn from_str(options: &str) -> Result<Self, Self::Err> {
664 4 : let error = || format!("failed to parse cache lock options '{options}'");
665 4 : Self::parse(options).with_context(error)
666 4 : }
667 : }
668 :
#[cfg(test)]
mod tests {
    use crate::rate_limiter::Aimd;

    use super::*;

    /// Key order is free, and `ttl` may be omitted only when `size=0`.
    #[test]
    fn test_parse_cache_options() -> anyhow::Result<()> {
        let parsed: CacheOptions = "size=4096,ttl=5min".parse()?;
        assert_eq!(parsed.size, 4096);
        assert_eq!(parsed.ttl, Duration::from_secs(5 * 60));

        let parsed: CacheOptions = "ttl=4m,size=2".parse()?;
        assert_eq!(parsed.size, 2);
        assert_eq!(parsed.ttl, Duration::from_secs(4 * 60));

        let parsed: CacheOptions = "size=0,ttl=1s".parse()?;
        assert_eq!(parsed.size, 0);
        assert_eq!(parsed.ttl, Duration::from_secs(1));

        let parsed: CacheOptions = "size=0".parse()?;
        assert_eq!(parsed.size, 0);
        assert_eq!(parsed.ttl, Duration::default());

        Ok(())
    }

    /// The `key=value` form always yields the fixed limiter algorithm;
    /// `permits=0` fills in the remaining fields on its own.
    #[test]
    fn test_parse_lock_options() -> anyhow::Result<()> {
        let parsed: ConcurrencyLockOptions = "shards=32,permits=4,epoch=10m,timeout=1s".parse()?;
        assert_eq!(parsed.epoch, Duration::from_secs(10 * 60));
        assert_eq!(parsed.timeout, Duration::from_secs(1));
        assert_eq!(parsed.shards, 32);
        assert_eq!(parsed.limiter.initial_limit, 4);
        assert_eq!(parsed.limiter.algorithm, RateLimitAlgorithm::Fixed);

        let parsed: ConcurrencyLockOptions =
            "epoch=60s,shards=16,timeout=100ms,permits=8".parse()?;
        assert_eq!(parsed.epoch, Duration::from_secs(60));
        assert_eq!(parsed.timeout, Duration::from_millis(100));
        assert_eq!(parsed.shards, 16);
        assert_eq!(parsed.limiter.initial_limit, 8);
        assert_eq!(parsed.limiter.algorithm, RateLimitAlgorithm::Fixed);

        let parsed: ConcurrencyLockOptions = "permits=0".parse()?;
        assert_eq!(parsed.epoch, Duration::ZERO);
        assert_eq!(parsed.timeout, Duration::ZERO);
        assert_eq!(parsed.shards, 2);
        assert_eq!(parsed.limiter.initial_limit, 0);
        assert_eq!(parsed.limiter.algorithm, RateLimitAlgorithm::Fixed);

        Ok(())
    }

    /// The JSON form can select the AIMD limiter algorithm.
    #[test]
    fn test_parse_json_lock_options() -> anyhow::Result<()> {
        let parsed: ConcurrencyLockOptions = r#"{"shards":32,"initial_limit":44,"aimd":{"min":5,"max":500,"inc":10,"dec":0.9,"utilisation":0.8},"epoch":"10m","timeout":"1s"}"#
            .parse()?;
        assert_eq!(parsed.epoch, Duration::from_secs(10 * 60));
        assert_eq!(parsed.timeout, Duration::from_secs(1));
        assert_eq!(parsed.shards, 32);
        assert_eq!(parsed.limiter.initial_limit, 44);

        let expected_algorithm = RateLimitAlgorithm::Aimd {
            conf: Aimd {
                min: 5,
                max: 500,
                dec: 0.9,
                inc: 10,
                utilisation: 0.8,
            },
        };
        assert_eq!(parsed.limiter.algorithm, expected_algorithm);

        Ok(())
    }
}
|