LCOV - code coverage report
Current view: top level - proxy/src - config.rs (source / functions) Coverage Total Hit
Test: 75747cdbffeb0b6d2a2a311584368de68cd9aadc.info Lines: 55.6 % 367 204
Test Date: 2024-06-24 06:52:57 Functions: 33.3 % 63 21

            Line data    Source code
       1              : use crate::{
       2              :     auth::{self, backend::AuthRateLimiter},
       3              :     console::locks::ApiLocks,
       4              :     rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
       5              :     scram::threadpool::ThreadPool,
       6              :     serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
       7              :     Host,
       8              : };
       9              : use anyhow::{bail, ensure, Context, Ok};
      10              : use itertools::Itertools;
      11              : use remote_storage::RemoteStorageConfig;
      12              : use rustls::{
      13              :     crypto::ring::sign,
      14              :     pki_types::{CertificateDer, PrivateKeyDer},
      15              : };
      16              : use sha2::{Digest, Sha256};
      17              : use std::{
      18              :     collections::{HashMap, HashSet},
      19              :     str::FromStr,
      20              :     sync::Arc,
      21              :     time::Duration,
      22              : };
      23              : use tracing::{error, info};
      24              : use x509_parser::oid_registry;
      25              : 
      26              : pub struct ProxyConfig {
      27              :     pub tls_config: Option<TlsConfig>,
      28              :     pub auth_backend: auth::BackendType<'static, (), ()>,
      29              :     pub metric_collection: Option<MetricCollectionConfig>,
      30              :     pub allow_self_signed_compute: bool,
      31              :     pub http_config: HttpConfig,
      32              :     pub authentication_config: AuthenticationConfig,
      33              :     pub require_client_ip: bool,
      34              :     pub disable_ip_check_for_http: bool,
      35              :     pub redis_rps_limit: Vec<RateBucketInfo>,
      36              :     pub region: String,
      37              :     pub handshake_timeout: Duration,
      38              :     pub aws_region: String,
      39              :     pub wake_compute_retry_config: RetryConfig,
      40              :     pub connect_compute_locks: ApiLocks<Host>,
      41              :     pub connect_to_compute_retry_config: RetryConfig,
      42              : }
      43              : 
      44              : #[derive(Debug)]
      45              : pub struct MetricCollectionConfig {
      46              :     pub endpoint: reqwest::Url,
      47              :     pub interval: Duration,
      48              :     pub backup_metric_collection_config: MetricBackupCollectionConfig,
      49              : }
      50              : 
      51              : pub struct TlsConfig {
      52              :     pub config: Arc<rustls::ServerConfig>,
      53              :     pub common_names: HashSet<String>,
      54              :     pub cert_resolver: Arc<CertResolver>,
      55              : }
      56              : 
      57              : pub struct HttpConfig {
      58              :     pub request_timeout: tokio::time::Duration,
      59              :     pub pool_options: GlobalConnPoolOptions,
      60              :     pub cancel_set: CancelSet,
      61              :     pub client_conn_threshold: u64,
      62              : }
      63              : 
      64              : pub struct AuthenticationConfig {
      65              :     pub thread_pool: Arc<ThreadPool>,
      66              :     pub scram_protocol_timeout: tokio::time::Duration,
      67              :     pub rate_limiter_enabled: bool,
      68              :     pub rate_limiter: AuthRateLimiter,
      69              :     pub rate_limit_ip_subnet: u8,
      70              : }
      71              : 
      72              : impl TlsConfig {
      73           40 :     pub fn to_server_config(&self) -> Arc<rustls::ServerConfig> {
      74           40 :         self.config.clone()
      75           40 :     }
      76              : }
      77              : 
      78              : /// Configure TLS for the main endpoint.
      79            0 : pub fn configure_tls(
      80            0 :     key_path: &str,
      81            0 :     cert_path: &str,
      82            0 :     certs_dir: Option<&String>,
      83            0 : ) -> anyhow::Result<TlsConfig> {
      84            0 :     let mut cert_resolver = CertResolver::new();
      85            0 : 
      86            0 :     // add default certificate
      87            0 :     cert_resolver.add_cert_path(key_path, cert_path, true)?;
      88              : 
      89              :     // add extra certificates
      90            0 :     if let Some(certs_dir) = certs_dir {
      91            0 :         for entry in std::fs::read_dir(certs_dir)? {
      92            0 :             let entry = entry?;
      93            0 :             let path = entry.path();
      94            0 :             if path.is_dir() {
      95              :                 // file names aligned with default cert-manager names
      96            0 :                 let key_path = path.join("tls.key");
      97            0 :                 let cert_path = path.join("tls.crt");
      98            0 :                 if key_path.exists() && cert_path.exists() {
      99            0 :                     cert_resolver.add_cert_path(
     100            0 :                         &key_path.to_string_lossy(),
     101            0 :                         &cert_path.to_string_lossy(),
     102            0 :                         false,
     103            0 :                     )?;
     104            0 :                 }
     105            0 :             }
     106              :         }
     107            0 :     }
     108              : 
     109            0 :     let common_names = cert_resolver.get_common_names();
     110            0 : 
     111            0 :     let cert_resolver = Arc::new(cert_resolver);
     112            0 : 
     113            0 :     // allow TLS 1.2 to be compatible with older client libraries
     114            0 :     let config = rustls::ServerConfig::builder_with_protocol_versions(&[
     115            0 :         &rustls::version::TLS13,
     116            0 :         &rustls::version::TLS12,
     117            0 :     ])
     118            0 :     .with_no_client_auth()
     119            0 :     .with_cert_resolver(cert_resolver.clone())
     120            0 :     .into();
     121            0 : 
     122            0 :     Ok(TlsConfig {
     123            0 :         config,
     124            0 :         common_names,
     125            0 :         cert_resolver,
     126            0 :     })
     127            0 : }
     128              : 
     129              : /// Channel binding parameter
     130              : ///
     131              : /// <https://www.rfc-editor.org/rfc/rfc5929#section-4>
     132              : /// Description: The hash of the TLS server's certificate as it
     133              : /// appears, octet for octet, in the server's Certificate message.  Note
     134              : /// that the Certificate message contains a certificate_list, in which
     135              : /// the first element is the server's certificate.
     136              : ///
     137              : /// The hash function is to be selected as follows:
     138              : ///
     139              : /// * if the certificate's signatureAlgorithm uses a single hash
     140              : ///   function, and that hash function is either MD5 or SHA-1, then use SHA-256;
     141              : ///
     142              : /// * if the certificate's signatureAlgorithm uses a single hash
     143              : ///   function and that hash function neither MD5 nor SHA-1, then use
     144              : ///   the hash function associated with the certificate's
     145              : ///   signatureAlgorithm;
     146              : ///
     147              : /// * if the certificate's signatureAlgorithm uses no hash functions or
     148              : ///   uses multiple hash functions, then this channel binding type's
     149              : ///   channel bindings are undefined at this time (updates to is channel
     150              : ///   binding type may occur to address this issue if it ever arises).
     151              : #[derive(Debug, Clone, Copy)]
     152              : pub enum TlsServerEndPoint {
     153              :     Sha256([u8; 32]),
     154              :     Undefined,
     155              : }
     156              : 
     157              : impl TlsServerEndPoint {
     158           42 :     pub fn new(cert: &CertificateDer) -> anyhow::Result<Self> {
     159           42 :         let sha256_oids = [
     160           42 :             // I'm explicitly not adding MD5 or SHA1 here... They're bad.
     161           42 :             oid_registry::OID_SIG_ECDSA_WITH_SHA256,
     162           42 :             oid_registry::OID_PKCS1_SHA256WITHRSA,
     163           42 :         ];
     164              : 
     165           42 :         let pem = x509_parser::parse_x509_certificate(cert)
     166           42 :             .context("Failed to parse PEM object from cerficiate")?
     167              :             .1;
     168              : 
     169           42 :         info!(subject = %pem.subject, "parsing TLS certificate");
     170              : 
     171           42 :         let reg = oid_registry::OidRegistry::default().with_all_crypto();
     172           42 :         let oid = pem.signature_algorithm.oid();
     173           42 :         let alg = reg.get(oid);
     174           42 :         if sha256_oids.contains(oid) {
     175           42 :             let tls_server_end_point: [u8; 32] = Sha256::new().chain_update(cert).finalize().into();
     176           42 :             info!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), tls_server_end_point = %base64::encode(tls_server_end_point), "determined channel binding");
     177           42 :             Ok(Self::Sha256(tls_server_end_point))
     178              :         } else {
     179            0 :             error!(subject = %pem.subject, signature_algorithm = alg.map(|a| a.description()), "unknown channel binding");
     180            0 :             Ok(Self::Undefined)
     181              :         }
     182           42 :     }
     183              : 
     184           32 :     pub fn supported(&self) -> bool {
     185           32 :         !matches!(self, TlsServerEndPoint::Undefined)
     186           32 :     }
     187              : }
     188              : 
     189              : #[derive(Default, Debug)]
     190              : pub struct CertResolver {
     191              :     certs: HashMap<String, (Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
     192              :     default: Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)>,
     193              : }
     194              : 
     195              : impl CertResolver {
     196           42 :     pub fn new() -> Self {
     197           42 :         Self::default()
     198           42 :     }
     199              : 
     200            0 :     fn add_cert_path(
     201            0 :         &mut self,
     202            0 :         key_path: &str,
     203            0 :         cert_path: &str,
     204            0 :         is_default: bool,
     205            0 :     ) -> anyhow::Result<()> {
     206            0 :         let priv_key = {
     207            0 :             let key_bytes = std::fs::read(key_path)
     208            0 :                 .context(format!("Failed to read TLS keys at '{key_path}'"))?;
     209            0 :             let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]).collect_vec();
     210            0 : 
     211            0 :             ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
     212              :             PrivateKeyDer::Pkcs8(
     213            0 :                 keys.pop()
     214            0 :                     .unwrap()
     215            0 :                     .context(format!("Failed to parse TLS keys at '{key_path}'"))?,
     216              :             )
     217              :         };
     218              : 
     219            0 :         let cert_chain_bytes = std::fs::read(cert_path)
     220            0 :             .context(format!("Failed to read TLS cert file at '{cert_path}.'"))?;
     221              : 
     222            0 :         let cert_chain = {
     223            0 :             rustls_pemfile::certs(&mut &cert_chain_bytes[..])
     224            0 :                 .try_collect()
     225            0 :                 .with_context(|| {
     226            0 :                     format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.")
     227            0 :                 })?
     228              :         };
     229              : 
     230            0 :         self.add_cert(priv_key, cert_chain, is_default)
     231            0 :     }
     232              : 
     233           42 :     pub fn add_cert(
     234           42 :         &mut self,
     235           42 :         priv_key: PrivateKeyDer<'static>,
     236           42 :         cert_chain: Vec<CertificateDer<'static>>,
     237           42 :         is_default: bool,
     238           42 :     ) -> anyhow::Result<()> {
     239           42 :         let key = sign::any_supported_type(&priv_key).context("invalid private key")?;
     240              : 
     241           42 :         let first_cert = &cert_chain[0];
     242           42 :         let tls_server_end_point = TlsServerEndPoint::new(first_cert)?;
     243           42 :         let pem = x509_parser::parse_x509_certificate(first_cert)
     244           42 :             .context("Failed to parse PEM object from cerficiate")?
     245              :             .1;
     246              : 
     247           42 :         let common_name = pem.subject().to_string();
     248              : 
     249              :         // We only use non-wildcard certificates in link proxy so it seems okay to treat them the same as
     250              :         // wildcard ones as we don't use SNI there. That treatment only affects certificate selection, so
     251              :         // verify-full will still check wildcard match. Old coding here just ignored non-wildcard common names
     252              :         // and passed None instead, which blows up number of cases downstream code should handle. Proper coding
     253              :         // here should better avoid Option for common_names, and do wildcard-based certificate selection instead
     254              :         // of cutting off '*.' parts.
     255           42 :         let common_name = if common_name.starts_with("CN=*.") {
     256            0 :             common_name.strip_prefix("CN=*.").map(|s| s.to_string())
     257              :         } else {
     258           42 :             common_name.strip_prefix("CN=").map(|s| s.to_string())
     259              :         }
     260           42 :         .context("Failed to parse common name from certificate")?;
     261              : 
     262           42 :         let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
     263           42 : 
     264           42 :         if is_default {
     265           42 :             self.default = Some((cert.clone(), tls_server_end_point));
     266           42 :         }
     267              : 
     268           42 :         self.certs.insert(common_name, (cert, tls_server_end_point));
     269           42 : 
     270           42 :         Ok(())
     271           42 :     }
     272              : 
     273           42 :     pub fn get_common_names(&self) -> HashSet<String> {
     274           42 :         self.certs.keys().map(|s| s.to_string()).collect()
     275           42 :     }
     276              : }
     277              : 
     278              : impl rustls::server::ResolvesServerCert for CertResolver {
     279            0 :     fn resolve(
     280            0 :         &self,
     281            0 :         client_hello: rustls::server::ClientHello,
     282            0 :     ) -> Option<Arc<rustls::sign::CertifiedKey>> {
     283            0 :         self.resolve(client_hello.server_name()).map(|x| x.0)
     284            0 :     }
     285              : }
     286              : 
     287              : impl CertResolver {
     288           40 :     pub fn resolve(
     289           40 :         &self,
     290           40 :         server_name: Option<&str>,
     291           40 :     ) -> Option<(Arc<rustls::sign::CertifiedKey>, TlsServerEndPoint)> {
     292              :         // loop here and cut off more and more subdomains until we find
     293              :         // a match to get a proper wildcard support. OTOH, we now do not
     294              :         // use nested domains, so keep this simple for now.
     295              :         //
     296              :         // With the current coding foo.com will match *.foo.com and that
     297              :         // repeats behavior of the old code.
     298           40 :         if let Some(mut sni_name) = server_name {
     299              :             loop {
     300           80 :                 if let Some(cert) = self.certs.get(sni_name) {
     301           40 :                     return Some(cert.clone());
     302           40 :                 }
     303           40 :                 if let Some((_, rest)) = sni_name.split_once('.') {
     304           40 :                     sni_name = rest;
     305           40 :                 } else {
     306            0 :                     return None;
     307              :                 }
     308              :             }
     309              :         } else {
     310              :             // No SNI, use the default certificate, otherwise we can't get to
     311              :             // options parameter which can be used to set endpoint name too.
     312              :             // That means that non-SNI flow will not work for CNAME domains in
     313              :             // verify-full mode.
     314              :             //
     315              :             // If that will be a problem we can:
     316              :             //
     317              :             // a) Instead of multi-cert approach use single cert with extra
     318              :             //    domains listed in Subject Alternative Name (SAN).
     319              :             // b) Deploy separate proxy instances for extra domains.
     320            0 :             self.default.as_ref().cloned()
     321              :         }
     322           40 :     }
     323              : }
     324              : 
     325              : #[derive(Debug)]
     326              : pub struct EndpointCacheConfig {
     327              :     /// Batch size to receive all endpoints on the startup.
     328              :     pub initial_batch_size: usize,
     329              :     /// Batch size to receive endpoints.
     330              :     pub default_batch_size: usize,
     331              :     /// Timeouts for the stream read operation.
     332              :     pub xread_timeout: Duration,
     333              :     /// Stream name to read from.
     334              :     pub stream_name: String,
     335              :     /// Limiter info (to distinguish when to enable cache).
     336              :     pub limiter_info: Vec<RateBucketInfo>,
     337              :     /// Disable cache.
     338              :     /// If true, cache is ignored, but reports all statistics.
     339              :     pub disable_cache: bool,
     340              :     /// Retry interval for the stream read operation.
     341              :     pub retry_interval: Duration,
     342              : }
     343              : 
     344              : impl EndpointCacheConfig {
     345              :     /// Default options for [`crate::console::provider::NodeInfoCache`].
     346              :     /// Notice that by default the limiter is empty, which means that cache is disabled.
     347              :     pub const CACHE_DEFAULT_OPTIONS: &'static str =
     348              :         "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
     349              : 
     350              :     /// Parse cache options passed via cmdline.
     351              :     /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
     352            0 :     fn parse(options: &str) -> anyhow::Result<Self> {
     353            0 :         let mut initial_batch_size = None;
     354            0 :         let mut default_batch_size = None;
     355            0 :         let mut xread_timeout = None;
     356            0 :         let mut stream_name = None;
     357            0 :         let mut limiter_info = vec![];
     358            0 :         let mut disable_cache = false;
     359            0 :         let mut retry_interval = None;
     360              : 
     361            0 :         for option in options.split(',') {
     362            0 :             let (key, value) = option
     363            0 :                 .split_once('=')
     364            0 :                 .with_context(|| format!("bad key-value pair: {option}"))?;
     365              : 
     366            0 :             match key {
     367            0 :                 "initial_batch_size" => initial_batch_size = Some(value.parse()?),
     368            0 :                 "default_batch_size" => default_batch_size = Some(value.parse()?),
     369            0 :                 "xread_timeout" => xread_timeout = Some(humantime::parse_duration(value)?),
     370            0 :                 "stream_name" => stream_name = Some(value.to_string()),
     371            0 :                 "limiter_info" => limiter_info.push(RateBucketInfo::from_str(value)?),
     372            0 :                 "disable_cache" => disable_cache = value.parse()?,
     373            0 :                 "retry_interval" => retry_interval = Some(humantime::parse_duration(value)?),
     374            0 :                 unknown => bail!("unknown key: {unknown}"),
     375              :             }
     376              :         }
     377            0 :         RateBucketInfo::validate(&mut limiter_info)?;
     378              : 
     379              :         Ok(Self {
     380            0 :             initial_batch_size: initial_batch_size.context("missing `initial_batch_size`")?,
     381            0 :             default_batch_size: default_batch_size.context("missing `default_batch_size`")?,
     382            0 :             xread_timeout: xread_timeout.context("missing `xread_timeout`")?,
     383            0 :             stream_name: stream_name.context("missing `stream_name`")?,
     384            0 :             disable_cache,
     385            0 :             limiter_info,
     386            0 :             retry_interval: retry_interval.context("missing `retry_interval`")?,
     387              :         })
     388            0 :     }
     389              : }
     390              : 
     391              : impl FromStr for EndpointCacheConfig {
     392              :     type Err = anyhow::Error;
     393              : 
     394            0 :     fn from_str(options: &str) -> Result<Self, Self::Err> {
     395            0 :         let error = || format!("failed to parse endpoint cache options '{options}'");
     396            0 :         Self::parse(options).with_context(error)
     397            0 :     }
     398              : }
     399              : #[derive(Debug)]
     400              : pub struct MetricBackupCollectionConfig {
     401              :     pub interval: Duration,
     402              :     pub remote_storage_config: OptRemoteStorageConfig,
     403              :     pub chunk_size: usize,
     404              : }
     405              : 
     406              : /// Hack to avoid clap being smarter. If you don't use this type alias, clap assumes more about the optional state and you get
     407              : /// runtime type errors from the value parser we use.
     408              : pub type OptRemoteStorageConfig = Option<RemoteStorageConfig>;
     409              : 
     410           24 : pub fn remote_storage_from_toml(s: &str) -> anyhow::Result<OptRemoteStorageConfig> {
     411           24 :     RemoteStorageConfig::from_toml(&s.parse()?)
     412           24 : }
     413              : 
     414              : /// Helper for cmdline cache options parsing.
     415              : #[derive(Debug)]
     416              : pub struct CacheOptions {
     417              :     /// Max number of entries.
     418              :     pub size: usize,
     419              :     /// Entry's time-to-live.
     420              :     pub ttl: Duration,
     421              : }
     422              : 
     423              : impl CacheOptions {
     424              :     /// Default options for [`crate::console::provider::NodeInfoCache`].
     425              :     pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
     426              : 
     427              :     /// Parse cache options passed via cmdline.
     428              :     /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
     429            8 :     fn parse(options: &str) -> anyhow::Result<Self> {
     430            8 :         let mut size = None;
     431            8 :         let mut ttl = None;
     432              : 
     433           14 :         for option in options.split(',') {
     434           14 :             let (key, value) = option
     435           14 :                 .split_once('=')
     436           14 :                 .with_context(|| format!("bad key-value pair: {option}"))?;
     437              : 
     438           14 :             match key {
     439           14 :                 "size" => size = Some(value.parse()?),
     440            6 :                 "ttl" => ttl = Some(humantime::parse_duration(value)?),
     441            0 :                 unknown => bail!("unknown key: {unknown}"),
     442              :             }
     443              :         }
     444              : 
     445              :         // TTL doesn't matter if cache is always empty.
     446            8 :         if let Some(0) = size {
     447            4 :             ttl.get_or_insert(Duration::default());
     448            4 :         }
     449              : 
     450              :         Ok(Self {
     451            8 :             size: size.context("missing `size`")?,
     452            8 :             ttl: ttl.context("missing `ttl`")?,
     453              :         })
     454            8 :     }
     455              : }
     456              : 
     457              : impl FromStr for CacheOptions {
     458              :     type Err = anyhow::Error;
     459              : 
     460            8 :     fn from_str(options: &str) -> Result<Self, Self::Err> {
     461            8 :         let error = || format!("failed to parse cache options '{options}'");
     462            8 :         Self::parse(options).with_context(error)
     463            8 :     }
     464              : }
     465              : 
     466              : /// Helper for cmdline cache options parsing.
     467              : #[derive(Debug)]
     468              : pub struct ProjectInfoCacheOptions {
     469              :     /// Max number of entries.
     470              :     pub size: usize,
     471              :     /// Entry's time-to-live.
     472              :     pub ttl: Duration,
     473              :     /// Max number of roles per endpoint.
     474              :     pub max_roles: usize,
     475              :     /// Gc interval.
     476              :     pub gc_interval: Duration,
     477              : }
     478              : 
     479              : impl ProjectInfoCacheOptions {
     480              :     /// Default options for [`crate::console::provider::NodeInfoCache`].
     481              :     pub const CACHE_DEFAULT_OPTIONS: &'static str =
     482              :         "size=10000,ttl=4m,max_roles=10,gc_interval=60m";
     483              : 
     484              :     /// Parse cache options passed via cmdline.
     485              :     /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
     486            0 :     fn parse(options: &str) -> anyhow::Result<Self> {
     487            0 :         let mut size = None;
     488            0 :         let mut ttl = None;
     489            0 :         let mut max_roles = None;
     490            0 :         let mut gc_interval = None;
     491              : 
     492            0 :         for option in options.split(',') {
     493            0 :             let (key, value) = option
     494            0 :                 .split_once('=')
     495            0 :                 .with_context(|| format!("bad key-value pair: {option}"))?;
     496              : 
     497            0 :             match key {
     498            0 :                 "size" => size = Some(value.parse()?),
     499            0 :                 "ttl" => ttl = Some(humantime::parse_duration(value)?),
     500            0 :                 "max_roles" => max_roles = Some(value.parse()?),
     501            0 :                 "gc_interval" => gc_interval = Some(humantime::parse_duration(value)?),
     502            0 :                 unknown => bail!("unknown key: {unknown}"),
     503              :             }
     504              :         }
     505              : 
     506              :         // TTL doesn't matter if cache is always empty.
     507            0 :         if let Some(0) = size {
     508            0 :             ttl.get_or_insert(Duration::default());
     509            0 :         }
     510              : 
     511              :         Ok(Self {
     512            0 :             size: size.context("missing `size`")?,
     513            0 :             ttl: ttl.context("missing `ttl`")?,
     514            0 :             max_roles: max_roles.context("missing `max_roles`")?,
     515            0 :             gc_interval: gc_interval.context("missing `gc_interval`")?,
     516              :         })
     517            0 :     }
     518              : }
     519              : 
     520              : impl FromStr for ProjectInfoCacheOptions {
     521              :     type Err = anyhow::Error;
     522              : 
     523            0 :     fn from_str(options: &str) -> Result<Self, Self::Err> {
     524            0 :         let error = || format!("failed to parse cache options '{options}'");
     525            0 :         Self::parse(options).with_context(error)
     526            0 :     }
     527              : }
     528              : 
     529              : /// Retry configuration used when connecting to compute and when waking compute.
     530              : #[derive(Clone, Copy, Debug)]
     531              : pub struct RetryConfig {
     532              :     /// Number of times we should retry.
     533              :     pub max_retries: u32,
     534              :     /// Retry duration is `base_delay * backoff_factor ^ n`, where `n` starts at 0.
     535              :     pub base_delay: tokio::time::Duration,
     536              :     /// Exponential base for the retry wait duration.
     537              :     pub backoff_factor: f64,
     538              : }
     539              : 
     540              : impl RetryConfig {
     541              :     // Default option strings for `RetryConfig`, in the format accepted by `parse`.
     542              : 
     543              :     /// Total delay for 5 retries with a 200ms base delay and a backoff factor of 2 is about 6s.
     544              :     pub const CONNECT_TO_COMPUTE_DEFAULT_VALUES: &'static str =
     545              :         "num_retries=5,base_retry_wait_duration=200ms,retry_wait_exponent_base=2";
     546              :     /// Total delay for 8 retries with a 100ms base delay and a backoff factor of 1.6 is about 7s.
     547              :     /// Cplane has a timeout of 60s on each request, so 8m7s in total (8 * 60s + 7s).
     548              :     pub const WAKE_COMPUTE_DEFAULT_VALUES: &'static str =
     549              :         "num_retries=8,base_retry_wait_duration=100ms,retry_wait_exponent_base=1.6";
     550              : 
     551              :     /// Parse retry options passed via cmdline as comma-separated `key=value` pairs.
     552              :     /// All three keys are required. Example: [`Self::CONNECT_TO_COMPUTE_DEFAULT_VALUES`].
     553            0 :     pub fn parse(options: &str) -> anyhow::Result<Self> {
     554            0 :         let mut num_retries = None;
     555            0 :         let mut base_retry_wait_duration = None;
     556            0 :         let mut retry_wait_exponent_base = None;
     557              : 
     558            0 :         for option in options.split(',') {
     559            0 :             let (key, value) = option
     560            0 :                 .split_once('=')
     561            0 :                 .with_context(|| format!("bad key-value pair: {option}"))?;
     562              : 
     563            0 :             match key {
     564            0 :                 "num_retries" => num_retries = Some(value.parse()?),
     565            0 :                 "base_retry_wait_duration" => {
     566            0 :                     base_retry_wait_duration = Some(humantime::parse_duration(value)?)
     567              :                 }
     568            0 :                 "retry_wait_exponent_base" => retry_wait_exponent_base = Some(value.parse()?),
     569            0 :                 unknown => bail!("unknown key: {unknown}"), // unrecognized keys are an error, not ignored
     570              :             }
     571              :         }
     572              : 
     573              :         Ok(Self {
     574            0 :             max_retries: num_retries.context("missing `num_retries`")?,
     575            0 :             base_delay: base_retry_wait_duration.context("missing `base_retry_wait_duration`")?,
     576            0 :             backoff_factor: retry_wait_exponent_base
     577            0 :                 .context("missing `retry_wait_exponent_base`")?,
     578              :         })
     579            0 :     }
     580              : }
     581              : 
     582              : /// Helper for cmdline concurrency lock options parsing.
     583           16 : #[derive(serde::Deserialize)]
     584              : pub struct ConcurrencyLockOptions {
     585              :     /// The number of shards the lock map should have
     586              :     pub shards: usize,
     587              :     /// The number of allowed concurrent requests for each endpoint
     588              :     #[serde(flatten)]
     589              :     pub limiter: RateLimiterConfig,
     590              :     /// Garbage collection epoch
     591              :     #[serde(deserialize_with = "humantime_serde::deserialize")]
     592              :     pub epoch: Duration,
     593              :     /// Lock timeout
     594              :     #[serde(deserialize_with = "humantime_serde::deserialize")]
     595              :     pub timeout: Duration,
     596              : }
     597              : 
     598              : impl ConcurrencyLockOptions {
     599              :     /// Default wake-compute lock options for [`ApiLocks`]; `permits=0` disables the lock.
     600              :     pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
     601              :     /// Default connect-compute lock options for [`ApiLocks`].
     602              :     pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
     603              :         "shards=64,permits=100,epoch=10m,timeout=10ms";
     604              : 
     605              :     // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "shards=32,permits=4,epoch=10m,timeout=1s";
     606              : 
     607              :     /// Parse lock options passed via cmdline: a JSON object or comma-separated `key=value` pairs.
     608              :     /// Example: [`Self::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK`].
     609            8 :     fn parse(options: &str) -> anyhow::Result<Self> {
     610            8 :         let options = options.trim();
     611            8 :         if options.starts_with('{') && options.ends_with('}') {
     612            2 :             return Ok(serde_json::from_str(options)?); // NOTE(review): JSON input returns here and skips the shard-count checks below - confirm intended
     613            6 :         }
     614            6 : 
     615            6 :         let mut shards = None;
     616            6 :         let mut permits = None;
     617            6 :         let mut epoch = None;
     618            6 :         let mut timeout = None;
     619              : 
     620           18 :         for option in options.split(',') {
     621           18 :             let (key, value) = option
     622           18 :                 .split_once('=')
     623           18 :                 .with_context(|| format!("bad key-value pair: {option}"))?;
     624              : 
     625           18 :             match key {
     626           18 :                 "shards" => shards = Some(value.parse()?),
     627           14 :                 "permits" => permits = Some(value.parse()?),
     628            8 :                 "epoch" => epoch = Some(humantime::parse_duration(value)?),
     629            4 :                 "timeout" => timeout = Some(humantime::parse_duration(value)?),
     630            0 :                 unknown => bail!("unknown key: {unknown}"),
     631              :             }
     632              :         }
     633              : 
     634              :         // These don't matter if the lock is disabled (`permits=0`); any supplied values are overwritten.
     635            6 :         if let Some(0) = permits {
     636            2 :             timeout = Some(Duration::default());
     637            2 :             epoch = Some(Duration::default());
     638            2 :             shards = Some(2);
     639            4 :         }
     640              : 
     641            6 :         let permits = permits.context("missing `permits`")?;
     642            6 :         let out = Self {
     643            6 :             shards: shards.context("missing `shards`")?,
     644            6 :             limiter: RateLimiterConfig {
     645            6 :                 algorithm: RateLimitAlgorithm::Fixed,
     646            6 :                 initial_limit: permits,
     647            6 :             },
     648            6 :             epoch: epoch.context("missing `epoch`")?,
     649            6 :             timeout: timeout.context("missing `timeout`")?,
     650              :         };
     651              : 
     652            6 :         ensure!(out.shards > 1, "shard count must be > 1");
     653            6 :         ensure!(
     654            6 :             out.shards.is_power_of_two(),
     655            0 :             "shard count must be a power of two"
     656              :         );
     657              : 
     658            6 :         Ok(out)
     659            8 :     }
     660              : }
     661              : 
     662              : impl FromStr for ConcurrencyLockOptions {
     663              :     type Err = anyhow::Error;
     664              : 
     665            8 :     fn from_str(options: &str) -> Result<Self, Self::Err> {
     666            8 :         let error = || format!("failed to parse cache lock options '{options}'"); // only evaluated if parsing fails
     667            8 :         Self::parse(options).with_context(error) // delegate to `Self::parse`, attaching the raw input to any error
     668            8 :     }
     669              : }
     670              : 
     671              : #[cfg(test)]
     672              : mod tests {
     673              :     use crate::rate_limiter::Aimd;
     674              : 
     675              :     use super::*;
     676              : 
     677              :     #[test]
     678            2 :     fn test_parse_cache_options() -> anyhow::Result<()> {
     679            2 :         let CacheOptions { size, ttl } = "size=4096,ttl=5min".parse()?;
     680            2 :         assert_eq!(size, 4096);
     681            2 :         assert_eq!(ttl, Duration::from_secs(5 * 60));
     682              : 
     683            2 :         let CacheOptions { size, ttl } = "ttl=4m,size=2".parse()?; // keys may appear in any order
     684            2 :         assert_eq!(size, 2);
     685            2 :         assert_eq!(ttl, Duration::from_secs(4 * 60));
     686              : 
     687            2 :         let CacheOptions { size, ttl } = "size=0,ttl=1s".parse()?; // an explicit ttl is kept even when size is 0
     688            2 :         assert_eq!(size, 0);
     689            2 :         assert_eq!(ttl, Duration::from_secs(1));
     690              : 
     691            2 :         let CacheOptions { size, ttl } = "size=0".parse()?; // ttl may be omitted when size is 0
     692            2 :         assert_eq!(size, 0);
     693            2 :         assert_eq!(ttl, Duration::default());
     694              : 
     695            2 :         Ok(())
     696            2 :     }
     697              : 
     698              :     #[test]
     699            2 :     fn test_parse_lock_options() -> anyhow::Result<()> {
     700              :         let ConcurrencyLockOptions {
     701            2 :             epoch,
     702            2 :             limiter,
     703            2 :             shards,
     704            2 :             timeout,
     705            2 :         } = "shards=32,permits=4,epoch=10m,timeout=1s".parse()?;
     706            2 :         assert_eq!(epoch, Duration::from_secs(10 * 60));
     707            2 :         assert_eq!(timeout, Duration::from_secs(1));
     708            2 :         assert_eq!(shards, 32);
     709            2 :         assert_eq!(limiter.initial_limit, 4);
     710            2 :         assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);
     711              : 
     712              :         let ConcurrencyLockOptions {
     713            2 :             epoch,
     714            2 :             limiter,
     715            2 :             shards,
     716            2 :             timeout,
     717            2 :         } = "epoch=60s,shards=16,timeout=100ms,permits=8".parse()?; // keys may appear in any order
     718            2 :         assert_eq!(epoch, Duration::from_secs(60));
     719            2 :         assert_eq!(timeout, Duration::from_millis(100));
     720            2 :         assert_eq!(shards, 16);
     721            2 :         assert_eq!(limiter.initial_limit, 8);
     722            2 :         assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);
     723              : 
     724              :         let ConcurrencyLockOptions {
     725            2 :             epoch,
     726            2 :             limiter,
     727            2 :             shards,
     728            2 :             timeout,
     729            2 :         } = "permits=0".parse()?; // permits=0: the other keys may be omitted and get placeholder values
     730            2 :         assert_eq!(epoch, Duration::ZERO);
     731            2 :         assert_eq!(timeout, Duration::ZERO);
     732            2 :         assert_eq!(shards, 2);
     733            2 :         assert_eq!(limiter.initial_limit, 0);
     734            2 :         assert_eq!(limiter.algorithm, RateLimitAlgorithm::Fixed);
     735              : 
     736            2 :         Ok(())
     737            2 :     }
     738              : 
     739              :     #[test]
     740            2 :     fn test_parse_json_lock_options() -> anyhow::Result<()> {
     741              :         let ConcurrencyLockOptions {
     742            2 :             epoch,
     743            2 :             limiter,
     744            2 :             shards,
     745            2 :             timeout,
     746            2 :         } = r#"{"shards":32,"initial_limit":44,"aimd":{"min":5,"max":500,"inc":10,"dec":0.9,"utilisation":0.8},"epoch":"10m","timeout":"1s"}"#
     747            2 :             .parse()?; // input wrapped in `{` ... `}` is deserialized as JSON
     748            2 :         assert_eq!(epoch, Duration::from_secs(10 * 60));
     749            2 :         assert_eq!(timeout, Duration::from_secs(1));
     750            2 :         assert_eq!(shards, 32);
     751            2 :         assert_eq!(limiter.initial_limit, 44);
     752            2 :         assert_eq!(
     753            2 :             limiter.algorithm,
     754            2 :             RateLimitAlgorithm::Aimd {
     755            2 :                 conf: Aimd {
     756            2 :                     min: 5,
     757            2 :                     max: 500,
     758            2 :                     dec: 0.9,
     759            2 :                     inc: 10,
     760            2 :                     utilisation: 0.8
     761            2 :                 }
     762            2 :             },
     763            2 :         );
     764              : 
     765            2 :         Ok(())
     766            2 :     }
     767              : }
        

Generated by: LCOV version 2.1-beta