LCOV - differential code coverage report
Current view: top level - proxy/src/bin - proxy.rs (source / functions) Coverage Total Hit UIC UBC GIC CBC EUB ECB
Current: cd44433dd675caa99df17a61b18949c8387e2242.info Lines: 85.6 % 236 202 34 202
Current Date: 2024-01-09 02:06:09 Functions: 29.3 % 99 29 2 68 2 27 2 2
Baseline: 66c52a629a0f4a503e193045e0df4c77139e344b.info
Baseline Date: 2024-01-08 15:34:46

           TLA  Line data    Source code
       1                 : use futures::future::Either;
       2                 : use proxy::auth;
       3                 : use proxy::config::AuthenticationConfig;
       4                 : use proxy::config::CacheOptions;
       5                 : use proxy::config::HttpConfig;
       6                 : use proxy::console;
       7                 : use proxy::console::provider::AllowedIpsCache;
       8                 : use proxy::console::provider::NodeInfoCache;
       9                 : use proxy::console::provider::RoleSecretCache;
      10                 : use proxy::context::parquet::ParquetUploadArgs;
      11                 : use proxy::http;
      12                 : use proxy::rate_limiter::EndpointRateLimiter;
      13                 : use proxy::rate_limiter::RateBucketInfo;
      14                 : use proxy::rate_limiter::RateLimiterConfig;
      15                 : use proxy::serverless::GlobalConnPoolOptions;
      16                 : use proxy::usage_metrics;
      17                 : 
      18                 : use anyhow::bail;
      19                 : use proxy::config::{self, ProxyConfig};
      20                 : use proxy::serverless;
      21                 : use std::pin::pin;
      22                 : use std::sync::Arc;
      23                 : use std::{borrow::Cow, net::SocketAddr};
      24                 : use tokio::net::TcpListener;
      25                 : use tokio::task::JoinSet;
      26                 : use tokio_util::sync::CancellationToken;
      27                 : use tracing::info;
      28                 : use tracing::warn;
      29                 : use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
      30                 : 
      31                 : project_git_version!(GIT_VERSION);
      32                 : project_build_tag!(BUILD_TAG);
      33                 : 
      34                 : use clap::{Parser, ValueEnum};
      35                 : 
      36 CBC         210 : #[derive(Clone, Debug, ValueEnum)]
      37                 : enum AuthBackend {
      38                 :     Console,
      39                 :     #[cfg(feature = "testing")]
      40                 :     Postgres,
      41                 :     Link,
      42                 : }
      43                 : 
      44                 : /// Neon proxy/router
      45              23 : #[derive(Parser)]
      46                 : #[command(version = GIT_VERSION, about)]
      47                 : struct ProxyCliArgs {
      48                 :     /// Name of the region this proxy is deployed in
      49              23 :     #[clap(long, default_value_t = String::new())]
      50 UBC           0 :     region: String,
      51                 :     /// listen for incoming client connections on ip:port
      52                 :     #[clap(short, long, default_value = "127.0.0.1:4432")]
      53               0 :     proxy: String,
      54 CBC          23 :     #[clap(value_enum, long, default_value_t = AuthBackend::Link)]
      55 UBC           0 :     auth_backend: AuthBackend,
      56                 :     /// listen for management callback connection on ip:port
      57                 :     #[clap(short, long, default_value = "127.0.0.1:7000")]
      58               0 :     mgmt: String,
      59                 :     /// listen for incoming http connections (metrics, etc) on ip:port
      60                 :     #[clap(long, default_value = "127.0.0.1:7001")]
      61               0 :     http: String,
      62                 :     /// listen for incoming wss connections on ip:port
      63                 :     #[clap(long)]
      64                 :     wss: Option<String>,
      65                 :     /// redirect unauthenticated users to the given uri in case of link auth
      66                 :     #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
      67               0 :     uri: String,
      68                 :     /// cloud API endpoint for authenticating users
      69                 :     #[clap(
      70                 :         short,
      71                 :         long,
      72                 :         default_value = "http://localhost:3000/authenticate_proxy_request/"
      73                 :     )]
      74               0 :     auth_endpoint: String,
      75                 :     /// path to TLS key for client postgres connections
      76                 :     ///
      77                 :     /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
      78                 :     #[clap(short = 'k', long, alias = "ssl-key")]
      79                 :     tls_key: Option<String>,
      80                 :     /// path to TLS cert for client postgres connections
      81                 :     ///
      82                 :     /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
      83                 :     #[clap(short = 'c', long, alias = "ssl-cert")]
      84                 :     tls_cert: Option<String>,
      85                 :     /// path to directory with TLS certificates for client postgres connections
      86                 :     #[clap(long)]
      87                 :     certs_dir: Option<String>,
      88                 :     /// http endpoint to receive periodic metric updates
      89                 :     #[clap(long)]
      90                 :     metric_collection_endpoint: Option<String>,
      91                 :     /// how often metrics should be sent to a collection endpoint
      92                 :     #[clap(long)]
      93                 :     metric_collection_interval: Option<String>,
      94                 :     /// cache for `wake_compute` api method (use `size=0` to disable)
      95                 :     #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
      96               0 :     wake_compute_cache: String,
      97                 :     /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
      98                 :     #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
      99               0 :     wake_compute_lock: String,
     100                 :     /// Allow self-signed certificates for compute nodes (for testing)
     101 CBC          23 :     #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     102 UBC           0 :     allow_self_signed_compute: bool,
     103                 :     #[clap(flatten)]
     104                 :     sql_over_http: SqlOverHttpArgs,
     105                 :     /// timeout for scram authentication protocol
     106                 :     #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
     107               0 :     scram_protocol_timeout: tokio::time::Duration,
     108                 :     /// Require that all incoming requests have a Proxy Protocol V2 packet **and** have an IP address associated.
     109 CBC          23 :     #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     110 UBC           0 :     require_client_ip: bool,
     111                 :     /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour.
     112 CBC          23 :     #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     113 UBC           0 :     disable_dynamic_rate_limiter: bool,
     114                 :     /// Rate limit algorithm. Makes sense only if `disable_dynamic_rate_limiter` is `false`.
     115 CBC          23 :     #[clap(value_enum, long, default_value_t = proxy::rate_limiter::RateLimitAlgorithm::Aimd)]
     116 UBC           0 :     rate_limit_algorithm: proxy::rate_limiter::RateLimitAlgorithm,
     117                 :     /// Timeout for rate limiter. If it didn't manage to acquire a permit in this time, it will return an error.
     118                 :     #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
     119               0 :     rate_limiter_timeout: tokio::time::Duration,
     120                 :     /// Endpoint rate limiter max number of requests per second.
     121                 :     ///
     122                 :     /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
     123                 :     /// Can be given multiple times for different bucket sizes.
     124 CBC         115 :     #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
     125              23 :     endpoint_rps_limit: Vec<RateBucketInfo>,
     126                 :     /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
     127              23 :     #[clap(long, default_value_t = 100)]
     128 UBC           0 :     initial_limit: usize,
     129                 :     #[clap(flatten)]
     130                 :     aimd_config: proxy::rate_limiter::AimdConfig,
     131                 :     /// cache for `allowed_ips` (use `size=0` to disable)
     132                 :     #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
     133               0 :     allowed_ips_cache: String,
     134                 :     /// cache for `role_secret` (use `size=0` to disable)
     135                 :     #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
     136               0 :     role_secret_cache: String,
     137                 :     /// disable ip check for http requests. If it is too time consuming, it could be turned off.
     138 CBC          23 :     #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     139 UBC           0 :     disable_ip_check_for_http: bool,
     140                 : 
     141                 :     #[clap(flatten)]
     142                 :     parquet_upload: ParquetUploadArgs,
     143                 : }
     144                 : 
     145 CBC          23 : #[derive(clap::Args, Clone, Copy, Debug)]
     146                 : struct SqlOverHttpArgs {
     147                 :     /// timeout for http connection requests
     148                 :     #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
     149 UBC           0 :     sql_over_http_timeout: tokio::time::Duration,
     150                 : 
     151                 :     /// Whether the SQL over http pool is opt-in
     152 CBC          23 :     #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     153 UBC           0 :     sql_over_http_pool_opt_in: bool,
     154                 : 
     155                 :     /// How many connections to pool for each endpoint. Excess connections are discarded
     156 CBC          23 :     #[clap(long, default_value_t = 20)]
     157 UBC           0 :     sql_over_http_pool_max_conns_per_endpoint: usize,
     158                 : 
     159                 :     /// How long pooled connections should remain idle for before closing
     160                 :     #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
     161               0 :     sql_over_http_idle_timeout: tokio::time::Duration,
     162                 : 
     163                 :     /// Duration each shard will wait on average before a GC sweep.
     164                 :     /// A longer time will cause sweeps to take longer but will interfere less frequently.
     165                 :     #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
     166               0 :     sql_over_http_pool_gc_epoch: tokio::time::Duration,
     167                 : 
     168                 :     /// How many shards should the global pool have. Must be a power of two.
     169                 :     /// More shards will introduce less contention for pool operations, but can
     170                 :     /// increase memory used by the pool
     171 CBC          23 :     #[clap(long, default_value_t = 128)]
     172 UBC           0 :     sql_over_http_pool_shards: usize,
     173                 : }
     174                 : 
     175                 : #[tokio::main]
     176 CBC          22 : async fn main() -> anyhow::Result<()> {
     177              22 :     let _logging_guard = proxy::logging::init().await?;
     178              22 :     let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
     179              22 :     let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
     180              22 : 
     181              22 :     info!("Version: {GIT_VERSION}");
     182              22 :     info!("Build_tag: {BUILD_TAG}");
     183              22 :     ::metrics::set_build_info_metric(GIT_VERSION, BUILD_TAG);
     184              22 : 
     185              22 :     let args = ProxyCliArgs::parse();
     186              22 :     let config = build_config(&args)?;
     187                 : 
     188              22 :     info!("Authentication backend: {}", config.auth_backend);
     189                 : 
     190                 :     // Check that we can bind to address before further initialization
     191              22 :     let http_address: SocketAddr = args.http.parse()?;
     192              22 :     info!("Starting http on {http_address}");
     193              22 :     let http_listener = TcpListener::bind(http_address).await?.into_std()?;
     194                 : 
     195              22 :     let mgmt_address: SocketAddr = args.mgmt.parse()?;
     196              22 :     info!("Starting mgmt on {mgmt_address}");
     197              22 :     let mgmt_listener = TcpListener::bind(mgmt_address).await?;
     198                 : 
     199              22 :     let proxy_address: SocketAddr = args.proxy.parse()?;
     200              22 :     info!("Starting proxy on {proxy_address}");
     201              22 :     let proxy_listener = TcpListener::bind(proxy_address).await?;
     202              22 :     let cancellation_token = CancellationToken::new();
     203              22 : 
     204              22 :     let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
     205              22 : 
     206              22 :     // client facing tasks. these will exit on error or on cancellation
     207              22 :     // cancellation returns Ok(())
     208              22 :     let mut client_tasks = JoinSet::new();
     209              22 :     client_tasks.spawn(proxy::proxy::task_main(
     210              22 :         config,
     211              22 :         proxy_listener,
     212              22 :         cancellation_token.clone(),
     213              22 :         endpoint_rate_limiter.clone(),
     214              22 :     ));
     215                 : 
     216                 :     // TODO: rename the argument to something like serverless.
     217                 :     // It now covers more than just websockets, it also covers SQL over HTTP.
     218              22 :     if let Some(serverless_address) = args.wss {
     219              22 :         let serverless_address: SocketAddr = serverless_address.parse()?;
     220              22 :         info!("Starting wss on {serverless_address}");
     221              22 :         let serverless_listener = TcpListener::bind(serverless_address).await?;
     222                 : 
     223              22 :         client_tasks.spawn(serverless::task_main(
     224              22 :             config,
     225              22 :             serverless_listener,
     226              22 :             cancellation_token.clone(),
     227              22 :             endpoint_rate_limiter.clone(),
     228              22 :         ));
     229 UBC           0 :     }
     230                 : 
     231 CBC          22 :     client_tasks.spawn(proxy::context::parquet::worker(
     232              22 :         cancellation_token.clone(),
     233              22 :         args.parquet_upload,
     234              22 :     ));
     235              22 : 
     236              22 :     // maintenance tasks. these never return unless there's an error
     237              22 :     let mut maintenance_tasks = JoinSet::new();
     238              22 :     maintenance_tasks.spawn(proxy::handle_signals(cancellation_token));
     239              22 :     maintenance_tasks.spawn(http::health_server::task_main(http_listener));
     240              22 :     maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
     241                 : 
     242              22 :     if let Some(metrics_config) = &config.metric_collection {
     243               1 :         maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
     244              21 :     }
     245                 : 
     246                 :     let maintenance = loop {
     247                 :         // get one complete task
     248              88 :         match futures::future::select(
     249              88 :             pin!(maintenance_tasks.join_next()),
     250              88 :             pin!(client_tasks.join_next()),
     251              88 :         )
     252              51 :         .await
     253                 :         {
     254                 :             // exit immediately on maintenance task completion
     255 UBC           0 :             Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
     256                 :             // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
     257               0 :             Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
     258                 :             // exit immediately on client task error
     259 CBC          66 :             Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
     260                 :             // exit if all our client tasks have shutdown gracefully
     261              22 :             Either::Right((None, _)) => return Ok(()),
     262                 :         }
     263                 :     };
     264                 : 
     265                 :     // maintenance tasks return Infallible success values, this is an impossible value
     266                 :     // so this match statically ensures that there are no possibilities for that value
     267                 :     match maintenance {}
     268                 : }
     269                 : 
     270                 : /// ProxyConfig is created at proxy startup, and lives forever.
     271              22 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
     272              22 :     let tls_config = match (&args.tls_key, &args.tls_cert) {
     273              22 :         (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
     274              22 :             key_path,
     275              22 :             cert_path,
     276              22 :             args.certs_dir.as_ref(),
     277              22 :         )?),
     278 UBC           0 :         (None, None) => None,
     279               0 :         _ => bail!("either both or neither tls-key and tls-cert must be specified"),
     280                 :     };
     281                 : 
     282 CBC          22 :     if args.allow_self_signed_compute {
     283               3 :         warn!("allowing self-signed compute certificates");
     284              19 :     }
     285                 : 
     286              22 :     let metric_collection = match (
     287              22 :         &args.metric_collection_endpoint,
     288              22 :         &args.metric_collection_interval,
     289                 :     ) {
     290               1 :         (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
     291               1 :             endpoint: endpoint.parse()?,
     292               1 :             interval: humantime::parse_duration(interval)?,
     293                 :         }),
     294              21 :         (None, None) => None,
     295 UBC           0 :         _ => bail!(
     296               0 :             "either both or neither metric-collection-endpoint \
     297               0 :              and metric-collection-interval must be specified"
     298               0 :         ),
     299                 :     };
     300 CBC          22 :     let rate_limiter_config = RateLimiterConfig {
     301              22 :         disable: args.disable_dynamic_rate_limiter,
     302              22 :         algorithm: args.rate_limit_algorithm,
     303              22 :         timeout: args.rate_limiter_timeout,
     304              22 :         initial_limit: args.initial_limit,
     305              22 :         aimd_config: Some(args.aimd_config),
     306              22 :     };
     307                 : 
     308              22 :     let auth_backend = match &args.auth_backend {
     309                 :         AuthBackend::Console => {
     310               1 :             let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
     311               1 :             let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?;
     312               1 :             let role_secret_cache_config: CacheOptions = args.role_secret_cache.parse()?;
     313                 : 
     314               1 :             info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
     315               1 :             info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}");
     316               1 :             info!("Using RoleSecretCache (wake_compute) with options={role_secret_cache_config:?}");
     317               1 :             let caches = Box::leak(Box::new(console::caches::ApiCaches {
     318               1 :                 node_info: NodeInfoCache::new(
     319               1 :                     "node_info_cache",
     320               1 :                     wake_compute_cache_config.size,
     321               1 :                     wake_compute_cache_config.ttl,
     322               1 :                     true,
     323               1 :                 ),
     324               1 :                 allowed_ips: AllowedIpsCache::new(
     325               1 :                     "allowed_ips_cache",
     326               1 :                     allowed_ips_cache_config.size,
     327               1 :                     allowed_ips_cache_config.ttl,
     328               1 :                     false,
     329               1 :                 ),
     330               1 :                 role_secret: RoleSecretCache::new(
     331               1 :                     "role_secret_cache",
     332               1 :                     role_secret_cache_config.size,
     333               1 :                     role_secret_cache_config.ttl,
     334               1 :                     false,
     335               1 :                 ),
     336               1 :             }));
     337                 : 
     338                 :             let config::WakeComputeLockOptions {
     339               1 :                 shards,
     340               1 :                 permits,
     341               1 :                 epoch,
     342               1 :                 timeout,
     343               1 :             } = args.wake_compute_lock.parse()?;
     344               1 :             info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
     345               1 :             let locks = Box::leak(Box::new(
     346               1 :                 console::locks::ApiLocks::new("wake_compute_lock", permits, shards, timeout)
     347               1 :                     .unwrap(),
     348               1 :             ));
     349               1 :             tokio::spawn(locks.garbage_collect_worker(epoch));
     350                 : 
     351               1 :             let url = args.auth_endpoint.parse()?;
     352               1 :             let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));
     353               1 : 
     354               1 :             let api = console::provider::neon::Api::new(endpoint, caches, locks);
     355               1 :             auth::BackendType::Console(Cow::Owned(api), ())
     356                 :         }
     357                 :         #[cfg(feature = "testing")]
     358                 :         AuthBackend::Postgres => {
     359              18 :             let url = args.auth_endpoint.parse()?;
     360              18 :             let api = console::provider::mock::Api::new(url);
     361              18 :             auth::BackendType::Postgres(Cow::Owned(api), ())
     362                 :         }
     363                 :         AuthBackend::Link => {
     364               3 :             let url = args.uri.parse()?;
     365               3 :             auth::BackendType::Link(Cow::Owned(url))
     366                 :         }
     367                 :     };
     368              22 :     let http_config = HttpConfig {
     369              22 :         request_timeout: args.sql_over_http.sql_over_http_timeout,
     370              22 :         pool_options: GlobalConnPoolOptions {
     371              22 :             max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
     372              22 :             gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
     373              22 :             pool_shards: args.sql_over_http.sql_over_http_pool_shards,
     374              22 :             idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
     375              22 :             opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
     376              22 :         },
     377              22 :     };
     378              22 :     let authentication_config = AuthenticationConfig {
     379              22 :         scram_protocol_timeout: args.scram_protocol_timeout,
     380              22 :     };
     381              22 : 
     382              22 :     let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
     383              22 :     RateBucketInfo::validate(&mut endpoint_rps_limit)?;
     384                 : 
     385              22 :     let config = Box::leak(Box::new(ProxyConfig {
     386              22 :         tls_config,
     387              22 :         auth_backend,
     388              22 :         metric_collection,
     389              22 :         allow_self_signed_compute: args.allow_self_signed_compute,
     390              22 :         http_config,
     391              22 :         authentication_config,
     392              22 :         require_client_ip: args.require_client_ip,
     393              22 :         disable_ip_check_for_http: args.disable_ip_check_for_http,
     394              22 :         endpoint_rps_limit,
     395              22 :         // TODO: add this argument
     396              22 :         region: args.region.clone(),
     397              22 :     }));
     398              22 : 
     399              22 :     Ok(config)
     400              22 : }
     401                 : 
     402                 : #[cfg(test)]
     403                 : mod tests {
     404                 :     use std::time::Duration;
     405                 : 
     406                 :     use clap::Parser;
     407                 :     use proxy::rate_limiter::RateBucketInfo;
     408                 : 
     409               1 :     #[test]
     410               1 :     fn parse_endpoint_rps_limit() {
     411               1 :         let config = super::ProxyCliArgs::parse_from([
     412               1 :             "proxy",
     413               1 :             "--endpoint-rps-limit",
     414               1 :             "100@1s",
     415               1 :             "--endpoint-rps-limit",
     416               1 :             "20@30s",
     417               1 :         ]);
     418               1 : 
     419               1 :         assert_eq!(
     420               1 :             config.endpoint_rps_limit,
     421               1 :             vec![
     422               1 :                 RateBucketInfo::new(100, Duration::from_secs(1)),
     423               1 :                 RateBucketInfo::new(20, Duration::from_secs(30)),
     424               1 :             ]
     425               1 :         );
     426               1 :     }
     427                 : }
        

Generated by: LCOV version 2.1-beta