Line data Source code
1 : use aws_config::environment::EnvironmentVariableCredentialsProvider;
2 : use aws_config::imds::credentials::ImdsCredentialsProvider;
3 : use aws_config::meta::credentials::CredentialsProviderChain;
4 : use aws_config::meta::region::RegionProviderChain;
5 : use aws_config::profile::ProfileFileCredentialsProvider;
6 : use aws_config::provider_config::ProviderConfig;
7 : use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
8 : use futures::future::Either;
9 : use proxy::auth;
10 : use proxy::auth::backend::AuthRateLimiter;
11 : use proxy::auth::backend::MaybeOwned;
12 : use proxy::cancellation::CancelMap;
13 : use proxy::cancellation::CancellationHandler;
14 : use proxy::config::remote_storage_from_toml;
15 : use proxy::config::AuthenticationConfig;
16 : use proxy::config::CacheOptions;
17 : use proxy::config::HttpConfig;
18 : use proxy::config::ProjectInfoCacheOptions;
19 : use proxy::console;
20 : use proxy::context::parquet::ParquetUploadArgs;
21 : use proxy::http;
22 : use proxy::http::health_server::AppMetrics;
23 : use proxy::metrics::Metrics;
24 : use proxy::rate_limiter::EndpointRateLimiter;
25 : use proxy::rate_limiter::RateBucketInfo;
26 : use proxy::redis::cancellation_publisher::RedisPublisherClient;
27 : use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
28 : use proxy::redis::elasticache;
29 : use proxy::redis::notifications;
30 : use proxy::scram::threadpool::ThreadPool;
31 : use proxy::serverless::cancel_set::CancelSet;
32 : use proxy::serverless::GlobalConnPoolOptions;
33 : use proxy::usage_metrics;
34 :
35 : use anyhow::bail;
36 : use proxy::config::{self, ProxyConfig};
37 : use proxy::serverless;
38 : use std::net::SocketAddr;
39 : use std::pin::pin;
40 : use std::sync::Arc;
41 : use tokio::net::TcpListener;
42 : use tokio::sync::Mutex;
43 : use tokio::task::JoinSet;
44 : use tokio_util::sync::CancellationToken;
45 : use tracing::info;
46 : use tracing::warn;
47 : use tracing::Instrument;
48 : use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
49 :
// Build metadata constants generated by macros from the `utils` crate.
// `GIT_VERSION` and `BUILD_TAG` are referenced further down in this file: as the
// clap `version` string, in the startup log lines, and in the `BuildInfo` metrics.
project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);
52 :
53 : use clap::{Parser, ValueEnum};
54 :
// Route every heap allocation in this binary through jemalloc.
// NOTE(review): `main` starts `proxy::jemalloc::MetricRecorder`, which presumably
// relies on jemalloc being the active allocator — confirm before changing this.
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
57 :
// Value of the `--auth-backend` CLI option; `build_config` maps each variant onto an
// `auth::BackendType`. Plain `//` comments are used on purpose: with `ValueEnum`,
// `///` comments on variants would become part of the generated `--help` output.
#[derive(Clone, Debug, ValueEnum)]
enum AuthBackend {
    // Talk to the console API at `--auth-endpoint` (with caches, locks and rate limits).
    Console,
    // Mock console provider pointed at `--auth-endpoint`; only compiled with the
    // `testing` feature.
    #[cfg(feature = "testing")]
    Postgres,
    // Link auth: uses `--uri` as the redirect target.
    Link,
}
65 :
66 : /// Neon proxy/router
67 4 : #[derive(Parser)]
68 : #[command(version = GIT_VERSION, about)]
69 : struct ProxyCliArgs {
70 : /// Name of the region this proxy is deployed in
71 2 : #[clap(long, default_value_t = String::new())]
72 0 : region: String,
73 : /// listen for incoming client connections on ip:port
74 : #[clap(short, long, default_value = "127.0.0.1:4432")]
75 0 : proxy: String,
76 2 : #[clap(value_enum, long, default_value_t = AuthBackend::Link)]
77 0 : auth_backend: AuthBackend,
78 : /// listen for management callback connection on ip:port
79 : #[clap(short, long, default_value = "127.0.0.1:7000")]
80 0 : mgmt: String,
81 : /// listen for incoming http connections (metrics, etc) on ip:port
82 : #[clap(long, default_value = "127.0.0.1:7001")]
83 0 : http: String,
84 : /// listen for incoming wss connections on ip:port
85 : #[clap(long)]
86 : wss: Option<String>,
87 : /// redirect unauthenticated users to the given uri in case of link auth
88 : #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
89 0 : uri: String,
90 : /// cloud API endpoint for authenticating users
91 : #[clap(
92 : short,
93 : long,
94 : default_value = "http://localhost:3000/authenticate_proxy_request/"
95 : )]
96 0 : auth_endpoint: String,
97 : /// path to TLS key for client postgres connections
98 : ///
99 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
100 : #[clap(short = 'k', long, alias = "ssl-key")]
101 : tls_key: Option<String>,
102 : /// path to TLS cert for client postgres connections
103 : ///
104 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
105 : #[clap(short = 'c', long, alias = "ssl-cert")]
106 : tls_cert: Option<String>,
107 : /// path to directory with TLS certificates for client postgres connections
108 : #[clap(long)]
109 : certs_dir: Option<String>,
110 : /// timeout for the TLS handshake
111 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
112 0 : handshake_timeout: tokio::time::Duration,
113 : /// http endpoint to receive periodic metric updates
114 : #[clap(long)]
115 : metric_collection_endpoint: Option<String>,
116 : /// how often metrics should be sent to a collection endpoint
117 : #[clap(long)]
118 : metric_collection_interval: Option<String>,
119 : /// cache for `wake_compute` api method (use `size=0` to disable)
120 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
121 0 : wake_compute_cache: String,
122 : /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
123 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
124 0 : wake_compute_lock: String,
125 : /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
126 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
127 0 : connect_compute_lock: String,
128 : /// Allow self-signed certificates for compute nodes (for testing)
129 2 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
130 0 : allow_self_signed_compute: bool,
131 : #[clap(flatten)]
132 : sql_over_http: SqlOverHttpArgs,
133 : /// timeout for scram authentication protocol
134 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
135 0 : scram_protocol_timeout: tokio::time::Duration,
136 : /// size of the threadpool for password hashing
137 2 : #[clap(long, default_value_t = 4)]
138 0 : scram_thread_pool_size: u8,
139 : /// Require that all incoming requests have a Proxy Protocol V2 packet **and** have an IP address associated.
140 2 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
141 0 : require_client_ip: bool,
142 : /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour.
143 2 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
144 0 : disable_dynamic_rate_limiter: bool,
145 : /// Endpoint rate limiter max number of requests per second.
146 : ///
147 : /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
148 : /// Can be given multiple times for different bucket sizes.
149 12 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
150 2 : endpoint_rps_limit: Vec<RateBucketInfo>,
151 : /// Wake compute rate limiter max number of requests per second.
152 12 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
153 2 : wake_compute_limit: Vec<RateBucketInfo>,
154 : /// Whether the auth rate limiter actually takes effect (for testing)
155 2 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
156 0 : auth_rate_limit_enabled: bool,
157 : /// Authentication rate limiter max number of hashes per second.
158 12 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_AUTH_SET)]
159 2 : auth_rate_limit: Vec<RateBucketInfo>,
160 : /// The IP subnet to use when considering whether two IP addresses are considered the same.
161 2 : #[clap(long, default_value_t = 64)]
162 0 : auth_rate_limit_ip_subnet: u8,
163 : /// Redis rate limiter max number of requests per second.
164 12 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
165 2 : redis_rps_limit: Vec<RateBucketInfo>,
166 : /// cache for `allowed_ips` (use `size=0` to disable)
167 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
168 0 : allowed_ips_cache: String,
169 : /// cache for `role_secret` (use `size=0` to disable)
170 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
171 0 : role_secret_cache: String,
172 : /// disable ip check for http requests. If it is too time consuming, it could be turned off.
173 2 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
174 0 : disable_ip_check_for_http: bool,
175 : /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections)
176 : #[clap(long)]
177 : redis_notifications: Option<String>,
178 : /// redis host for streaming connections (might be different from the notifications host)
179 : #[clap(long)]
180 : redis_host: Option<String>,
181 : /// redis port for streaming connections (might be different from the notifications host)
182 : #[clap(long)]
183 : redis_port: Option<u16>,
184 : /// redis cluster name, used in aws elasticache
185 : #[clap(long)]
186 : redis_cluster_name: Option<String>,
187 : /// redis user_id, used in aws elasticache
188 : #[clap(long)]
189 : redis_user_id: Option<String>,
190 : /// aws region to retrieve credentials
191 2 : #[clap(long, default_value_t = String::new())]
192 0 : aws_region: String,
193 : /// cache for `project_info` (use `size=0` to disable)
194 : #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
195 0 : project_info_cache: String,
196 : /// cache for all valid endpoints
197 : #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)]
198 0 : endpoint_cache_config: String,
199 : #[clap(flatten)]
200 : parquet_upload: ParquetUploadArgs,
201 :
202 : /// interval for backup metric collection
203 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
204 0 : metric_backup_collection_interval: std::time::Duration,
205 : /// remote storage configuration for backup metric collection
206 : /// Encoded as toml (same format as pageservers), eg
207 : /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`
208 : #[clap(long, default_value = "{}")]
209 0 : metric_backup_collection_remote_storage: String,
210 : /// chunk size for backup metric collection
211 : /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression.
212 : #[clap(long, default_value = "4194304")]
213 0 : metric_backup_collection_chunk_size: usize,
214 : /// Whether to retry the connection to the compute node
215 : #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
216 0 : connect_to_compute_retry: String,
217 : /// Whether to retry the wake_compute request
218 : #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
219 0 : wake_compute_retry: String,
220 : }
221 :
222 4 : #[derive(clap::Args, Clone, Copy, Debug)]
223 : struct SqlOverHttpArgs {
224 : /// timeout for http connection requests
225 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
226 0 : sql_over_http_timeout: tokio::time::Duration,
227 :
228 : /// Whether the SQL over http pool is opt-in
229 2 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
230 0 : sql_over_http_pool_opt_in: bool,
231 :
232 : /// How many connections to pool for each endpoint. Excess connections are discarded
233 2 : #[clap(long, default_value_t = 20)]
234 0 : sql_over_http_pool_max_conns_per_endpoint: usize,
235 :
236 : /// How many connections to pool for each endpoint. Excess connections are discarded
237 2 : #[clap(long, default_value_t = 20000)]
238 0 : sql_over_http_pool_max_total_conns: usize,
239 :
240 : /// How long pooled connections should remain idle for before closing
241 : #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
242 0 : sql_over_http_idle_timeout: tokio::time::Duration,
243 :
244 : /// Duration each shard will wait on average before a GC sweep.
245 : /// A longer time will causes sweeps to take longer but will interfere less frequently.
246 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
247 0 : sql_over_http_pool_gc_epoch: tokio::time::Duration,
248 :
249 : /// How many shards should the global pool have. Must be a power of two.
250 : /// More shards will introduce less contention for pool operations, but can
251 : /// increase memory used by the pool
252 2 : #[clap(long, default_value_t = 128)]
253 0 : sql_over_http_pool_shards: usize,
254 :
255 2 : #[clap(long, default_value_t = 10000)]
256 0 : sql_over_http_client_conn_threshold: u64,
257 :
258 2 : #[clap(long, default_value_t = 64)]
259 0 : sql_over_http_cancel_set_shards: usize,
260 : }
261 :
262 : #[tokio::main]
263 0 : async fn main() -> anyhow::Result<()> {
264 0 : let _logging_guard = proxy::logging::init().await?;
265 0 : let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
266 0 : let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
267 0 :
268 0 : info!("Version: {GIT_VERSION}");
269 0 : info!("Build_tag: {BUILD_TAG}");
270 0 : let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {
271 0 : revision: GIT_VERSION,
272 0 : build_tag: BUILD_TAG,
273 0 : });
274 0 :
275 0 : let jemalloc = match proxy::jemalloc::MetricRecorder::new() {
276 0 : Ok(t) => Some(t),
277 0 : Err(e) => {
278 0 : tracing::error!(error = ?e, "could not start jemalloc metrics loop");
279 0 : None
280 0 : }
281 0 : };
282 0 :
283 0 : let args = ProxyCliArgs::parse();
284 0 : let config = build_config(&args)?;
285 0 :
286 0 : info!("Authentication backend: {}", config.auth_backend);
287 0 : info!("Using region: {}", config.aws_region);
288 0 :
289 0 : let region_provider = RegionProviderChain::default_provider().or_else(&*config.aws_region); // Replace with your Redis region if needed
290 0 : let provider_conf =
291 0 : ProviderConfig::without_region().with_region(region_provider.region().await);
292 0 : let aws_credentials_provider = {
293 0 : // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
294 0 : CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
295 0 : // uses "AWS_PROFILE" / `aws sso login --profile <profile>`
296 0 : .or_else(
297 0 : "profile-sso",
298 0 : ProfileFileCredentialsProvider::builder()
299 0 : .configure(&provider_conf)
300 0 : .build(),
301 0 : )
302 0 : // uses "AWS_WEB_IDENTITY_TOKEN_FILE", "AWS_ROLE_ARN", "AWS_ROLE_SESSION_NAME"
303 0 : // needed to access remote extensions bucket
304 0 : .or_else(
305 0 : "token",
306 0 : WebIdentityTokenCredentialsProvider::builder()
307 0 : .configure(&provider_conf)
308 0 : .build(),
309 0 : )
310 0 : // uses imds v2
311 0 : .or_else("imds", ImdsCredentialsProvider::builder().build())
312 0 : };
313 0 : let elasticache_credentials_provider = Arc::new(elasticache::CredentialsProvider::new(
314 0 : elasticache::AWSIRSAConfig::new(
315 0 : config.aws_region.clone(),
316 0 : args.redis_cluster_name,
317 0 : args.redis_user_id,
318 0 : ),
319 0 : aws_credentials_provider,
320 0 : ));
321 0 : let regional_redis_client = match (args.redis_host, args.redis_port) {
322 0 : (Some(host), Some(port)) => Some(
323 0 : ConnectionWithCredentialsProvider::new_with_credentials_provider(
324 0 : host,
325 0 : port,
326 0 : elasticache_credentials_provider.clone(),
327 0 : ),
328 0 : ),
329 0 : (None, None) => {
330 0 : warn!("Redis events from console are disabled");
331 0 : None
332 0 : }
333 0 : _ => {
334 0 : bail!("redis-host and redis-port must be specified together");
335 0 : }
336 0 : };
337 0 : let redis_notifications_client = if let Some(url) = args.redis_notifications {
338 0 : Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url))
339 0 : } else {
340 0 : regional_redis_client.clone()
341 0 : };
342 0 :
343 0 : // Check that we can bind to address before further initialization
344 0 : let http_address: SocketAddr = args.http.parse()?;
345 0 : info!("Starting http on {http_address}");
346 0 : let http_listener = TcpListener::bind(http_address).await?.into_std()?;
347 0 :
348 0 : let mgmt_address: SocketAddr = args.mgmt.parse()?;
349 0 : info!("Starting mgmt on {mgmt_address}");
350 0 : let mgmt_listener = TcpListener::bind(mgmt_address).await?;
351 0 :
352 0 : let proxy_address: SocketAddr = args.proxy.parse()?;
353 0 : info!("Starting proxy on {proxy_address}");
354 0 : let proxy_listener = TcpListener::bind(proxy_address).await?;
355 0 : let cancellation_token = CancellationToken::new();
356 0 :
357 0 : let cancel_map = CancelMap::default();
358 0 :
359 0 : let redis_publisher = match ®ional_redis_client {
360 0 : Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
361 0 : redis_publisher.clone(),
362 0 : args.region.clone(),
363 0 : &config.redis_rps_limit,
364 0 : )?))),
365 0 : None => None,
366 0 : };
367 0 : let cancellation_handler = Arc::new(CancellationHandler::<
368 0 : Option<Arc<tokio::sync::Mutex<RedisPublisherClient>>>,
369 0 : >::new(
370 0 : cancel_map.clone(),
371 0 : redis_publisher,
372 0 : proxy::metrics::CancellationSource::FromClient,
373 0 : ));
374 0 :
375 0 : let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
376 0 : RateBucketInfo::validate(&mut endpoint_rps_limit)?;
377 0 : let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(endpoint_rps_limit));
378 0 :
379 0 : // client facing tasks. these will exit on error or on cancellation
380 0 : // cancellation returns Ok(())
381 0 : let mut client_tasks = JoinSet::new();
382 0 : client_tasks.spawn(proxy::proxy::task_main(
383 0 : config,
384 0 : proxy_listener,
385 0 : cancellation_token.clone(),
386 0 : cancellation_handler.clone(),
387 0 : endpoint_rate_limiter.clone(),
388 0 : ));
389 0 :
390 0 : // TODO: rename the argument to something like serverless.
391 0 : // It now covers more than just websockets, it also covers SQL over HTTP.
392 0 : if let Some(serverless_address) = args.wss {
393 0 : let serverless_address: SocketAddr = serverless_address.parse()?;
394 0 : info!("Starting wss on {serverless_address}");
395 0 : let serverless_listener = TcpListener::bind(serverless_address).await?;
396 0 :
397 0 : client_tasks.spawn(serverless::task_main(
398 0 : config,
399 0 : serverless_listener,
400 0 : cancellation_token.clone(),
401 0 : cancellation_handler.clone(),
402 0 : endpoint_rate_limiter.clone(),
403 0 : ));
404 0 : }
405 0 :
406 0 : client_tasks.spawn(proxy::context::parquet::worker(
407 0 : cancellation_token.clone(),
408 0 : args.parquet_upload,
409 0 : ));
410 0 :
411 0 : // maintenance tasks. these never return unless there's an error
412 0 : let mut maintenance_tasks = JoinSet::new();
413 0 : maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone()));
414 0 : maintenance_tasks.spawn(http::health_server::task_main(
415 0 : http_listener,
416 0 : AppMetrics {
417 0 : jemalloc,
418 0 : neon_metrics,
419 0 : proxy: proxy::metrics::Metrics::get(),
420 0 : },
421 0 : ));
422 0 : maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
423 0 :
424 0 : if let Some(metrics_config) = &config.metric_collection {
425 0 : // TODO: Add gc regardles of the metric collection being enabled.
426 0 : maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
427 0 : client_tasks.spawn(usage_metrics::task_backup(
428 0 : &metrics_config.backup_metric_collection_config,
429 0 : cancellation_token.clone(),
430 0 : ));
431 0 : }
432 0 :
433 0 : if let auth::BackendType::Console(api, _) = &config.auth_backend {
434 0 : if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
435 0 : match (redis_notifications_client, regional_redis_client.clone()) {
436 0 : (None, None) => {}
437 0 : (client1, client2) => {
438 0 : let cache = api.caches.project_info.clone();
439 0 : if let Some(client) = client1 {
440 0 : maintenance_tasks.spawn(notifications::task_main(
441 0 : client,
442 0 : cache.clone(),
443 0 : cancel_map.clone(),
444 0 : args.region.clone(),
445 0 : ));
446 0 : }
447 0 : if let Some(client) = client2 {
448 0 : maintenance_tasks.spawn(notifications::task_main(
449 0 : client,
450 0 : cache.clone(),
451 0 : cancel_map.clone(),
452 0 : args.region.clone(),
453 0 : ));
454 0 : }
455 0 : maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
456 0 : }
457 0 : }
458 0 : if let Some(regional_redis_client) = regional_redis_client {
459 0 : let cache = api.caches.endpoints_cache.clone();
460 0 : let con = regional_redis_client;
461 0 : let span = tracing::info_span!("endpoints_cache");
462 0 : maintenance_tasks.spawn(
463 0 : async move { cache.do_read(con, cancellation_token.clone()).await }
464 0 : .instrument(span),
465 0 : );
466 0 : }
467 0 : }
468 0 : }
469 0 :
470 0 : let maintenance = loop {
471 0 : // get one complete task
472 0 : match futures::future::select(
473 0 : pin!(maintenance_tasks.join_next()),
474 0 : pin!(client_tasks.join_next()),
475 0 : )
476 0 : .await
477 0 : {
478 0 : // exit immediately on maintenance task completion
479 0 : Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
480 0 : // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
481 0 : Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
482 0 : // exit immediately on client task error
483 0 : Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
484 0 : // exit if all our client tasks have shutdown gracefully
485 0 : Either::Right((None, _)) => return Ok(()),
486 0 : }
487 0 : };
488 0 :
489 0 : // maintenance tasks return Infallible success values, this is an impossible value
490 0 : // so this match statically ensures that there are no possibilities for that value
491 0 : match maintenance {}
492 0 : }
493 :
494 : /// ProxyConfig is created at proxy startup, and lives forever.
495 0 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
496 0 : let thread_pool = ThreadPool::new(args.scram_thread_pool_size);
497 0 : Metrics::install(thread_pool.metrics.clone());
498 :
499 0 : let tls_config = match (&args.tls_key, &args.tls_cert) {
500 0 : (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
501 0 : key_path,
502 0 : cert_path,
503 0 : args.certs_dir.as_ref(),
504 0 : )?),
505 0 : (None, None) => None,
506 0 : _ => bail!("either both or neither tls-key and tls-cert must be specified"),
507 : };
508 :
509 0 : if args.allow_self_signed_compute {
510 0 : warn!("allowing self-signed compute certificates");
511 0 : }
512 0 : let backup_metric_collection_config = config::MetricBackupCollectionConfig {
513 0 : interval: args.metric_backup_collection_interval,
514 0 : remote_storage_config: remote_storage_from_toml(
515 0 : &args.metric_backup_collection_remote_storage,
516 0 : )?,
517 0 : chunk_size: args.metric_backup_collection_chunk_size,
518 : };
519 :
520 0 : let metric_collection = match (
521 0 : &args.metric_collection_endpoint,
522 0 : &args.metric_collection_interval,
523 : ) {
524 0 : (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
525 0 : endpoint: endpoint.parse()?,
526 0 : interval: humantime::parse_duration(interval)?,
527 0 : backup_metric_collection_config,
528 : }),
529 0 : (None, None) => None,
530 0 : _ => bail!(
531 0 : "either both or neither metric-collection-endpoint \
532 0 : and metric-collection-interval must be specified"
533 0 : ),
534 : };
535 0 : if !args.disable_dynamic_rate_limiter {
536 0 : bail!("dynamic rate limiter should be disabled");
537 0 : }
538 :
539 0 : let auth_backend = match &args.auth_backend {
540 : AuthBackend::Console => {
541 0 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
542 0 : let project_info_cache_config: ProjectInfoCacheOptions =
543 0 : args.project_info_cache.parse()?;
544 0 : let endpoint_cache_config: config::EndpointCacheConfig =
545 0 : args.endpoint_cache_config.parse()?;
546 :
547 0 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
548 0 : info!(
549 0 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
550 : );
551 0 : info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
552 0 : let caches = Box::leak(Box::new(console::caches::ApiCaches::new(
553 0 : wake_compute_cache_config,
554 0 : project_info_cache_config,
555 0 : endpoint_cache_config,
556 0 : )));
557 :
558 : let config::ConcurrencyLockOptions {
559 0 : shards,
560 0 : limiter,
561 0 : epoch,
562 0 : timeout,
563 0 : } = args.wake_compute_lock.parse()?;
564 0 : info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
565 0 : let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
566 0 : "wake_compute_lock",
567 0 : limiter,
568 0 : shards,
569 0 : timeout,
570 0 : epoch,
571 0 : &Metrics::get().wake_compute_lock,
572 0 : )?));
573 0 : tokio::spawn(locks.garbage_collect_worker());
574 :
575 0 : let url = args.auth_endpoint.parse()?;
576 0 : let endpoint = http::Endpoint::new(url, http::new_client());
577 0 :
578 0 : let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
579 0 : RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
580 0 : let wake_compute_endpoint_rate_limiter =
581 0 : Arc::new(EndpointRateLimiter::new(wake_compute_rps_limit));
582 0 : let api = console::provider::neon::Api::new(
583 0 : endpoint,
584 0 : caches,
585 0 : locks,
586 0 : wake_compute_endpoint_rate_limiter,
587 0 : );
588 0 : let api = console::provider::ConsoleBackend::Console(api);
589 0 : auth::BackendType::Console(MaybeOwned::Owned(api), ())
590 : }
591 : #[cfg(feature = "testing")]
592 : AuthBackend::Postgres => {
593 0 : let url = args.auth_endpoint.parse()?;
594 0 : let api = console::provider::mock::Api::new(url);
595 0 : let api = console::provider::ConsoleBackend::Postgres(api);
596 0 : auth::BackendType::Console(MaybeOwned::Owned(api), ())
597 : }
598 : AuthBackend::Link => {
599 0 : let url = args.uri.parse()?;
600 0 : auth::BackendType::Link(MaybeOwned::Owned(url), ())
601 : }
602 : };
603 :
604 : let config::ConcurrencyLockOptions {
605 0 : shards,
606 0 : limiter,
607 0 : epoch,
608 0 : timeout,
609 0 : } = args.connect_compute_lock.parse()?;
610 0 : info!(
611 : ?limiter,
612 : shards,
613 : ?epoch,
614 0 : "Using NodeLocks (connect_compute)"
615 : );
616 0 : let connect_compute_locks = console::locks::ApiLocks::new(
617 0 : "connect_compute_lock",
618 0 : limiter,
619 0 : shards,
620 0 : timeout,
621 0 : epoch,
622 0 : &Metrics::get().proxy.connect_compute_lock,
623 0 : )?;
624 :
625 0 : let http_config = HttpConfig {
626 0 : request_timeout: args.sql_over_http.sql_over_http_timeout,
627 0 : pool_options: GlobalConnPoolOptions {
628 0 : max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
629 0 : gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
630 0 : pool_shards: args.sql_over_http.sql_over_http_pool_shards,
631 0 : idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
632 0 : opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
633 0 : max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,
634 0 : },
635 0 : cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
636 0 : client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
637 0 : };
638 0 : let authentication_config = AuthenticationConfig {
639 0 : thread_pool,
640 0 : scram_protocol_timeout: args.scram_protocol_timeout,
641 0 : rate_limiter_enabled: args.auth_rate_limit_enabled,
642 0 : rate_limiter: AuthRateLimiter::new(args.auth_rate_limit.clone()),
643 0 : rate_limit_ip_subnet: args.auth_rate_limit_ip_subnet,
644 0 : };
645 0 :
646 0 : let mut redis_rps_limit = args.redis_rps_limit.clone();
647 0 : RateBucketInfo::validate(&mut redis_rps_limit)?;
648 :
649 0 : let config = Box::leak(Box::new(ProxyConfig {
650 0 : tls_config,
651 0 : auth_backend,
652 0 : metric_collection,
653 0 : allow_self_signed_compute: args.allow_self_signed_compute,
654 0 : http_config,
655 0 : authentication_config,
656 0 : require_client_ip: args.require_client_ip,
657 0 : disable_ip_check_for_http: args.disable_ip_check_for_http,
658 0 : redis_rps_limit,
659 0 : handshake_timeout: args.handshake_timeout,
660 0 : region: args.region.clone(),
661 0 : aws_region: args.aws_region.clone(),
662 0 : wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
663 0 : connect_compute_locks,
664 0 : connect_to_compute_retry_config: config::RetryConfig::parse(
665 0 : &args.connect_to_compute_retry,
666 0 : )?,
667 : }));
668 :
669 0 : tokio::spawn(config.connect_compute_locks.garbage_collect_worker());
670 0 :
671 0 : Ok(config)
672 0 : }
673 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use clap::Parser;
    use proxy::rate_limiter::RateBucketInfo;

    /// Passing `--endpoint-rps-limit` twice yields one bucket per occurrence, in the
    /// order given on the command line.
    #[test]
    fn parse_endpoint_rps_limit() {
        let argv = [
            "proxy",
            "--endpoint-rps-limit",
            "100@1s",
            "--endpoint-rps-limit",
            "20@30s",
        ];
        let parsed = super::ProxyCliArgs::parse_from(argv);

        let expected = vec![
            RateBucketInfo::new(100, Duration::from_secs(1)),
            RateBucketInfo::new(20, Duration::from_secs(30)),
        ];
        assert_eq!(parsed.endpoint_rps_limit, expected);
    }
}
|