Line data Source code
1 : #[cfg(any(test, feature = "testing"))]
2 : use std::env;
3 : use std::net::SocketAddr;
4 : use std::path::PathBuf;
5 : use std::pin::pin;
6 : use std::sync::Arc;
7 : use std::time::Duration;
8 :
9 : #[cfg(any(test, feature = "testing"))]
10 : use anyhow::Context;
11 : use anyhow::{bail, ensure};
12 : use arc_swap::ArcSwapOption;
13 : use futures::future::Either;
14 : use itertools::{Itertools, Position};
15 : use rand::{Rng, thread_rng};
16 : use remote_storage::RemoteStorageConfig;
17 : use tokio::net::TcpListener;
18 : use tokio::task::JoinSet;
19 : use tokio_util::sync::CancellationToken;
20 : use tracing::{Instrument, error, info, warn};
21 : use utils::sentry_init::init_sentry;
22 : use utils::{project_build_tag, project_git_version};
23 :
24 : use crate::auth::backend::jwt::JwkCache;
25 : use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
26 : use crate::cancellation::{CancellationHandler, handle_cancel_messages};
27 : use crate::config::{
28 : self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
29 : ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,
30 : };
31 : use crate::context::parquet::ParquetUploadArgs;
32 : use crate::http::health_server::AppMetrics;
33 : use crate::metrics::Metrics;
34 : use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
35 : use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
36 : use crate::redis::kv_ops::RedisKVClient;
37 : use crate::redis::{elasticache, notifications};
38 : use crate::scram::threadpool::ThreadPool;
39 : use crate::serverless::GlobalConnPoolOptions;
40 : use crate::serverless::cancel_set::CancelSet;
41 : use crate::tls::client_config::compute_client_config_with_root_certs;
42 : #[cfg(any(test, feature = "testing"))]
43 : use crate::url::ApiUrl;
44 : use crate::{auth, control_plane, http, serverless, usage_metrics};
45 :
46 : project_git_version!(GIT_VERSION);
47 : project_build_tag!(BUILD_TAG);
48 :
49 : use clap::{Parser, ValueEnum};
50 :
51 : #[derive(Clone, Debug, ValueEnum)]
52 : #[clap(rename_all = "kebab-case")]
53 : enum AuthBackendType {
54 : #[clap(alias("cplane-v1"))]
55 : ControlPlane,
56 :
57 : #[clap(alias("link"))]
58 : ConsoleRedirect,
59 :
60 : #[cfg(any(test, feature = "testing"))]
61 : Postgres,
62 : }
63 :
64 : /// Neon proxy/router
65 : #[derive(Parser)]
66 : #[command(version = GIT_VERSION, about)]
67 : struct ProxyCliArgs {
68 : /// Name of the region this proxy is deployed in
69 1 : #[clap(long, default_value_t = String::new())]
70 0 : region: String,
71 : /// listen for incoming client connections on ip:port
72 : #[clap(short, long, default_value = "127.0.0.1:4432")]
73 0 : proxy: SocketAddr,
74 1 : #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)]
75 0 : auth_backend: AuthBackendType,
76 : /// listen for management callback connection on ip:port
77 : #[clap(short, long, default_value = "127.0.0.1:7000")]
78 0 : mgmt: SocketAddr,
79 : /// listen for incoming http connections (metrics, etc) on ip:port
80 : #[clap(long, default_value = "127.0.0.1:7001")]
81 0 : http: SocketAddr,
82 : /// listen for incoming wss connections on ip:port
83 : #[clap(long)]
84 : wss: Option<SocketAddr>,
85 : /// redirect unauthenticated users to the given uri in case of console redirect auth
86 : #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
87 0 : uri: String,
88 : /// cloud API endpoint for authenticating users
89 : #[clap(
90 : short,
91 : long,
92 : default_value = "http://localhost:3000/authenticate_proxy_request/"
93 : )]
94 0 : auth_endpoint: String,
95 : /// JWT used to connect to control plane.
96 : #[clap(
97 : long,
98 : value_name = "JWT",
99 : default_value = "",
100 : env = "NEON_PROXY_TO_CONTROLPLANE_TOKEN"
101 : )]
102 0 : control_plane_token: Arc<str>,
103 : /// if this is not local proxy, this toggles whether we accept jwt or passwords for http
104 1 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
105 0 : is_auth_broker: bool,
106 : /// path to TLS key for client postgres connections
107 : ///
108 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
109 : #[clap(short = 'k', long, alias = "ssl-key")]
110 : tls_key: Option<PathBuf>,
111 : /// path to TLS cert for client postgres connections
112 : ///
113 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
114 : #[clap(short = 'c', long, alias = "ssl-cert")]
115 : tls_cert: Option<PathBuf>,
116 : /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`.
117 : #[clap(long, alias = "allow-ssl-keylogfile")]
118 0 : allow_tls_keylogfile: bool,
119 : /// path to directory with TLS certificates for client postgres connections
120 : #[clap(long)]
121 : certs_dir: Option<PathBuf>,
122 : /// timeout for the TLS handshake
123 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
124 0 : handshake_timeout: tokio::time::Duration,
125 : /// http endpoint to receive periodic metric updates
126 : #[clap(long)]
127 : metric_collection_endpoint: Option<String>,
128 : /// how often metrics should be sent to a collection endpoint
129 : #[clap(long)]
130 : metric_collection_interval: Option<String>,
131 : /// cache for `wake_compute` api method (use `size=0` to disable)
132 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
133 0 : wake_compute_cache: String,
134 : /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
135 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
136 0 : wake_compute_lock: String,
137 : /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
138 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
139 0 : connect_compute_lock: String,
140 : #[clap(flatten)]
141 : sql_over_http: SqlOverHttpArgs,
142 : /// timeout for scram authentication protocol
143 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
144 0 : scram_protocol_timeout: tokio::time::Duration,
145 : /// size of the threadpool for password hashing
146 1 : #[clap(long, default_value_t = 4)]
147 0 : scram_thread_pool_size: u8,
148 : /// Endpoint rate limiter max number of requests per second.
149 : ///
150 : /// Provided in the form `<Requests Per Second>@<Bucket Duration Size>`.
151 : /// Can be given multiple times for different bucket sizes.
152 4 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
153 1 : endpoint_rps_limit: Vec<RateBucketInfo>,
154 : /// Wake compute rate limiter max number of requests per second.
155 4 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
156 1 : wake_compute_limit: Vec<RateBucketInfo>,
157 : /// Redis rate limiter max number of requests per second.
158 3 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
159 1 : redis_rps_limit: Vec<RateBucketInfo>,
160 : /// Cancellation channel size (max queue size for redis kv client)
161 1 : #[clap(long, default_value_t = 1024)]
162 0 : cancellation_ch_size: usize,
163 : /// Cancellation ops batch size for redis
164 1 : #[clap(long, default_value_t = 8)]
165 0 : cancellation_batch_size: usize,
166 : /// cache for `allowed_ips` (use `size=0` to disable)
167 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
168 0 : allowed_ips_cache: String,
169 : /// cache for `role_secret` (use `size=0` to disable)
170 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
171 0 : role_secret_cache: String,
172 : /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections)
173 : #[clap(long)]
174 : redis_notifications: Option<String>,
175 : /// what from the available authentications type to use for the regional redis we have. Supported are "irsa" and "plain".
176 : #[clap(long, default_value = "irsa")]
177 0 : redis_auth_type: String,
178 : /// redis host for streaming connections (might be different from the notifications host)
179 : #[clap(long)]
180 : redis_host: Option<String>,
181 : /// redis port for streaming connections (might be different from the notifications host)
182 : #[clap(long)]
183 : redis_port: Option<u16>,
184 : /// redis cluster name, used in aws elasticache
185 : #[clap(long)]
186 : redis_cluster_name: Option<String>,
187 : /// redis user_id, used in aws elasticache
188 : #[clap(long)]
189 : redis_user_id: Option<String>,
190 : /// aws region to retrieve credentials
191 1 : #[clap(long, default_value_t = String::new())]
192 0 : aws_region: String,
193 : /// cache for `project_info` (use `size=0` to disable)
194 : #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
195 0 : project_info_cache: String,
196 : /// cache for all valid endpoints
197 : #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)]
198 0 : endpoint_cache_config: String,
199 : #[clap(flatten)]
200 : parquet_upload: ParquetUploadArgs,
201 :
202 : /// interval for backup metric collection
203 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
204 0 : metric_backup_collection_interval: std::time::Duration,
205 : /// remote storage configuration for backup metric collection
206 : /// Encoded as toml (same format as pageservers), eg
207 : /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`
208 : #[clap(long, value_parser = remote_storage_from_toml)]
209 : metric_backup_collection_remote_storage: Option<RemoteStorageConfig>,
210 : /// chunk size for backup metric collection
211 : /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression.
212 : #[clap(long, default_value = "4194304")]
213 0 : metric_backup_collection_chunk_size: usize,
214 : /// Whether to retry the connection to the compute node
215 : #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
216 0 : connect_to_compute_retry: String,
217 : /// Whether to retry the wake_compute request
218 : #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
219 0 : wake_compute_retry: String,
220 :
221 : /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist
222 1 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
223 0 : is_private_access_proxy: bool,
224 :
225 : /// Configure whether all incoming requests have a Proxy Protocol V2 packet.
226 1 : #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Rejected)]
227 0 : proxy_protocol_v2: ProxyProtocolV2,
228 :
229 : /// Time the proxy waits for the webauth session to be confirmed by the control plane.
230 : // TODO: rename to `console_redirect_confirmation_timeout`.
231 : #[clap(long, default_value = "2m", value_parser = humantime::parse_duration)]
232 0 : webauth_confirmation_timeout: std::time::Duration,
233 :
234 : #[clap(flatten)]
235 : pg_sni_router: PgSniRouterArgs,
236 : }
237 :
238 : #[derive(clap::Args, Clone, Copy, Debug)]
239 : struct SqlOverHttpArgs {
240 : /// timeout for http connection requests
241 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
242 0 : sql_over_http_timeout: tokio::time::Duration,
243 :
244 : /// Whether the SQL over http pool is opt-in
245 1 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
246 0 : sql_over_http_pool_opt_in: bool,
247 :
248 : /// How many connections to pool for each endpoint. Excess connections are discarded
249 1 : #[clap(long, default_value_t = 20)]
250 0 : sql_over_http_pool_max_conns_per_endpoint: usize,
251 :
252 : /// How many connections to pool for each endpoint. Excess connections are discarded
253 1 : #[clap(long, default_value_t = 20000)]
254 0 : sql_over_http_pool_max_total_conns: usize,
255 :
256 : /// How long pooled connections should remain idle for before closing
257 : #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
258 0 : sql_over_http_idle_timeout: tokio::time::Duration,
259 :
260 : /// Duration each shard will wait on average before a GC sweep.
261 : /// A longer time will causes sweeps to take longer but will interfere less frequently.
262 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
263 0 : sql_over_http_pool_gc_epoch: tokio::time::Duration,
264 :
265 : /// How many shards should the global pool have. Must be a power of two.
266 : /// More shards will introduce less contention for pool operations, but can
267 : /// increase memory used by the pool
268 1 : #[clap(long, default_value_t = 128)]
269 0 : sql_over_http_pool_shards: usize,
270 :
271 1 : #[clap(long, default_value_t = 10000)]
272 0 : sql_over_http_client_conn_threshold: u64,
273 :
274 1 : #[clap(long, default_value_t = 64)]
275 0 : sql_over_http_cancel_set_shards: usize,
276 :
277 1 : #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
278 0 : sql_over_http_max_request_size_bytes: usize,
279 :
280 1 : #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
281 0 : sql_over_http_max_response_size_bytes: usize,
282 : }
283 :
284 : #[derive(clap::Args, Clone, Debug)]
285 : struct PgSniRouterArgs {
286 : /// listen for incoming client connections on ip:port
287 : #[clap(id = "sni-router-listen", long, default_value = "127.0.0.1:4432")]
288 0 : listen: SocketAddr,
289 : /// listen for incoming client connections on ip:port, requiring TLS to compute
290 : #[clap(id = "sni-router-listen-tls", long, default_value = "127.0.0.1:4433")]
291 0 : listen_tls: SocketAddr,
292 : /// path to TLS key for client postgres connections
293 : #[clap(id = "sni-router-tls-key", long)]
294 : tls_key: Option<PathBuf>,
295 : /// path to TLS cert for client postgres connections
296 : #[clap(id = "sni-router-tls-cert", long)]
297 : tls_cert: Option<PathBuf>,
298 : /// append this domain zone to the SNI hostname to get the destination address
299 : #[clap(id = "sni-router-destination", long)]
300 : dest: Option<String>,
301 : }
302 :
303 0 : pub async fn run() -> anyhow::Result<()> {
304 0 : let _logging_guard = crate::logging::init().await?;
305 0 : let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
306 0 : let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
307 0 :
308 0 : // TODO: refactor these to use labels
309 0 : info!("Version: {GIT_VERSION}");
310 0 : info!("Build_tag: {BUILD_TAG}");
311 0 : let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {
312 0 : revision: GIT_VERSION,
313 0 : build_tag: BUILD_TAG,
314 0 : });
315 :
316 0 : let jemalloc = match crate::jemalloc::MetricRecorder::new() {
317 0 : Ok(t) => Some(t),
318 0 : Err(e) => {
319 0 : error!(error = ?e, "could not start jemalloc metrics loop");
320 0 : None
321 : }
322 : };
323 :
324 0 : let args = ProxyCliArgs::parse();
325 0 : let config = build_config(&args)?;
326 0 : let auth_backend = build_auth_backend(&args)?;
327 :
328 0 : match auth_backend {
329 0 : Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"),
330 0 : Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"),
331 : }
332 0 : info!("Using region: {}", args.aws_region);
333 0 : let (regional_redis_client, redis_notifications_client) = configure_redis(&args).await?;
334 :
335 : // Check that we can bind to address before further initialization
336 0 : info!("Starting http on {}", args.http);
337 0 : let http_listener = TcpListener::bind(args.http).await?.into_std()?;
338 :
339 0 : info!("Starting mgmt on {}", args.mgmt);
340 0 : let mgmt_listener = TcpListener::bind(args.mgmt).await?;
341 :
342 0 : let proxy_listener = if args.is_auth_broker {
343 0 : None
344 : } else {
345 0 : info!("Starting proxy on {}", args.proxy);
346 0 : Some(TcpListener::bind(args.proxy).await?)
347 : };
348 :
349 0 : let sni_router_listeners = {
350 0 : let args = &args.pg_sni_router;
351 0 : if args.dest.is_some() {
352 0 : ensure!(
353 0 : args.tls_key.is_some(),
354 0 : "sni-router-tls-key must be provided"
355 : );
356 0 : ensure!(
357 0 : args.tls_cert.is_some(),
358 0 : "sni-router-tls-cert must be provided"
359 : );
360 :
361 0 : info!(
362 0 : "Starting pg-sni-router on {} and {}",
363 : args.listen, args.listen_tls
364 : );
365 :
366 : Some((
367 0 : TcpListener::bind(args.listen).await?,
368 0 : TcpListener::bind(args.listen_tls).await?,
369 : ))
370 : } else {
371 0 : None
372 : }
373 : };
374 :
375 : // TODO: rename the argument to something like serverless.
376 : // It now covers more than just websockets, it also covers SQL over HTTP.
377 0 : let serverless_listener = if let Some(serverless_address) = args.wss {
378 0 : info!("Starting wss on {serverless_address}");
379 0 : Some(TcpListener::bind(serverless_address).await?)
380 0 : } else if args.is_auth_broker {
381 0 : bail!("wss arg must be present for auth-broker")
382 : } else {
383 0 : None
384 : };
385 :
386 0 : let cancellation_token = CancellationToken::new();
387 0 :
388 0 : let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone());
389 0 : RateBucketInfo::validate(redis_rps_limit)?;
390 :
391 0 : let redis_kv_client = regional_redis_client
392 0 : .as_ref()
393 0 : .map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));
394 0 :
395 0 : // channel size should be higher than redis client limit to avoid blocking
396 0 : let cancel_ch_size = args.cancellation_ch_size;
397 0 : let (tx_cancel, rx_cancel) = tokio::sync::mpsc::channel(cancel_ch_size);
398 0 : let cancellation_handler = Arc::new(CancellationHandler::new(
399 0 : &config.connect_to_compute,
400 0 : Some(tx_cancel),
401 0 : ));
402 0 :
403 0 : let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
404 0 : RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
405 0 : .unwrap_or(EndpointRateLimiter::DEFAULT),
406 0 : 64,
407 0 : ));
408 0 :
409 0 : // client facing tasks. these will exit on error or on cancellation
410 0 : // cancellation returns Ok(())
411 0 : let mut client_tasks = JoinSet::new();
412 0 : match auth_backend {
413 0 : Either::Left(auth_backend) => {
414 0 : if let Some(proxy_listener) = proxy_listener {
415 0 : client_tasks.spawn(crate::proxy::task_main(
416 0 : config,
417 0 : auth_backend,
418 0 : proxy_listener,
419 0 : cancellation_token.clone(),
420 0 : cancellation_handler.clone(),
421 0 : endpoint_rate_limiter.clone(),
422 0 : ));
423 0 : }
424 :
425 0 : if let Some(serverless_listener) = serverless_listener {
426 0 : client_tasks.spawn(serverless::task_main(
427 0 : config,
428 0 : auth_backend,
429 0 : serverless_listener,
430 0 : cancellation_token.clone(),
431 0 : cancellation_handler.clone(),
432 0 : endpoint_rate_limiter.clone(),
433 0 : ));
434 0 : }
435 : }
436 0 : Either::Right(auth_backend) => {
437 0 : if let Some(proxy_listener) = proxy_listener {
438 0 : client_tasks.spawn(crate::console_redirect_proxy::task_main(
439 0 : config,
440 0 : auth_backend,
441 0 : proxy_listener,
442 0 : cancellation_token.clone(),
443 0 : cancellation_handler.clone(),
444 0 : ));
445 0 : }
446 : }
447 : }
448 :
449 : // spawn pg-sni-router mode.
450 0 : if let Some((listen, listen_tls)) = sni_router_listeners {
451 0 : let args = args.pg_sni_router;
452 0 : let dest = args.dest.expect("already asserted it is set");
453 0 : let key_path = args.tls_key.expect("already asserted it is set");
454 0 : let cert_path = args.tls_cert.expect("already asserted it is set");
455 :
456 0 : let tls_config = super::pg_sni_router::parse_tls(&key_path, &cert_path)?;
457 :
458 0 : let dest = Arc::new(dest);
459 0 :
460 0 : client_tasks.spawn(super::pg_sni_router::task_main(
461 0 : dest.clone(),
462 0 : tls_config.clone(),
463 0 : None,
464 0 : listen,
465 0 : cancellation_token.clone(),
466 0 : ));
467 0 :
468 0 : client_tasks.spawn(super::pg_sni_router::task_main(
469 0 : dest,
470 0 : tls_config,
471 0 : Some(config.connect_to_compute.tls.clone()),
472 0 : listen_tls,
473 0 : cancellation_token.clone(),
474 0 : ));
475 0 : }
476 :
477 0 : client_tasks.spawn(crate::context::parquet::worker(
478 0 : cancellation_token.clone(),
479 0 : args.parquet_upload,
480 0 : ));
481 0 :
482 0 : // maintenance tasks. these never return unless there's an error
483 0 : let mut maintenance_tasks = JoinSet::new();
484 0 : maintenance_tasks.spawn(crate::signals::handle(cancellation_token.clone(), || {}));
485 0 : maintenance_tasks.spawn(http::health_server::task_main(
486 0 : http_listener,
487 0 : AppMetrics {
488 0 : jemalloc,
489 0 : neon_metrics,
490 0 : proxy: crate::metrics::Metrics::get(),
491 0 : },
492 0 : ));
493 0 : maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));
494 :
495 0 : if let Some(metrics_config) = &config.metric_collection {
496 0 : // TODO: Add gc regardles of the metric collection being enabled.
497 0 : maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
498 0 : }
499 :
500 : #[cfg_attr(not(any(test, feature = "testing")), expect(irrefutable_let_patterns))]
501 0 : if let Either::Left(auth::Backend::ControlPlane(api, ())) = &auth_backend {
502 0 : if let crate::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api {
503 0 : match (redis_notifications_client, regional_redis_client.clone()) {
504 0 : (None, None) => {}
505 0 : (client1, client2) => {
506 0 : let cache = api.caches.project_info.clone();
507 0 : if let Some(client) = client1 {
508 0 : maintenance_tasks.spawn(notifications::task_main(
509 0 : client,
510 0 : cache.clone(),
511 0 : args.region.clone(),
512 0 : ));
513 0 : }
514 0 : if let Some(client) = client2 {
515 0 : maintenance_tasks.spawn(notifications::task_main(
516 0 : client,
517 0 : cache.clone(),
518 0 : args.region.clone(),
519 0 : ));
520 0 : }
521 0 : maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
522 0 : }
523 : }
524 :
525 : // Try to connect to Redis 3 times with 1 + (0..0.1) second interval.
526 : // This prevents immediate exit and pod restart,
527 : // which can cause hammering of the redis in case of connection issues.
528 0 : if let Some(mut redis_kv_client) = redis_kv_client {
529 0 : for attempt in (0..3).with_position() {
530 0 : match redis_kv_client.try_connect().await {
531 : Ok(()) => {
532 0 : info!("Connected to Redis KV client");
533 0 : maintenance_tasks.spawn(async move {
534 0 : handle_cancel_messages(
535 0 : &mut redis_kv_client,
536 0 : rx_cancel,
537 0 : args.cancellation_batch_size,
538 0 : )
539 0 : .await?;
540 :
541 0 : drop(redis_kv_client);
542 0 :
543 0 : // `handle_cancel_messages` was terminated due to the tx_cancel
544 0 : // being dropped. this is not worthy of an error, and this task can only return `Err`,
545 0 : // so let's wait forever instead.
546 0 : std::future::pending().await
547 0 : });
548 0 : break;
549 : }
550 0 : Err(e) => {
551 0 : error!("Failed to connect to Redis KV client: {e}");
552 0 : if matches!(attempt, Position::Last(_)) {
553 0 : bail!(
554 0 : "Failed to connect to Redis KV client after {} attempts",
555 0 : attempt.into_inner()
556 0 : );
557 0 : }
558 0 : let jitter = thread_rng().gen_range(0..100);
559 0 : tokio::time::sleep(Duration::from_millis(1000 + jitter)).await;
560 : }
561 : }
562 : }
563 0 : }
564 :
565 0 : if let Some(regional_redis_client) = regional_redis_client {
566 0 : let cache = api.caches.endpoints_cache.clone();
567 0 : let con = regional_redis_client;
568 0 : let span = tracing::info_span!("endpoints_cache");
569 0 : maintenance_tasks.spawn(
570 0 : async move { cache.do_read(con, cancellation_token.clone()).await }
571 0 : .instrument(span),
572 0 : );
573 0 : }
574 0 : }
575 0 : }
576 :
577 : let maintenance = loop {
578 : // get one complete task
579 0 : match futures::future::select(
580 0 : pin!(maintenance_tasks.join_next()),
581 0 : pin!(client_tasks.join_next()),
582 0 : )
583 0 : .await
584 : {
585 : // exit immediately on maintenance task completion
586 0 : Either::Left((Some(res), _)) => break crate::error::flatten_err(res)?,
587 : // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
588 0 : Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
589 : // exit immediately on client task error
590 0 : Either::Right((Some(res), _)) => crate::error::flatten_err(res)?,
591 : // exit if all our client tasks have shutdown gracefully
592 0 : Either::Right((None, _)) => return Ok(()),
593 : }
594 : };
595 :
596 : // maintenance tasks return Infallible success values, this is an impossible value
597 : // so this match statically ensures that there are no possibilities for that value
598 : match maintenance {}
599 0 : }
600 :
601 : /// ProxyConfig is created at proxy startup, and lives forever.
602 0 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
603 0 : let thread_pool = ThreadPool::new(args.scram_thread_pool_size);
604 0 : Metrics::install(thread_pool.metrics.clone());
605 :
606 0 : let tls_config = match (&args.tls_key, &args.tls_cert) {
607 0 : (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
608 0 : key_path,
609 0 : cert_path,
610 0 : args.certs_dir.as_deref(),
611 0 : args.allow_tls_keylogfile,
612 0 : )?),
613 0 : (None, None) => None,
614 0 : _ => bail!("either both or neither tls-key and tls-cert must be specified"),
615 : };
616 0 : let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));
617 0 :
618 0 : let backup_metric_collection_config = config::MetricBackupCollectionConfig {
619 0 : remote_storage_config: args.metric_backup_collection_remote_storage.clone(),
620 0 : chunk_size: args.metric_backup_collection_chunk_size,
621 0 : };
622 :
623 0 : let metric_collection = match (
624 0 : &args.metric_collection_endpoint,
625 0 : &args.metric_collection_interval,
626 : ) {
627 0 : (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
628 0 : endpoint: endpoint.parse()?,
629 0 : interval: humantime::parse_duration(interval)?,
630 0 : backup_metric_collection_config,
631 : }),
632 0 : (None, None) => None,
633 0 : _ => bail!(
634 0 : "either both or neither metric-collection-endpoint \
635 0 : and metric-collection-interval must be specified"
636 0 : ),
637 : };
638 :
639 : let config::ConcurrencyLockOptions {
640 0 : shards,
641 0 : limiter,
642 0 : epoch,
643 0 : timeout,
644 0 : } = args.connect_compute_lock.parse()?;
645 0 : info!(
646 : ?limiter,
647 : shards,
648 : ?epoch,
649 0 : "Using NodeLocks (connect_compute)"
650 : );
651 0 : let connect_compute_locks = control_plane::locks::ApiLocks::new(
652 0 : "connect_compute_lock",
653 0 : limiter,
654 0 : shards,
655 0 : timeout,
656 0 : epoch,
657 0 : &Metrics::get().proxy.connect_compute_lock,
658 0 : );
659 0 :
660 0 : let http_config = HttpConfig {
661 0 : accept_websockets: !args.is_auth_broker,
662 0 : pool_options: GlobalConnPoolOptions {
663 0 : max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
664 0 : gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
665 0 : pool_shards: args.sql_over_http.sql_over_http_pool_shards,
666 0 : idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
667 0 : opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
668 0 : max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,
669 0 : },
670 0 : cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
671 0 : client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
672 0 : max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,
673 0 : max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
674 0 : };
675 0 : let authentication_config = AuthenticationConfig {
676 0 : jwks_cache: JwkCache::default(),
677 0 : thread_pool,
678 0 : scram_protocol_timeout: args.scram_protocol_timeout,
679 0 : ip_allowlist_check_enabled: !args.is_private_access_proxy,
680 0 : is_vpc_acccess_proxy: args.is_private_access_proxy,
681 0 : is_auth_broker: args.is_auth_broker,
682 0 : accept_jwts: args.is_auth_broker,
683 0 : console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,
684 0 : };
685 :
686 0 : let compute_config = ComputeConfig {
687 0 : retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?,
688 0 : tls: Arc::new(compute_client_config_with_root_certs()?),
689 0 : timeout: Duration::from_secs(2),
690 : };
691 :
692 0 : let config = ProxyConfig {
693 0 : tls_config,
694 0 : metric_collection,
695 0 : http_config,
696 0 : authentication_config,
697 0 : proxy_protocol_v2: args.proxy_protocol_v2,
698 0 : handshake_timeout: args.handshake_timeout,
699 0 : region: args.region.clone(),
700 0 : wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
701 0 : connect_compute_locks,
702 0 : connect_to_compute: compute_config,
703 0 : };
704 0 :
705 0 : let config = Box::leak(Box::new(config));
706 0 :
707 0 : tokio::spawn(config.connect_compute_locks.garbage_collect_worker());
708 0 :
709 0 : Ok(config)
710 0 : }
711 :
712 : /// auth::Backend is created at proxy startup, and lives forever.
713 0 : fn build_auth_backend(
714 0 : args: &ProxyCliArgs,
715 0 : ) -> anyhow::Result<Either<&'static auth::Backend<'static, ()>, &'static ConsoleRedirectBackend>> {
716 0 : match &args.auth_backend {
717 : AuthBackendType::ControlPlane => {
718 0 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
719 0 : let project_info_cache_config: ProjectInfoCacheOptions =
720 0 : args.project_info_cache.parse()?;
721 0 : let endpoint_cache_config: config::EndpointCacheConfig =
722 0 : args.endpoint_cache_config.parse()?;
723 :
724 0 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
725 0 : info!(
726 0 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
727 : );
728 0 : info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
729 0 : let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
730 0 : wake_compute_cache_config,
731 0 : project_info_cache_config,
732 0 : endpoint_cache_config,
733 0 : )));
734 :
735 : let config::ConcurrencyLockOptions {
736 0 : shards,
737 0 : limiter,
738 0 : epoch,
739 0 : timeout,
740 0 : } = args.wake_compute_lock.parse()?;
741 0 : info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
742 0 : let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
743 0 : "wake_compute_lock",
744 0 : limiter,
745 0 : shards,
746 0 : timeout,
747 0 : epoch,
748 0 : &Metrics::get().wake_compute_lock,
749 0 : )));
750 0 : tokio::spawn(locks.garbage_collect_worker());
751 :
752 0 : let url: crate::url::ApiUrl = args.auth_endpoint.parse()?;
753 :
754 0 : let endpoint = http::Endpoint::new(url, http::new_client());
755 0 :
756 0 : let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
757 0 : RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
758 0 : let wake_compute_endpoint_rate_limiter =
759 0 : Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
760 0 :
761 0 : let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
762 0 : endpoint,
763 0 : args.control_plane_token.clone(),
764 0 : caches,
765 0 : locks,
766 0 : wake_compute_endpoint_rate_limiter,
767 0 : );
768 0 :
769 0 : let api = control_plane::client::ControlPlaneClient::ProxyV1(api);
770 0 : let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
771 0 : let config = Box::leak(Box::new(auth_backend));
772 0 :
773 0 : Ok(Either::Left(config))
774 : }
775 :
776 : #[cfg(any(test, feature = "testing"))]
777 : AuthBackendType::Postgres => {
778 0 : let mut url: ApiUrl = args.auth_endpoint.parse()?;
779 0 : if url.password().is_none() {
780 0 : let password = env::var("PGPASSWORD")
781 0 : .with_context(|| "auth-endpoint does not contain a password and environment variable `PGPASSWORD` is not set")?;
782 0 : url.set_password(Some(&password))
783 0 : .expect("Failed to set password");
784 0 : }
785 0 : let api = control_plane::client::mock::MockControlPlane::new(
786 0 : url,
787 0 : !args.is_private_access_proxy,
788 0 : );
789 0 : let api = control_plane::client::ControlPlaneClient::PostgresMock(api);
790 0 :
791 0 : let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
792 0 :
793 0 : let config = Box::leak(Box::new(auth_backend));
794 0 :
795 0 : Ok(Either::Left(config))
796 : }
797 :
798 : AuthBackendType::ConsoleRedirect => {
799 0 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
800 0 : let project_info_cache_config: ProjectInfoCacheOptions =
801 0 : args.project_info_cache.parse()?;
802 0 : let endpoint_cache_config: config::EndpointCacheConfig =
803 0 : args.endpoint_cache_config.parse()?;
804 :
805 0 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
806 0 : info!(
807 0 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
808 : );
809 0 : info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
810 0 : let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
811 0 : wake_compute_cache_config,
812 0 : project_info_cache_config,
813 0 : endpoint_cache_config,
814 0 : )));
815 :
816 : let config::ConcurrencyLockOptions {
817 0 : shards,
818 0 : limiter,
819 0 : epoch,
820 0 : timeout,
821 0 : } = args.wake_compute_lock.parse()?;
822 0 : info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
823 0 : let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
824 0 : "wake_compute_lock",
825 0 : limiter,
826 0 : shards,
827 0 : timeout,
828 0 : epoch,
829 0 : &Metrics::get().wake_compute_lock,
830 0 : )));
831 :
832 0 : let url = args.uri.clone().parse()?;
833 0 : let ep_url: crate::url::ApiUrl = args.auth_endpoint.parse()?;
834 0 : let endpoint = http::Endpoint::new(ep_url, http::new_client());
835 0 : let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
836 0 : RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
837 0 : let wake_compute_endpoint_rate_limiter =
838 0 : Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
839 0 :
            // ConsoleRedirectBackend only calls get_allowed_ips_and_secret(), so
            // wake_compute_endpoint_rate_limiter and locks are never used here;
            // they are created only because NeonControlPlaneClient requires them.
843 0 : let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
844 0 : endpoint,
845 0 : args.control_plane_token.clone(),
846 0 : caches,
847 0 : locks,
848 0 : wake_compute_endpoint_rate_limiter,
849 0 : );
850 0 :
851 0 : let backend = ConsoleRedirectBackend::new(url, api);
852 0 : let config = Box::leak(Box::new(backend));
853 0 :
854 0 : Ok(Either::Right(config))
855 : }
856 : }
857 0 : }
858 :
859 0 : async fn configure_redis(
860 0 : args: &ProxyCliArgs,
861 0 : ) -> anyhow::Result<(
862 0 : Option<ConnectionWithCredentialsProvider>,
863 0 : Option<ConnectionWithCredentialsProvider>,
864 0 : )> {
865 : // TODO: untangle the config args
866 0 : let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) {
867 0 : ("plain", redis_url) => match redis_url {
868 : None => {
869 0 : bail!("plain auth requires redis_notifications to be set");
870 : }
871 0 : Some(url) => {
872 0 : Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url.clone()))
873 : }
874 : },
875 0 : ("irsa", _) => match (&args.redis_host, args.redis_port) {
876 0 : (Some(host), Some(port)) => Some(
877 0 : ConnectionWithCredentialsProvider::new_with_credentials_provider(
878 0 : host.clone(),
879 0 : port,
880 0 : elasticache::CredentialsProvider::new(
881 0 : args.aws_region.clone(),
882 0 : args.redis_cluster_name.clone(),
883 0 : args.redis_user_id.clone(),
884 0 : )
885 0 : .await,
886 : ),
887 : ),
888 : (None, None) => {
889 : // todo: upgrade to error?
890 0 : warn!(
891 0 : "irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client"
892 : );
893 0 : None
894 : }
895 : _ => {
896 0 : bail!("redis-host and redis-port must be specified together");
897 : }
898 : },
899 : _ => {
900 0 : bail!("unknown auth type given");
901 : }
902 : };
903 :
904 0 : let redis_notifications_client = if let Some(url) = &args.redis_notifications {
905 0 : Some(ConnectionWithCredentialsProvider::new_with_static_credentials(&**url))
906 : } else {
907 0 : regional_redis_client.clone()
908 : };
909 :
910 0 : Ok((regional_redis_client, redis_notifications_client))
911 0 : }
912 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use clap::Parser;

    use crate::rate_limiter::RateBucketInfo;

    /// Passing `--endpoint-rps-limit` twice must yield both buckets, in the
    /// order they appear on the command line.
    #[test]
    fn parse_endpoint_rps_limit() {
        let argv = [
            "proxy",
            "--endpoint-rps-limit",
            "100@1s",
            "--endpoint-rps-limit",
            "20@30s",
        ];
        let parsed = super::ProxyCliArgs::parse_from(argv);

        let expected = vec![
            RateBucketInfo::new(100, Duration::from_secs(1)),
            RateBucketInfo::new(20, Duration::from_secs(30)),
        ];
        assert_eq!(parsed.endpoint_rps_limit, expected);
    }
}
|