Line data Source code
1 : use std::env;
2 : use std::net::SocketAddr;
3 : use std::path::PathBuf;
4 : use std::pin::pin;
5 : use std::sync::Arc;
6 : use std::time::Duration;
7 :
8 : #[cfg(any(test, feature = "testing"))]
9 : use anyhow::Context;
10 : use anyhow::{bail, ensure};
11 : use arc_swap::ArcSwapOption;
12 : #[cfg(any(test, feature = "testing"))]
13 : use camino::Utf8PathBuf;
14 : use futures::future::Either;
15 : use itertools::{Itertools, Position};
16 : use rand::Rng;
17 : use remote_storage::RemoteStorageConfig;
18 : use tokio::net::TcpListener;
19 : #[cfg(any(test, feature = "testing"))]
20 : use tokio::sync::Notify;
21 : use tokio::task::JoinSet;
22 : use tokio_util::sync::CancellationToken;
23 : use tracing::{debug, error, info, warn};
24 : use utils::sentry_init::init_sentry;
25 : use utils::{project_build_tag, project_git_version};
26 :
27 : use crate::auth::backend::jwt::JwkCache;
28 : #[cfg(any(test, feature = "testing"))]
29 : use crate::auth::backend::local::LocalBackend;
30 : use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
31 : use crate::batch::BatchQueue;
32 : use crate::cancellation::{CancellationHandler, CancellationProcessor};
33 : #[cfg(feature = "rest_broker")]
34 : use crate::config::RestConfig;
35 : #[cfg(any(test, feature = "testing"))]
36 : use crate::config::refresh_config_loop;
37 : use crate::config::{
38 : self, AuthenticationConfig, CacheOptions, ComputeConfig, HttpConfig, ProjectInfoCacheOptions,
39 : ProxyConfig, ProxyProtocolV2, remote_storage_from_toml,
40 : };
41 : use crate::context::parquet::ParquetUploadArgs;
42 : use crate::http::health_server::AppMetrics;
43 : use crate::metrics::Metrics;
44 : use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
45 : use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
46 : use crate::redis::kv_ops::RedisKVClient;
47 : use crate::redis::{elasticache, notifications};
48 : use crate::scram::threadpool::ThreadPool;
49 : use crate::serverless::GlobalConnPoolOptions;
50 : use crate::serverless::cancel_set::CancelSet;
51 : #[cfg(feature = "rest_broker")]
52 : use crate::serverless::rest::DbSchemaCache;
53 : use crate::tls::client_config::compute_client_config_with_root_certs;
54 : #[cfg(any(test, feature = "testing"))]
55 : use crate::url::ApiUrl;
56 : use crate::{auth, control_plane, http, serverless, usage_metrics};
57 :
58 : project_git_version!(GIT_VERSION);
59 : project_build_tag!(BUILD_TAG);
60 :
61 : use clap::{Parser, ValueEnum};
62 :
63 : #[derive(Clone, Debug, ValueEnum)]
64 : #[clap(rename_all = "kebab-case")]
65 : enum AuthBackendType {
66 : #[clap(alias("cplane-v1"))]
67 : ControlPlane,
68 :
69 : #[clap(alias("link"))]
70 : ConsoleRedirect,
71 :
72 : #[cfg(any(test, feature = "testing"))]
73 : Postgres,
74 :
75 : #[cfg(any(test, feature = "testing"))]
76 : Local,
77 : }
78 :
79 : /// Neon proxy/router
80 : #[derive(Parser)]
81 : #[command(version = GIT_VERSION, about)]
82 : struct ProxyCliArgs {
83 : /// Name of the region this proxy is deployed in
84 : #[clap(long, default_value_t = String::new())]
85 : region: String,
86 : /// listen for incoming client connections on ip:port
87 : #[clap(short, long, default_value = "127.0.0.1:4432")]
88 : proxy: SocketAddr,
89 : #[clap(value_enum, long, default_value_t = AuthBackendType::ConsoleRedirect)]
90 : auth_backend: AuthBackendType,
91 : /// Path of the local proxy config file (used for local-file auth backend)
92 : #[clap(long, default_value = "./local_proxy.json")]
93 : #[cfg(any(test, feature = "testing"))]
94 : config_path: Utf8PathBuf,
95 : /// listen for management callback connection on ip:port
96 : #[clap(short, long, default_value = "127.0.0.1:7000")]
97 : mgmt: SocketAddr,
98 : /// listen for incoming http connections (metrics, etc) on ip:port
99 : #[clap(long, default_value = "127.0.0.1:7001")]
100 : http: SocketAddr,
101 : /// listen for incoming wss connections on ip:port
102 : #[clap(long)]
103 : wss: Option<SocketAddr>,
104 : /// redirect unauthenticated users to the given uri in case of console redirect auth
105 : #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
106 : uri: String,
107 : /// cloud API endpoint for authenticating users
108 : #[clap(
109 : short,
110 : long,
111 : default_value = "http://localhost:3000/authenticate_proxy_request/"
112 : )]
113 : auth_endpoint: String,
114 : /// JWT used to connect to control plane.
115 : #[clap(
116 : long,
117 : value_name = "JWT",
118 : default_value = "",
119 : env = "NEON_PROXY_TO_CONTROLPLANE_TOKEN"
120 : )]
121 : control_plane_token: Arc<str>,
122 : /// if this is not local proxy, this toggles whether we accept jwt or passwords for http
123 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
124 : is_auth_broker: bool,
125 : /// path to TLS key for client postgres connections
126 : ///
127 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
128 : #[clap(short = 'k', long, alias = "ssl-key")]
129 : tls_key: Option<PathBuf>,
130 : /// path to TLS cert for client postgres connections
131 : ///
132 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
133 : #[clap(short = 'c', long, alias = "ssl-cert")]
134 : tls_cert: Option<PathBuf>,
135 : /// Allow writing TLS session keys to the given file pointed to by the environment variable `SSLKEYLOGFILE`.
136 : #[clap(long, alias = "allow-ssl-keylogfile")]
137 : allow_tls_keylogfile: bool,
138 : /// path to directory with TLS certificates for client postgres connections
139 : #[clap(long)]
140 : certs_dir: Option<PathBuf>,
141 : /// timeout for the TLS handshake
142 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
143 : handshake_timeout: tokio::time::Duration,
144 : /// cache for `wake_compute` api method (use `size=0` to disable)
145 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
146 : wake_compute_cache: String,
147 : /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
148 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
149 : wake_compute_lock: String,
150 : /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
151 : #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
152 : connect_compute_lock: String,
153 : #[clap(flatten)]
154 : sql_over_http: SqlOverHttpArgs,
155 : /// timeout for scram authentication protocol
156 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
157 : scram_protocol_timeout: tokio::time::Duration,
158 : /// size of the threadpool for password hashing
159 : #[clap(long, default_value_t = 4)]
160 : scram_thread_pool_size: u8,
161 : /// Endpoint rate limiter max number of requests per second.
162 : ///
163 : /// Provided in the form `<Requests Per Second>@<Bucket Duration Size>`.
164 : /// Can be given multiple times for different bucket sizes.
165 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_ENDPOINT_SET)]
166 : endpoint_rps_limit: Vec<RateBucketInfo>,
167 : /// Wake compute rate limiter max number of requests per second.
168 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
169 : wake_compute_limit: Vec<RateBucketInfo>,
170 : /// Cancellation channel size (max queue size for redis kv client)
171 : #[clap(long, default_value_t = 1024)]
172 : cancellation_ch_size: usize,
173 : /// Cancellation ops batch size for redis
174 : #[clap(long, default_value_t = 8)]
175 : cancellation_batch_size: usize,
176 : /// redis url for plain authentication
177 : #[clap(long, alias("redis-notifications"))]
178 : redis_plain: Option<String>,
179 : /// what from the available authentications type to use for redis. Supported are "irsa" and "plain".
180 : #[clap(long, default_value = "irsa")]
181 : redis_auth_type: String,
182 : /// redis host for irsa authentication
183 : #[clap(long)]
184 : redis_host: Option<String>,
185 : /// redis port for irsa authentication
186 : #[clap(long)]
187 : redis_port: Option<u16>,
188 : /// redis cluster name for irsa authentication
189 : #[clap(long)]
190 : redis_cluster_name: Option<String>,
191 : /// redis user_id for irsa authentication
192 : #[clap(long)]
193 : redis_user_id: Option<String>,
194 : /// aws region for irsa authentication
195 : #[clap(long, default_value_t = String::new())]
196 : aws_region: String,
197 : /// cache for `project_info` (use `size=0` to disable)
198 : #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
199 : project_info_cache: String,
200 : /// cache for all valid endpoints
201 : // TODO: remove after a couple of releases.
202 : #[clap(long, default_value_t = String::new())]
203 : #[deprecated]
204 : endpoint_cache_config: String,
205 : #[clap(flatten)]
206 : parquet_upload: ParquetUploadArgs,
207 :
208 : /// http endpoint to receive periodic metric updates
209 : #[clap(long)]
210 : metric_collection_endpoint: Option<String>,
211 : /// how often metrics should be sent to a collection endpoint
212 : #[clap(long)]
213 : metric_collection_interval: Option<String>,
214 : /// interval for backup metric collection
215 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
216 : metric_backup_collection_interval: std::time::Duration,
217 : /// remote storage configuration for backup metric collection
218 : /// Encoded as toml (same format as pageservers), eg
219 : /// `{bucket_name='the-bucket',bucket_region='us-east-1',prefix_in_bucket='proxy',endpoint='http://minio:9000'}`
220 : #[clap(long, value_parser = remote_storage_from_toml)]
221 : metric_backup_collection_remote_storage: Option<RemoteStorageConfig>,
222 : /// chunk size for backup metric collection
223 : /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression.
224 : #[clap(long, default_value = "4194304")]
225 : metric_backup_collection_chunk_size: usize,
226 :
227 : /// Whether to retry the connection to the compute node
228 : #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
229 : connect_to_compute_retry: String,
230 : /// Whether to retry the wake_compute request
231 : #[clap(long, default_value = config::RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)]
232 : wake_compute_retry: String,
233 :
234 : /// Configure if this is a private access proxy for the POC: In that case the proxy will ignore the IP allowlist
235 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
236 : is_private_access_proxy: bool,
237 :
238 : /// Configure whether all incoming requests have a Proxy Protocol V2 packet.
239 : #[clap(value_enum, long, default_value_t = ProxyProtocolV2::Rejected)]
240 : proxy_protocol_v2: ProxyProtocolV2,
241 :
242 : /// Time the proxy waits for the webauth session to be confirmed by the control plane.
243 : // TODO: rename to `console_redirect_confirmation_timeout`.
244 : #[clap(long, default_value = "2m", value_parser = humantime::parse_duration)]
245 : webauth_confirmation_timeout: std::time::Duration,
246 :
247 : #[clap(flatten)]
248 : pg_sni_router: PgSniRouterArgs,
249 :
250 : /// if this is not local proxy, this toggles whether we accept Postgres REST requests
251 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
252 : #[cfg(feature = "rest_broker")]
253 : is_rest_broker: bool,
254 :
255 : /// cache for `db_schema_cache` introspection (use `size=0` to disable)
256 : #[clap(long, default_value = "size=1000,ttl=1h")]
257 : #[cfg(feature = "rest_broker")]
258 : db_schema_cache: String,
259 :
260 : /// Maximum size allowed for schema in bytes
261 : #[clap(long, default_value_t = 5 * 1024 * 1024)] // 5MB
262 : #[cfg(feature = "rest_broker")]
263 : max_schema_size: usize,
264 :
265 : /// Hostname prefix to strip from request hostname to get database hostname
266 : #[clap(long, default_value = "apirest.")]
267 : #[cfg(feature = "rest_broker")]
268 : hostname_prefix: String,
269 : }
270 :
271 : #[derive(clap::Args, Clone, Copy, Debug)]
272 : struct SqlOverHttpArgs {
273 : /// timeout for http connection requests
274 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
275 : sql_over_http_timeout: tokio::time::Duration,
276 :
277 : /// Whether the SQL over http pool is opt-in
278 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
279 : sql_over_http_pool_opt_in: bool,
280 :
281 : /// How many connections to pool for each endpoint. Excess connections are discarded
282 : #[clap(long, default_value_t = 20)]
283 : sql_over_http_pool_max_conns_per_endpoint: usize,
284 :
285 : /// How many connections to pool for each endpoint. Excess connections are discarded
286 : #[clap(long, default_value_t = 20000)]
287 : sql_over_http_pool_max_total_conns: usize,
288 :
289 : /// How long pooled connections should remain idle for before closing
290 : #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
291 : sql_over_http_idle_timeout: tokio::time::Duration,
292 :
293 : /// Duration each shard will wait on average before a GC sweep.
294 : /// A longer time will causes sweeps to take longer but will interfere less frequently.
295 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
296 : sql_over_http_pool_gc_epoch: tokio::time::Duration,
297 :
298 : /// How many shards should the global pool have. Must be a power of two.
299 : /// More shards will introduce less contention for pool operations, but can
300 : /// increase memory used by the pool
301 : #[clap(long, default_value_t = 128)]
302 : sql_over_http_pool_shards: usize,
303 :
304 : #[clap(long, default_value_t = 10000)]
305 : sql_over_http_client_conn_threshold: u64,
306 :
307 : #[clap(long, default_value_t = 64)]
308 : sql_over_http_cancel_set_shards: usize,
309 :
310 : #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
311 : sql_over_http_max_request_size_bytes: usize,
312 :
313 : #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
314 : sql_over_http_max_response_size_bytes: usize,
315 : }
316 :
317 : #[derive(clap::Args, Clone, Debug)]
318 : struct PgSniRouterArgs {
319 : /// listen for incoming client connections on ip:port
320 : #[clap(id = "sni-router-listen", long, default_value = "127.0.0.1:4432")]
321 : listen: SocketAddr,
322 : /// listen for incoming client connections on ip:port, requiring TLS to compute
323 : #[clap(id = "sni-router-listen-tls", long, default_value = "127.0.0.1:4433")]
324 : listen_tls: SocketAddr,
325 : /// path to TLS key for client postgres connections
326 : #[clap(id = "sni-router-tls-key", long)]
327 : tls_key: Option<PathBuf>,
328 : /// path to TLS cert for client postgres connections
329 : #[clap(id = "sni-router-tls-cert", long)]
330 : tls_cert: Option<PathBuf>,
331 : /// append this domain zone to the SNI hostname to get the destination address
332 : #[clap(id = "sni-router-destination", long)]
333 : dest: Option<String>,
334 : }
335 :
336 0 : pub async fn run() -> anyhow::Result<()> {
337 0 : let _logging_guard = crate::logging::init()?;
338 0 : let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
339 0 : let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
340 :
341 : // TODO: refactor these to use labels
342 0 : info!("Version: {GIT_VERSION}");
343 0 : info!("Build_tag: {BUILD_TAG}");
344 0 : let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo {
345 0 : revision: GIT_VERSION,
346 0 : build_tag: BUILD_TAG,
347 0 : });
348 :
349 0 : let jemalloc = match crate::jemalloc::MetricRecorder::new() {
350 0 : Ok(t) => Some(t),
351 0 : Err(e) => {
352 0 : error!(error = ?e, "could not start jemalloc metrics loop");
353 0 : None
354 : }
355 : };
356 :
357 0 : let args = ProxyCliArgs::parse();
358 0 : let config = build_config(&args)?;
359 0 : let auth_backend = build_auth_backend(&args)?;
360 :
361 0 : match auth_backend {
362 0 : Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"),
363 0 : Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"),
364 : }
365 0 : info!("Using region: {}", args.aws_region);
366 0 : let redis_client = configure_redis(&args).await?;
367 :
368 : // Check that we can bind to address before further initialization
369 0 : info!("Starting http on {}", args.http);
370 0 : let http_listener = TcpListener::bind(args.http).await?.into_std()?;
371 :
372 0 : info!("Starting mgmt on {}", args.mgmt);
373 0 : let mgmt_listener = TcpListener::bind(args.mgmt).await?;
374 :
375 0 : let proxy_listener = if args.is_auth_broker {
376 0 : None
377 : } else {
378 0 : info!("Starting proxy on {}", args.proxy);
379 0 : Some(TcpListener::bind(args.proxy).await?)
380 : };
381 :
382 0 : let sni_router_listeners = {
383 0 : let args = &args.pg_sni_router;
384 0 : if args.dest.is_some() {
385 0 : ensure!(
386 0 : args.tls_key.is_some(),
387 0 : "sni-router-tls-key must be provided"
388 : );
389 0 : ensure!(
390 0 : args.tls_cert.is_some(),
391 0 : "sni-router-tls-cert must be provided"
392 : );
393 :
394 0 : info!(
395 0 : "Starting pg-sni-router on {} and {}",
396 : args.listen, args.listen_tls
397 : );
398 :
399 : Some((
400 0 : TcpListener::bind(args.listen).await?,
401 0 : TcpListener::bind(args.listen_tls).await?,
402 : ))
403 : } else {
404 0 : None
405 : }
406 : };
407 :
408 : // TODO: rename the argument to something like serverless.
409 : // It now covers more than just websockets, it also covers SQL over HTTP.
410 0 : let serverless_listener = if let Some(serverless_address) = args.wss {
411 0 : info!("Starting wss on {serverless_address}");
412 0 : Some(TcpListener::bind(serverless_address).await?)
413 0 : } else if args.is_auth_broker {
414 0 : bail!("wss arg must be present for auth-broker")
415 : } else {
416 0 : None
417 : };
418 :
419 0 : let cancellation_token = CancellationToken::new();
420 :
421 0 : let cancellation_handler = Arc::new(CancellationHandler::new(&config.connect_to_compute));
422 :
423 0 : let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
424 0 : RateBucketInfo::to_leaky_bucket(&args.endpoint_rps_limit)
425 0 : .unwrap_or(EndpointRateLimiter::DEFAULT),
426 : 64,
427 : ));
428 :
429 : #[cfg(any(test, feature = "testing"))]
430 0 : let refresh_config_notify = Arc::new(Notify::new());
431 : // client facing tasks. these will exit on error or on cancellation
432 : // cancellation returns Ok(())
433 0 : let mut client_tasks = JoinSet::new();
434 0 : match auth_backend {
435 0 : Either::Left(auth_backend) => {
436 0 : if let Some(proxy_listener) = proxy_listener {
437 0 : client_tasks.spawn(crate::pglb::task_main(
438 0 : config,
439 0 : auth_backend,
440 0 : proxy_listener,
441 0 : cancellation_token.clone(),
442 0 : cancellation_handler.clone(),
443 0 : endpoint_rate_limiter.clone(),
444 0 : ));
445 0 : }
446 :
447 0 : if let Some(serverless_listener) = serverless_listener {
448 0 : client_tasks.spawn(serverless::task_main(
449 0 : config,
450 0 : auth_backend,
451 0 : serverless_listener,
452 0 : cancellation_token.clone(),
453 0 : cancellation_handler.clone(),
454 0 : endpoint_rate_limiter.clone(),
455 0 : ));
456 0 : }
457 :
458 : // if auth backend is local, we need to load the config file
459 : #[cfg(any(test, feature = "testing"))]
460 0 : if let auth::Backend::Local(_) = &auth_backend {
461 0 : refresh_config_notify.notify_one();
462 0 : tokio::spawn(refresh_config_loop(
463 0 : config,
464 0 : args.config_path,
465 0 : refresh_config_notify.clone(),
466 0 : ));
467 0 : }
468 : }
469 0 : Either::Right(auth_backend) => {
470 0 : if let Some(proxy_listener) = proxy_listener {
471 0 : client_tasks.spawn(crate::console_redirect_proxy::task_main(
472 0 : config,
473 0 : auth_backend,
474 0 : proxy_listener,
475 0 : cancellation_token.clone(),
476 0 : cancellation_handler.clone(),
477 0 : ));
478 0 : }
479 : }
480 : }
481 :
482 : // spawn pg-sni-router mode.
483 0 : if let Some((listen, listen_tls)) = sni_router_listeners {
484 0 : let args = args.pg_sni_router;
485 0 : let dest = args.dest.expect("already asserted it is set");
486 0 : let key_path = args.tls_key.expect("already asserted it is set");
487 0 : let cert_path = args.tls_cert.expect("already asserted it is set");
488 :
489 0 : let tls_config = super::pg_sni_router::parse_tls(&key_path, &cert_path)?;
490 :
491 0 : let dest = Arc::new(dest);
492 :
493 0 : client_tasks.spawn(super::pg_sni_router::task_main(
494 0 : dest.clone(),
495 0 : tls_config.clone(),
496 0 : None,
497 0 : listen,
498 0 : cancellation_token.clone(),
499 : ));
500 :
501 0 : client_tasks.spawn(super::pg_sni_router::task_main(
502 0 : dest,
503 0 : tls_config,
504 0 : Some(config.connect_to_compute.tls.clone()),
505 0 : listen_tls,
506 0 : cancellation_token.clone(),
507 : ));
508 0 : }
509 :
510 0 : client_tasks.spawn(crate::context::parquet::worker(
511 0 : cancellation_token.clone(),
512 0 : args.parquet_upload,
513 0 : args.region,
514 : ));
515 :
516 : // maintenance tasks. these never return unless there's an error
517 0 : let mut maintenance_tasks = JoinSet::new();
518 :
519 0 : maintenance_tasks.spawn(crate::signals::handle(cancellation_token.clone(), {
520 0 : move || {
521 : #[cfg(any(test, feature = "testing"))]
522 0 : refresh_config_notify.notify_one();
523 0 : }
524 : }));
525 0 : maintenance_tasks.spawn(http::health_server::task_main(
526 0 : http_listener,
527 0 : AppMetrics {
528 0 : jemalloc,
529 0 : neon_metrics,
530 0 : proxy: crate::metrics::Metrics::get(),
531 0 : },
532 : ));
533 0 : maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));
534 :
535 : // add a task to flush the db_schema cache every 10 minutes
536 : #[cfg(feature = "rest_broker")]
537 0 : if let Some(db_schema_cache) = &config.rest_config.db_schema_cache {
538 0 : maintenance_tasks.spawn(async move {
539 : loop {
540 0 : tokio::time::sleep(Duration::from_secs(600)).await;
541 0 : db_schema_cache.flush();
542 : }
543 : });
544 0 : }
545 :
546 0 : if let Some(metrics_config) = &config.metric_collection {
547 0 : // TODO: Add gc regardles of the metric collection being enabled.
548 0 : maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
549 0 : }
550 :
551 0 : if let Some(client) = redis_client {
552 : // Try to connect to Redis 3 times with 1 + (0..0.1) second interval.
553 : // This prevents immediate exit and pod restart,
554 : // which can cause hammering of the redis in case of connection issues.
555 : // cancellation key management
556 0 : let mut redis_kv_client = RedisKVClient::new(client.clone());
557 0 : for attempt in (0..3).with_position() {
558 0 : match redis_kv_client.try_connect().await {
559 : Ok(()) => {
560 0 : info!("Connected to Redis KV client");
561 0 : cancellation_handler.init_tx(BatchQueue::new(CancellationProcessor {
562 0 : client: redis_kv_client,
563 0 : batch_size: args.cancellation_batch_size,
564 0 : }));
565 :
566 0 : break;
567 : }
568 0 : Err(e) => {
569 0 : error!("Failed to connect to Redis KV client: {e}");
570 0 : if matches!(attempt, Position::Last(_)) {
571 0 : bail!(
572 0 : "Failed to connect to Redis KV client after {} attempts",
573 0 : attempt.into_inner()
574 : );
575 0 : }
576 0 : let jitter = rand::rng().random_range(0..100);
577 0 : tokio::time::sleep(Duration::from_millis(1000 + jitter)).await;
578 : }
579 : }
580 : }
581 :
582 : #[allow(irrefutable_let_patterns)]
583 0 : if let Either::Left(auth::Backend::ControlPlane(api, ())) = &auth_backend
584 0 : && let crate::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api
585 : {
586 : // project info cache and invalidation of that cache.
587 0 : let cache = api.caches.project_info.clone();
588 0 : maintenance_tasks.spawn(notifications::task_main(client, cache.clone()));
589 0 : maintenance_tasks.spawn(async move { cache.gc_worker().await });
590 0 : }
591 0 : }
592 :
593 : let maintenance = loop {
594 : // get one complete task
595 0 : match futures::future::select(
596 0 : pin!(maintenance_tasks.join_next()),
597 0 : pin!(client_tasks.join_next()),
598 : )
599 0 : .await
600 : {
601 : // exit immediately on maintenance task completion
602 0 : Either::Left((Some(res), _)) => break crate::error::flatten_err(res)?,
603 : // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
604 0 : Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
605 : // exit immediately on client task error
606 0 : Either::Right((Some(res), _)) => crate::error::flatten_err(res)?,
607 : // exit if all our client tasks have shutdown gracefully
608 0 : Either::Right((None, _)) => return Ok(()),
609 : }
610 : };
611 :
612 : // maintenance tasks return Infallible success values, this is an impossible value
613 : // so this match statically ensures that there are no possibilities for that value
614 : match maintenance {}
615 0 : }
616 :
617 : /// ProxyConfig is created at proxy startup, and lives forever.
618 0 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
619 0 : let thread_pool = ThreadPool::new(args.scram_thread_pool_size);
620 0 : Metrics::install(thread_pool.metrics.clone());
621 :
622 0 : let tls_config = match (&args.tls_key, &args.tls_cert) {
623 0 : (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
624 0 : key_path,
625 0 : cert_path,
626 0 : args.certs_dir.as_deref(),
627 0 : args.allow_tls_keylogfile,
628 0 : )?),
629 0 : (None, None) => None,
630 0 : _ => bail!("either both or neither tls-key and tls-cert must be specified"),
631 : };
632 0 : let tls_config = ArcSwapOption::from(tls_config.map(Arc::new));
633 :
634 0 : let backup_metric_collection_config = config::MetricBackupCollectionConfig {
635 0 : remote_storage_config: args.metric_backup_collection_remote_storage.clone(),
636 0 : chunk_size: args.metric_backup_collection_chunk_size,
637 0 : };
638 :
639 0 : let metric_collection = match (
640 0 : &args.metric_collection_endpoint,
641 0 : &args.metric_collection_interval,
642 : ) {
643 0 : (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
644 0 : endpoint: endpoint.parse()?,
645 0 : interval: humantime::parse_duration(interval)?,
646 0 : backup_metric_collection_config,
647 : }),
648 0 : (None, None) => None,
649 0 : _ => bail!(
650 0 : "either both or neither metric-collection-endpoint \
651 0 : and metric-collection-interval must be specified"
652 : ),
653 : };
654 :
655 : let config::ConcurrencyLockOptions {
656 0 : shards,
657 0 : limiter,
658 0 : epoch,
659 0 : timeout,
660 0 : } = args.connect_compute_lock.parse()?;
661 0 : info!(
662 : ?limiter,
663 : shards,
664 : ?epoch,
665 0 : "Using NodeLocks (connect_compute)"
666 : );
667 0 : let connect_compute_locks = control_plane::locks::ApiLocks::new(
668 : "connect_compute_lock",
669 0 : limiter,
670 0 : shards,
671 0 : timeout,
672 0 : epoch,
673 0 : &Metrics::get().proxy.connect_compute_lock,
674 : );
675 :
676 0 : let http_config = HttpConfig {
677 0 : accept_websockets: !args.is_auth_broker,
678 0 : pool_options: GlobalConnPoolOptions {
679 0 : max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
680 0 : gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
681 0 : pool_shards: args.sql_over_http.sql_over_http_pool_shards,
682 0 : idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
683 0 : opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
684 0 : max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,
685 0 : },
686 0 : cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
687 0 : client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
688 0 : max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,
689 0 : max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
690 0 : };
691 0 : let authentication_config = AuthenticationConfig {
692 0 : jwks_cache: JwkCache::default(),
693 0 : thread_pool,
694 0 : scram_protocol_timeout: args.scram_protocol_timeout,
695 0 : ip_allowlist_check_enabled: !args.is_private_access_proxy,
696 0 : is_vpc_acccess_proxy: args.is_private_access_proxy,
697 0 : is_auth_broker: args.is_auth_broker,
698 0 : accept_jwts: args.is_auth_broker,
699 0 : console_redirect_confirmation_timeout: args.webauth_confirmation_timeout,
700 0 : };
701 :
702 0 : let compute_config = ComputeConfig {
703 0 : retry: config::RetryConfig::parse(&args.connect_to_compute_retry)?,
704 0 : tls: Arc::new(compute_client_config_with_root_certs()?),
705 0 : timeout: Duration::from_secs(2),
706 : };
707 :
708 : #[cfg(feature = "rest_broker")]
709 0 : let rest_config = {
710 0 : let db_schema_cache_config: CacheOptions = args.db_schema_cache.parse()?;
711 0 : info!("Using DbSchemaCache with options={db_schema_cache_config:?}");
712 :
713 0 : let db_schema_cache = if args.is_rest_broker {
714 0 : Some(DbSchemaCache::new(
715 0 : "db_schema_cache",
716 0 : db_schema_cache_config.size,
717 0 : db_schema_cache_config.ttl,
718 0 : true,
719 0 : ))
720 : } else {
721 0 : None
722 : };
723 :
724 0 : RestConfig {
725 0 : is_rest_broker: args.is_rest_broker,
726 0 : db_schema_cache,
727 0 : max_schema_size: args.max_schema_size,
728 0 : hostname_prefix: args.hostname_prefix.clone(),
729 0 : }
730 : };
731 :
732 0 : let mut greetings = env::var_os("NEON_MOTD").map_or(String::new(), |s| match s.into_string() {
733 0 : Ok(s) => s,
734 : Err(_) => {
735 0 : debug!("NEON_MOTD environment variable is not valid UTF-8");
736 0 : String::new()
737 : }
738 0 : });
739 :
740 0 : match &args.auth_backend {
741 0 : AuthBackendType::ControlPlane => {}
742 : #[cfg(any(test, feature = "testing"))]
743 0 : AuthBackendType::Postgres => {}
744 : #[cfg(any(test, feature = "testing"))]
745 0 : AuthBackendType::Local => {}
746 0 : AuthBackendType::ConsoleRedirect => {
747 0 : greetings = "Connected to database".to_string();
748 0 : }
749 : }
750 :
751 0 : let config = ProxyConfig {
752 0 : tls_config,
753 0 : metric_collection,
754 0 : http_config,
755 0 : authentication_config,
756 0 : proxy_protocol_v2: args.proxy_protocol_v2,
757 0 : handshake_timeout: args.handshake_timeout,
758 0 : wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
759 0 : connect_compute_locks,
760 0 : connect_to_compute: compute_config,
761 0 : greetings,
762 : #[cfg(feature = "testing")]
763 : disable_pg_session_jwt: false,
764 : #[cfg(feature = "rest_broker")]
765 0 : rest_config,
766 : };
767 :
768 0 : let config = Box::leak(Box::new(config));
769 :
770 0 : tokio::spawn(config.connect_compute_locks.garbage_collect_worker());
771 :
772 0 : Ok(config)
773 0 : }
774 :
775 : /// auth::Backend is created at proxy startup, and lives forever.
776 0 : fn build_auth_backend(
777 0 : args: &ProxyCliArgs,
778 0 : ) -> anyhow::Result<Either<&'static auth::Backend<'static, ()>, &'static ConsoleRedirectBackend>> {
779 0 : match &args.auth_backend {
780 : AuthBackendType::ControlPlane => {
781 0 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
782 0 : let project_info_cache_config: ProjectInfoCacheOptions =
783 0 : args.project_info_cache.parse()?;
784 :
785 0 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
786 0 : info!(
787 0 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
788 : );
789 :
790 0 : let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
791 0 : wake_compute_cache_config,
792 0 : project_info_cache_config,
793 : )));
794 :
795 : let config::ConcurrencyLockOptions {
796 0 : shards,
797 0 : limiter,
798 0 : epoch,
799 0 : timeout,
800 0 : } = args.wake_compute_lock.parse()?;
801 0 : info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
802 0 : let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
803 : "wake_compute_lock",
804 0 : limiter,
805 0 : shards,
806 0 : timeout,
807 0 : epoch,
808 0 : &Metrics::get().wake_compute_lock,
809 : )));
810 0 : tokio::spawn(locks.garbage_collect_worker());
811 :
812 0 : let url: crate::url::ApiUrl = args.auth_endpoint.parse()?;
813 :
814 0 : let endpoint = http::Endpoint::new(url, http::new_client());
815 :
816 0 : let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
817 0 : RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
818 0 : let wake_compute_endpoint_rate_limiter =
819 0 : Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
820 :
821 0 : let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
822 0 : endpoint,
823 0 : args.control_plane_token.clone(),
824 0 : caches,
825 0 : locks,
826 0 : wake_compute_endpoint_rate_limiter,
827 : );
828 :
829 0 : let api = control_plane::client::ControlPlaneClient::ProxyV1(api);
830 0 : let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
831 0 : let config = Box::leak(Box::new(auth_backend));
832 :
833 0 : Ok(Either::Left(config))
834 : }
835 :
836 : #[cfg(any(test, feature = "testing"))]
837 : AuthBackendType::Postgres => {
838 0 : let mut url: ApiUrl = args.auth_endpoint.parse()?;
839 0 : if url.password().is_none() {
840 0 : let password = env::var("PGPASSWORD")
841 0 : .with_context(|| "auth-endpoint does not contain a password and environment variable `PGPASSWORD` is not set")?;
842 0 : url.set_password(Some(&password))
843 0 : .expect("Failed to set password");
844 0 : }
845 0 : let api = control_plane::client::mock::MockControlPlane::new(
846 0 : url,
847 0 : !args.is_private_access_proxy,
848 : );
849 0 : let api = control_plane::client::ControlPlaneClient::PostgresMock(api);
850 :
851 0 : let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
852 :
853 0 : let config = Box::leak(Box::new(auth_backend));
854 :
855 0 : Ok(Either::Left(config))
856 : }
857 :
858 : AuthBackendType::ConsoleRedirect => {
859 0 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
860 0 : let project_info_cache_config: ProjectInfoCacheOptions =
861 0 : args.project_info_cache.parse()?;
862 :
863 0 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
864 0 : info!(
865 0 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
866 : );
867 :
868 0 : let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
869 0 : wake_compute_cache_config,
870 0 : project_info_cache_config,
871 : )));
872 :
873 : let config::ConcurrencyLockOptions {
874 0 : shards,
875 0 : limiter,
876 0 : epoch,
877 0 : timeout,
878 0 : } = args.wake_compute_lock.parse()?;
879 0 : info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
880 0 : let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
881 : "wake_compute_lock",
882 0 : limiter,
883 0 : shards,
884 0 : timeout,
885 0 : epoch,
886 0 : &Metrics::get().wake_compute_lock,
887 : )));
888 :
889 0 : let url = args.uri.clone().parse()?;
890 0 : let ep_url: crate::url::ApiUrl = args.auth_endpoint.parse()?;
891 0 : let endpoint = http::Endpoint::new(ep_url, http::new_client());
892 0 : let mut wake_compute_rps_limit = args.wake_compute_limit.clone();
893 0 : RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
894 0 : let wake_compute_endpoint_rate_limiter =
895 0 : Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
896 :
897 : // Since we use only get_allowed_ips_and_secret() wake_compute_endpoint_rate_limiter
898 : // and locks are not used in ConsoleRedirectBackend,
899 : // but they are required by the NeonControlPlaneClient
900 0 : let api = control_plane::client::cplane_proxy_v1::NeonControlPlaneClient::new(
901 0 : endpoint,
902 0 : args.control_plane_token.clone(),
903 0 : caches,
904 0 : locks,
905 0 : wake_compute_endpoint_rate_limiter,
906 : );
907 :
908 0 : let backend = ConsoleRedirectBackend::new(url, api);
909 0 : let config = Box::leak(Box::new(backend));
910 :
911 0 : Ok(Either::Right(config))
912 : }
913 :
914 : #[cfg(any(test, feature = "testing"))]
915 : AuthBackendType::Local => {
916 0 : let postgres: SocketAddr = "127.0.0.1:7432".parse()?;
917 0 : let compute_ctl: ApiUrl = "http://127.0.0.1:3081/".parse()?;
918 0 : let auth_backend = crate::auth::Backend::Local(
919 0 : crate::auth::backend::MaybeOwned::Owned(LocalBackend::new(postgres, compute_ctl)),
920 0 : );
921 :
922 0 : let config = Box::leak(Box::new(auth_backend));
923 :
924 0 : Ok(Either::Left(config))
925 : }
926 : }
927 0 : }
928 :
929 0 : async fn configure_redis(
930 0 : args: &ProxyCliArgs,
931 0 : ) -> anyhow::Result<Option<ConnectionWithCredentialsProvider>> {
932 : // TODO: untangle the config args
933 0 : let redis_client = match &*args.redis_auth_type {
934 0 : "plain" => match &args.redis_plain {
935 : None => {
936 0 : bail!("plain auth requires redis_plain to be set");
937 : }
938 0 : Some(url) => {
939 0 : Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url.clone()))
940 : }
941 : },
942 0 : "irsa" => match (&args.redis_host, args.redis_port) {
943 0 : (Some(host), Some(port)) => Some(
944 0 : ConnectionWithCredentialsProvider::new_with_credentials_provider(
945 0 : host.clone(),
946 0 : port,
947 0 : elasticache::CredentialsProvider::new(
948 0 : args.aws_region.clone(),
949 0 : args.redis_cluster_name.clone(),
950 0 : args.redis_user_id.clone(),
951 0 : )
952 0 : .await,
953 : ),
954 : ),
955 : (None, None) => {
956 : // todo: upgrade to error?
957 0 : warn!(
958 0 : "irsa auth requires redis-host and redis-port to be set, continuing without regional_redis_client"
959 : );
960 0 : None
961 : }
962 : _ => {
963 0 : bail!("redis-host and redis-port must be specified together");
964 : }
965 : },
966 0 : auth_type => {
967 0 : bail!("unknown auth type {auth_type:?} given")
968 : }
969 : };
970 :
971 0 : Ok(redis_client)
972 0 : }
973 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use clap::Parser;

    use crate::rate_limiter::RateBucketInfo;

    /// Passing `--endpoint-rps-limit` twice yields both buckets, in the order given.
    #[test]
    fn parse_endpoint_rps_limit() {
        let argv = [
            "proxy",
            "--endpoint-rps-limit",
            "100@1s",
            "--endpoint-rps-limit",
            "20@30s",
        ];
        let parsed = super::ProxyCliArgs::parse_from(argv);

        let expected = vec![
            RateBucketInfo::new(100, Duration::from_secs(1)),
            RateBucketInfo::new(20, Duration::from_secs(30)),
        ];
        assert_eq!(parsed.endpoint_rps_limit, expected);
    }
}
|