Line data Source code
1 : use futures::future::Either;
2 : use proxy::auth;
3 : use proxy::auth::backend::MaybeOwned;
4 : use proxy::cancellation::CancelMap;
5 : use proxy::cancellation::CancellationHandler;
6 : use proxy::config::AuthenticationConfig;
7 : use proxy::config::CacheOptions;
8 : use proxy::config::HttpConfig;
9 : use proxy::config::ProjectInfoCacheOptions;
10 : use proxy::console;
11 : use proxy::context::parquet::ParquetUploadArgs;
12 : use proxy::http;
13 : use proxy::rate_limiter::EndpointRateLimiter;
14 : use proxy::rate_limiter::RateBucketInfo;
15 : use proxy::rate_limiter::RateLimiterConfig;
16 : use proxy::redis::notifications;
17 : use proxy::redis::publisher::RedisPublisherClient;
18 : use proxy::serverless::GlobalConnPoolOptions;
19 : use proxy::usage_metrics;
20 :
21 : use anyhow::bail;
22 : use proxy::config::{self, ProxyConfig};
23 : use proxy::serverless;
24 : use std::net::SocketAddr;
25 : use std::pin::pin;
26 : use std::sync::Arc;
27 : use tokio::net::TcpListener;
28 : use tokio::sync::Mutex;
29 : use tokio::task::JoinSet;
30 : use tokio_util::sync::CancellationToken;
31 : use tracing::info;
32 : use tracing::warn;
33 : use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
34 :
35 : project_git_version!(GIT_VERSION);
36 : project_build_tag!(BUILD_TAG);
37 :
38 : use clap::{Parser, ValueEnum};
39 :
40 : #[global_allocator]
41 4864 : static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
42 :
43 247 : #[derive(Clone, Debug, ValueEnum)]
44 : enum AuthBackend {
45 : Console,
46 : #[cfg(feature = "testing")]
47 : Postgres,
48 : Link,
49 : }
50 :
51 : /// Neon proxy/router
52 54 : #[derive(Parser)]
53 : #[command(version = GIT_VERSION, about)]
54 : struct ProxyCliArgs {
55 : /// Name of the region this proxy is deployed in
56 27 : #[clap(long, default_value_t = String::new())]
57 0 : region: String,
58 : /// listen for incoming client connections on ip:port
59 : #[clap(short, long, default_value = "127.0.0.1:4432")]
60 0 : proxy: String,
61 27 : #[clap(value_enum, long, default_value_t = AuthBackend::Link)]
62 0 : auth_backend: AuthBackend,
63 : /// listen for management callback connection on ip:port
64 : #[clap(short, long, default_value = "127.0.0.1:7000")]
65 0 : mgmt: String,
66 : /// listen for incoming http connections (metrics, etc) on ip:port
67 : #[clap(long, default_value = "127.0.0.1:7001")]
68 0 : http: String,
69 : /// listen for incoming wss connections on ip:port
70 : #[clap(long)]
71 : wss: Option<String>,
72 : /// redirect unauthenticated users to the given uri in case of link auth
73 : #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
74 0 : uri: String,
75 : /// cloud API endpoint for authenticating users
76 : #[clap(
77 : short,
78 : long,
79 : default_value = "http://localhost:3000/authenticate_proxy_request/"
80 : )]
81 0 : auth_endpoint: String,
82 : /// path to TLS key for client postgres connections
83 : ///
84 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
85 : #[clap(short = 'k', long, alias = "ssl-key")]
86 : tls_key: Option<String>,
87 : /// path to TLS cert for client postgres connections
88 : ///
89 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
90 : #[clap(short = 'c', long, alias = "ssl-cert")]
91 : tls_cert: Option<String>,
92 : /// path to directory with TLS certificates for client postgres connections
93 : #[clap(long)]
94 : certs_dir: Option<String>,
95 : /// timeout for the TLS handshake
96 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
97 0 : handshake_timeout: tokio::time::Duration,
98 : /// http endpoint to receive periodic metric updates
99 : #[clap(long)]
100 : metric_collection_endpoint: Option<String>,
101 : /// how often metrics should be sent to a collection endpoint
102 : #[clap(long)]
103 : metric_collection_interval: Option<String>,
104 : /// cache for `wake_compute` api method (use `size=0` to disable)
105 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
106 0 : wake_compute_cache: String,
107 : /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
108 : #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
109 0 : wake_compute_lock: String,
110 : /// Allow self-signed certificates for compute nodes (for testing)
111 27 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
112 0 : allow_self_signed_compute: bool,
113 : #[clap(flatten)]
114 : sql_over_http: SqlOverHttpArgs,
115 : /// timeout for scram authentication protocol
116 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
117 0 : scram_protocol_timeout: tokio::time::Duration,
118 : /// Require that all incoming requests have a Proxy Protocol V2 packet **and** have an IP address associated.
119 27 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
120 0 : require_client_ip: bool,
121 : /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour.
122 27 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
123 0 : disable_dynamic_rate_limiter: bool,
124 : /// Rate limit algorithm. Makes sense only if `disable_rate_limiter` is `false`.
125 27 : #[clap(value_enum, long, default_value_t = proxy::rate_limiter::RateLimitAlgorithm::Aimd)]
126 0 : rate_limit_algorithm: proxy::rate_limiter::RateLimitAlgorithm,
127 : /// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
128 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
129 0 : rate_limiter_timeout: tokio::time::Duration,
130 : /// Endpoint rate limiter max number of requests per second.
131 : ///
132 : /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
133 : /// Can be given multiple times for different bucket sizes.
134 135 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
135 27 : endpoint_rps_limit: Vec<RateBucketInfo>,
136 : /// Redis rate limiter max number of requests per second.
137 135 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
138 27 : redis_rps_limit: Vec<RateBucketInfo>,
139 : /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
140 27 : #[clap(long, default_value_t = 100)]
141 0 : initial_limit: usize,
142 : #[clap(flatten)]
143 : aimd_config: proxy::rate_limiter::AimdConfig,
144 : /// cache for `allowed_ips` (use `size=0` to disable)
145 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
146 0 : allowed_ips_cache: String,
147 : /// cache for `role_secret` (use `size=0` to disable)
148 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
149 0 : role_secret_cache: String,
150 : /// disable ip check for http requests. If it is too time consuming, it could be turned off.
151 27 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
152 0 : disable_ip_check_for_http: bool,
153 : /// redis url for notifications.
154 : #[clap(long)]
155 : redis_notifications: Option<String>,
156 : /// cache for `project_info` (use `size=0` to disable)
157 : #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
158 0 : project_info_cache: String,
159 :
160 : #[clap(flatten)]
161 : parquet_upload: ParquetUploadArgs,
162 : }
163 :
164 54 : #[derive(clap::Args, Clone, Copy, Debug)]
165 : struct SqlOverHttpArgs {
166 : /// timeout for http connection requests
167 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
168 0 : sql_over_http_timeout: tokio::time::Duration,
169 :
170 : /// Whether the SQL over http pool is opt-in
171 27 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
172 0 : sql_over_http_pool_opt_in: bool,
173 :
174 : /// How many connections to pool for each endpoint. Excess connections are discarded
175 27 : #[clap(long, default_value_t = 20)]
176 0 : sql_over_http_pool_max_conns_per_endpoint: usize,
177 :
178 : /// How many connections to pool for each endpoint. Excess connections are discarded
179 27 : #[clap(long, default_value_t = 20000)]
180 0 : sql_over_http_pool_max_total_conns: usize,
181 :
182 : /// How long pooled connections should remain idle for before closing
183 : #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
184 0 : sql_over_http_idle_timeout: tokio::time::Duration,
185 :
186 : /// Duration each shard will wait on average before a GC sweep.
187 : /// A longer time will causes sweeps to take longer but will interfere less frequently.
188 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
189 0 : sql_over_http_pool_gc_epoch: tokio::time::Duration,
190 :
191 : /// How many shards should the global pool have. Must be a power of two.
192 : /// More shards will introduce less contention for pool operations, but can
193 : /// increase memory used by the pool
194 27 : #[clap(long, default_value_t = 128)]
195 0 : sql_over_http_pool_shards: usize,
196 : }
197 :
198 : #[tokio::main]
199 25 : async fn main() -> anyhow::Result<()> {
200 25 : let _logging_guard = proxy::logging::init().await?;
201 25 : let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
202 25 : let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
203 25 :
204 25 : info!("Version: {GIT_VERSION}");
205 25 : info!("Build_tag: {BUILD_TAG}");
206 25 : ::metrics::set_build_info_metric(GIT_VERSION, BUILD_TAG);
207 25 :
208 25 : match proxy::jemalloc::MetricRecorder::new(prometheus::default_registry()) {
209 25 : Ok(t) => {
210 25 : t.start();
211 25 : }
212 25 : Err(e) => tracing::error!(error = ?e, "could not start jemalloc metrics loop"),
213 25 : }
214 25 :
215 25 : let args = ProxyCliArgs::parse();
216 25 : let config = build_config(&args)?;
217 25 :
218 25 : info!("Authentication backend: {}", config.auth_backend);
219 25 :
220 25 : // Check that we can bind to address before further initialization
221 25 : let http_address: SocketAddr = args.http.parse()?;
222 25 : info!("Starting http on {http_address}");
223 25 : let http_listener = TcpListener::bind(http_address).await?.into_std()?;
224 25 :
225 25 : let mgmt_address: SocketAddr = args.mgmt.parse()?;
226 25 : info!("Starting mgmt on {mgmt_address}");
227 25 : let mgmt_listener = TcpListener::bind(mgmt_address).await?;
228 25 :
229 25 : let proxy_address: SocketAddr = args.proxy.parse()?;
230 25 : info!("Starting proxy on {proxy_address}");
231 25 : let proxy_listener = TcpListener::bind(proxy_address).await?;
232 25 : let cancellation_token = CancellationToken::new();
233 25 :
234 25 : let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
235 25 : let cancel_map = CancelMap::default();
236 25 : let redis_publisher = match &args.redis_notifications {
237 25 : Some(url) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
238 0 : url,
239 0 : args.region.clone(),
240 0 : &config.redis_rps_limit,
241 0 : )?))),
242 25 : None => None,
243 25 : };
244 25 : let cancellation_handler = Arc::new(CancellationHandler::new(
245 25 : cancel_map.clone(),
246 25 : redis_publisher,
247 25 : ));
248 25 :
249 25 : // client facing tasks. these will exit on error or on cancellation
250 25 : // cancellation returns Ok(())
251 25 : let mut client_tasks = JoinSet::new();
252 25 : client_tasks.spawn(proxy::proxy::task_main(
253 25 : config,
254 25 : proxy_listener,
255 25 : cancellation_token.clone(),
256 25 : endpoint_rate_limiter.clone(),
257 25 : cancellation_handler.clone(),
258 25 : ));
259 25 :
260 25 : // TODO: rename the argument to something like serverless.
261 25 : // It now covers more than just websockets, it also covers SQL over HTTP.
262 25 : if let Some(serverless_address) = args.wss {
263 25 : let serverless_address: SocketAddr = serverless_address.parse()?;
264 25 : info!("Starting wss on {serverless_address}");
265 25 : let serverless_listener = TcpListener::bind(serverless_address).await?;
266 25 :
267 25 : client_tasks.spawn(serverless::task_main(
268 25 : config,
269 25 : serverless_listener,
270 25 : cancellation_token.clone(),
271 25 : endpoint_rate_limiter.clone(),
272 25 : cancellation_handler.clone(),
273 25 : ));
274 25 : }
275 25 :
276 25 : client_tasks.spawn(proxy::context::parquet::worker(
277 25 : cancellation_token.clone(),
278 25 : args.parquet_upload,
279 25 : ));
280 25 :
281 25 : // maintenance tasks. these never return unless there's an error
282 25 : let mut maintenance_tasks = JoinSet::new();
283 25 : maintenance_tasks.spawn(proxy::handle_signals(cancellation_token));
284 25 : maintenance_tasks.spawn(http::health_server::task_main(http_listener));
285 25 : maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
286 25 :
287 25 : if let Some(metrics_config) = &config.metric_collection {
288 1 : maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
289 24 : }
290 25 :
291 25 : if let auth::BackendType::Console(api, _) = &config.auth_backend {
292 25 : if let proxy::console::provider::ConsoleBackend::Console(api) = &**api {
293 25 : let cache = api.caches.project_info.clone();
294 25 : if let Some(url) = args.redis_notifications {
295 25 : info!("Starting redis notifications listener ({url})");
296 25 : maintenance_tasks.spawn(notifications::task_main(
297 0 : url.to_owned(),
298 0 : cache.clone(),
299 0 : cancel_map.clone(),
300 0 : args.region.clone(),
301 0 : ));
302 25 : }
303 25 : maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
304 25 : }
305 25 : }
306 25 :
307 25 : let maintenance = loop {
308 25 : // get one complete task
309 100 : match futures::future::select(
310 100 : pin!(maintenance_tasks.join_next()),
311 100 : pin!(client_tasks.join_next()),
312 100 : )
313 63 : .await
314 25 : {
315 25 : // exit immediately on maintenance task completion
316 25 : Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
317 25 : // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
318 25 : Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
319 25 : // exit immediately on client task error
320 75 : Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
321 25 : // exit if all our client tasks have shutdown gracefully
322 25 : Either::Right((None, _)) => return Ok(()),
323 25 : }
324 25 : };
325 25 :
326 25 : // maintenance tasks return Infallible success values, this is an impossible value
327 25 : // so this match statically ensures that there are no possibilities for that value
328 25 : match maintenance {}
329 25 : }
330 :
331 : /// ProxyConfig is created at proxy startup, and lives forever.
332 25 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
333 25 : let tls_config = match (&args.tls_key, &args.tls_cert) {
334 25 : (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
335 25 : key_path,
336 25 : cert_path,
337 25 : args.certs_dir.as_ref(),
338 25 : )?),
339 0 : (None, None) => None,
340 0 : _ => bail!("either both or neither tls-key and tls-cert must be specified"),
341 : };
342 :
343 25 : if args.allow_self_signed_compute {
344 3 : warn!("allowing self-signed compute certificates");
345 22 : }
346 :
347 25 : let metric_collection = match (
348 25 : &args.metric_collection_endpoint,
349 25 : &args.metric_collection_interval,
350 : ) {
351 1 : (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
352 1 : endpoint: endpoint.parse()?,
353 1 : interval: humantime::parse_duration(interval)?,
354 : }),
355 24 : (None, None) => None,
356 0 : _ => bail!(
357 0 : "either both or neither metric-collection-endpoint \
358 0 : and metric-collection-interval must be specified"
359 0 : ),
360 : };
361 25 : let rate_limiter_config = RateLimiterConfig {
362 25 : disable: args.disable_dynamic_rate_limiter,
363 25 : algorithm: args.rate_limit_algorithm,
364 25 : timeout: args.rate_limiter_timeout,
365 25 : initial_limit: args.initial_limit,
366 25 : aimd_config: Some(args.aimd_config),
367 25 : };
368 :
369 25 : let auth_backend = match &args.auth_backend {
370 : AuthBackend::Console => {
371 1 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
372 1 : let project_info_cache_config: ProjectInfoCacheOptions =
373 1 : args.project_info_cache.parse()?;
374 :
375 1 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
376 1 : info!(
377 1 : "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
378 1 : );
379 1 : let caches = Box::leak(Box::new(console::caches::ApiCaches::new(
380 1 : wake_compute_cache_config,
381 1 : project_info_cache_config,
382 1 : )));
383 :
384 : let config::WakeComputeLockOptions {
385 1 : shards,
386 1 : permits,
387 1 : epoch,
388 1 : timeout,
389 1 : } = args.wake_compute_lock.parse()?;
390 1 : info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
391 1 : let locks = Box::leak(Box::new(
392 1 : console::locks::ApiLocks::new("wake_compute_lock", permits, shards, timeout)
393 1 : .unwrap(),
394 1 : ));
395 1 : tokio::spawn(locks.garbage_collect_worker(epoch));
396 :
397 1 : let url = args.auth_endpoint.parse()?;
398 1 : let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));
399 1 :
400 1 : let api = console::provider::neon::Api::new(endpoint, caches, locks);
401 1 : let api = console::provider::ConsoleBackend::Console(api);
402 1 : auth::BackendType::Console(MaybeOwned::Owned(api), ())
403 : }
404 : #[cfg(feature = "testing")]
405 : AuthBackend::Postgres => {
406 21 : let url = args.auth_endpoint.parse()?;
407 21 : let api = console::provider::mock::Api::new(url);
408 21 : let api = console::provider::ConsoleBackend::Postgres(api);
409 21 : auth::BackendType::Console(MaybeOwned::Owned(api), ())
410 : }
411 : AuthBackend::Link => {
412 3 : let url = args.uri.parse()?;
413 3 : auth::BackendType::Link(MaybeOwned::Owned(url), ())
414 : }
415 : };
416 25 : let http_config = HttpConfig {
417 25 : request_timeout: args.sql_over_http.sql_over_http_timeout,
418 25 : pool_options: GlobalConnPoolOptions {
419 25 : max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
420 25 : gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
421 25 : pool_shards: args.sql_over_http.sql_over_http_pool_shards,
422 25 : idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
423 25 : opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
424 25 : max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,
425 25 : },
426 25 : };
427 25 : let authentication_config = AuthenticationConfig {
428 25 : scram_protocol_timeout: args.scram_protocol_timeout,
429 25 : };
430 25 :
431 25 : let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
432 25 : RateBucketInfo::validate(&mut endpoint_rps_limit)?;
433 25 : let mut redis_rps_limit = args.redis_rps_limit.clone();
434 25 : RateBucketInfo::validate(&mut redis_rps_limit)?;
435 :
436 25 : let config = Box::leak(Box::new(ProxyConfig {
437 25 : tls_config,
438 25 : auth_backend,
439 25 : metric_collection,
440 25 : allow_self_signed_compute: args.allow_self_signed_compute,
441 25 : http_config,
442 25 : authentication_config,
443 25 : require_client_ip: args.require_client_ip,
444 25 : disable_ip_check_for_http: args.disable_ip_check_for_http,
445 25 : endpoint_rps_limit,
446 25 : redis_rps_limit,
447 25 : handshake_timeout: args.handshake_timeout,
448 25 : // TODO: add this argument
449 25 : region: args.region.clone(),
450 25 : }));
451 25 :
452 25 : Ok(config)
453 25 : }
454 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use clap::Parser;
    use proxy::rate_limiter::RateBucketInfo;

    /// Passing `--endpoint-rps-limit` twice yields one bucket per occurrence,
    /// in the order given on the command line.
    #[test]
    fn parse_endpoint_rps_limit() {
        let argv = [
            "proxy",
            "--endpoint-rps-limit",
            "100@1s",
            "--endpoint-rps-limit",
            "20@30s",
        ];
        let parsed = super::ProxyCliArgs::parse_from(argv);

        let expected = vec![
            RateBucketInfo::new(100, Duration::from_secs(1)),
            RateBucketInfo::new(20, Duration::from_secs(30)),
        ];
        assert_eq!(parsed.endpoint_rps_limit, expected);
    }
}
|