TLA Line data Source code
1 : use futures::future::Either;
2 : use proxy::auth;
3 : use proxy::config::AuthenticationConfig;
4 : use proxy::config::CacheOptions;
5 : use proxy::config::HttpConfig;
6 : use proxy::console;
7 : use proxy::console::provider::AllowedIpsCache;
8 : use proxy::console::provider::NodeInfoCache;
9 : use proxy::console::provider::RoleSecretCache;
10 : use proxy::context::parquet::ParquetUploadArgs;
11 : use proxy::http;
12 : use proxy::rate_limiter::EndpointRateLimiter;
13 : use proxy::rate_limiter::RateBucketInfo;
14 : use proxy::rate_limiter::RateLimiterConfig;
15 : use proxy::serverless::GlobalConnPoolOptions;
16 : use proxy::usage_metrics;
17 :
18 : use anyhow::bail;
19 : use proxy::config::{self, ProxyConfig};
20 : use proxy::serverless;
21 : use std::pin::pin;
22 : use std::sync::Arc;
23 : use std::{borrow::Cow, net::SocketAddr};
24 : use tokio::net::TcpListener;
25 : use tokio::task::JoinSet;
26 : use tokio_util::sync::CancellationToken;
27 : use tracing::info;
28 : use tracing::warn;
29 : use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
30 :
31 : project_git_version!(GIT_VERSION);
32 : project_build_tag!(BUILD_TAG);
33 :
34 : use clap::{Parser, ValueEnum};
35 :
36 CBC 210 : #[derive(Clone, Debug, ValueEnum)]
37 : enum AuthBackend {
38 : Console,
39 : #[cfg(feature = "testing")]
40 : Postgres,
41 : Link,
42 : }
43 :
44 : /// Neon proxy/router
45 23 : #[derive(Parser)]
46 : #[command(version = GIT_VERSION, about)]
47 : struct ProxyCliArgs {
48 : /// Name of the region this proxy is deployed in
49 23 : #[clap(long, default_value_t = String::new())]
50 UBC 0 : region: String,
51 : /// listen for incoming client connections on ip:port
52 : #[clap(short, long, default_value = "127.0.0.1:4432")]
53 0 : proxy: String,
54 CBC 23 : #[clap(value_enum, long, default_value_t = AuthBackend::Link)]
55 UBC 0 : auth_backend: AuthBackend,
56 : /// listen for management callback connection on ip:port
57 : #[clap(short, long, default_value = "127.0.0.1:7000")]
58 0 : mgmt: String,
59 : /// listen for incoming http connections (metrics, etc) on ip:port
60 : #[clap(long, default_value = "127.0.0.1:7001")]
61 0 : http: String,
62 : /// listen for incoming wss connections on ip:port
63 : #[clap(long)]
64 : wss: Option<String>,
65 : /// redirect unauthenticated users to the given uri in case of link auth
66 : #[clap(short, long, default_value = "http://localhost:3000/psql_session/")]
67 0 : uri: String,
68 : /// cloud API endpoint for authenticating users
69 : #[clap(
70 : short,
71 : long,
72 : default_value = "http://localhost:3000/authenticate_proxy_request/"
73 : )]
74 0 : auth_endpoint: String,
75 : /// path to TLS key for client postgres connections
76 : ///
77 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
78 : #[clap(short = 'k', long, alias = "ssl-key")]
79 : tls_key: Option<String>,
80 : /// path to TLS cert for client postgres connections
81 : ///
82 : /// tls-key and tls-cert are for backwards compatibility, we can put all certs in one dir
83 : #[clap(short = 'c', long, alias = "ssl-cert")]
84 : tls_cert: Option<String>,
85 : /// path to directory with TLS certificates for client postgres connections
86 : #[clap(long)]
87 : certs_dir: Option<String>,
88 : /// http endpoint to receive periodic metric updates
89 : #[clap(long)]
90 : metric_collection_endpoint: Option<String>,
91 : /// how often metrics should be sent to a collection endpoint
92 : #[clap(long)]
93 : metric_collection_interval: Option<String>,
94 : /// cache for `wake_compute` api method (use `size=0` to disable)
95 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
96 0 : wake_compute_cache: String,
97 : /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
98 : #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
99 0 : wake_compute_lock: String,
100 : /// Allow self-signed certificates for compute nodes (for testing)
101 CBC 23 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
102 UBC 0 : allow_self_signed_compute: bool,
103 : #[clap(flatten)]
104 : sql_over_http: SqlOverHttpArgs,
105 : /// timeout for scram authentication protocol
106 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
107 0 : scram_protocol_timeout: tokio::time::Duration,
108 : /// Require that all incoming requests have a Proxy Protocol V2 packet **and** have an IP address associated.
109 CBC 23 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
110 UBC 0 : require_client_ip: bool,
111 : /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour.
112 CBC 23 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
113 UBC 0 : disable_dynamic_rate_limiter: bool,
114 : /// Rate limit algorithm. Makes sense only if `disable_rate_limiter` is `false`.
115 CBC 23 : #[clap(value_enum, long, default_value_t = proxy::rate_limiter::RateLimitAlgorithm::Aimd)]
116 UBC 0 : rate_limit_algorithm: proxy::rate_limiter::RateLimitAlgorithm,
117 : /// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
118 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
119 0 : rate_limiter_timeout: tokio::time::Duration,
120 : /// Endpoint rate limiter max number of requests per second.
121 : ///
122 : /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
123 : /// Can be given multiple times for different bucket sizes.
124 CBC 115 : #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
125 23 : endpoint_rps_limit: Vec<RateBucketInfo>,
126 : /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
127 23 : #[clap(long, default_value_t = 100)]
128 UBC 0 : initial_limit: usize,
129 : #[clap(flatten)]
130 : aimd_config: proxy::rate_limiter::AimdConfig,
131 : /// cache for `allowed_ips` (use `size=0` to disable)
132 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
133 0 : allowed_ips_cache: String,
134 : /// cache for `role_secret` (use `size=0` to disable)
135 : #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
136 0 : role_secret_cache: String,
137 : /// disable ip check for http requests. If it is too time consuming, it could be turned off.
138 CBC 23 : #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
139 UBC 0 : disable_ip_check_for_http: bool,
140 :
141 : #[clap(flatten)]
142 : parquet_upload: ParquetUploadArgs,
143 : }
144 :
145 CBC 23 : #[derive(clap::Args, Clone, Copy, Debug)]
146 : struct SqlOverHttpArgs {
147 : /// timeout for http connection requests
148 : #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
149 UBC 0 : sql_over_http_timeout: tokio::time::Duration,
150 :
151 : /// Whether the SQL over http pool is opt-in
152 CBC 23 : #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
153 UBC 0 : sql_over_http_pool_opt_in: bool,
154 :
155 : /// How many connections to pool for each endpoint. Excess connections are discarded
156 CBC 23 : #[clap(long, default_value_t = 20)]
157 UBC 0 : sql_over_http_pool_max_conns_per_endpoint: usize,
158 :
159 : /// How long pooled connections should remain idle for before closing
160 : #[clap(long, default_value = "5m", value_parser = humantime::parse_duration)]
161 0 : sql_over_http_idle_timeout: tokio::time::Duration,
162 :
163 : /// Duration each shard will wait on average before a GC sweep.
164 : /// A longer time will causes sweeps to take longer but will interfere less frequently.
165 : #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
166 0 : sql_over_http_pool_gc_epoch: tokio::time::Duration,
167 :
168 : /// How many shards should the global pool have. Must be a power of two.
169 : /// More shards will introduce less contention for pool operations, but can
170 : /// increase memory used by the pool
171 CBC 23 : #[clap(long, default_value_t = 128)]
172 UBC 0 : sql_over_http_pool_shards: usize,
173 : }
174 :
175 : #[tokio::main]
176 CBC 22 : async fn main() -> anyhow::Result<()> {
177 22 : let _logging_guard = proxy::logging::init().await?;
178 22 : let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
179 22 : let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
180 22 :
181 22 : info!("Version: {GIT_VERSION}");
182 22 : info!("Build_tag: {BUILD_TAG}");
183 22 : ::metrics::set_build_info_metric(GIT_VERSION, BUILD_TAG);
184 22 :
185 22 : let args = ProxyCliArgs::parse();
186 22 : let config = build_config(&args)?;
187 :
188 22 : info!("Authentication backend: {}", config.auth_backend);
189 :
190 : // Check that we can bind to address before further initialization
191 22 : let http_address: SocketAddr = args.http.parse()?;
192 22 : info!("Starting http on {http_address}");
193 22 : let http_listener = TcpListener::bind(http_address).await?.into_std()?;
194 :
195 22 : let mgmt_address: SocketAddr = args.mgmt.parse()?;
196 22 : info!("Starting mgmt on {mgmt_address}");
197 22 : let mgmt_listener = TcpListener::bind(mgmt_address).await?;
198 :
199 22 : let proxy_address: SocketAddr = args.proxy.parse()?;
200 22 : info!("Starting proxy on {proxy_address}");
201 22 : let proxy_listener = TcpListener::bind(proxy_address).await?;
202 22 : let cancellation_token = CancellationToken::new();
203 22 :
204 22 : let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
205 22 :
206 22 : // client facing tasks. these will exit on error or on cancellation
207 22 : // cancellation returns Ok(())
208 22 : let mut client_tasks = JoinSet::new();
209 22 : client_tasks.spawn(proxy::proxy::task_main(
210 22 : config,
211 22 : proxy_listener,
212 22 : cancellation_token.clone(),
213 22 : endpoint_rate_limiter.clone(),
214 22 : ));
215 :
216 : // TODO: rename the argument to something like serverless.
217 : // It now covers more than just websockets, it also covers SQL over HTTP.
218 22 : if let Some(serverless_address) = args.wss {
219 22 : let serverless_address: SocketAddr = serverless_address.parse()?;
220 22 : info!("Starting wss on {serverless_address}");
221 22 : let serverless_listener = TcpListener::bind(serverless_address).await?;
222 :
223 22 : client_tasks.spawn(serverless::task_main(
224 22 : config,
225 22 : serverless_listener,
226 22 : cancellation_token.clone(),
227 22 : endpoint_rate_limiter.clone(),
228 22 : ));
229 UBC 0 : }
230 :
231 CBC 22 : client_tasks.spawn(proxy::context::parquet::worker(
232 22 : cancellation_token.clone(),
233 22 : args.parquet_upload,
234 22 : ));
235 22 :
236 22 : // maintenance tasks. these never return unless there's an error
237 22 : let mut maintenance_tasks = JoinSet::new();
238 22 : maintenance_tasks.spawn(proxy::handle_signals(cancellation_token));
239 22 : maintenance_tasks.spawn(http::health_server::task_main(http_listener));
240 22 : maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener));
241 :
242 22 : if let Some(metrics_config) = &config.metric_collection {
243 1 : maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
244 21 : }
245 :
246 : let maintenance = loop {
247 : // get one complete task
248 88 : match futures::future::select(
249 88 : pin!(maintenance_tasks.join_next()),
250 88 : pin!(client_tasks.join_next()),
251 88 : )
252 51 : .await
253 : {
254 : // exit immediately on maintenance task completion
255 UBC 0 : Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
256 : // exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
257 0 : Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
258 : // exit immediately on client task error
259 CBC 66 : Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
260 : // exit if all our client tasks have shutdown gracefully
261 22 : Either::Right((None, _)) => return Ok(()),
262 : }
263 : };
264 :
265 : // maintenance tasks return Infallible success values, this is an impossible value
266 : // so this match statically ensures that there are no possibilities for that value
267 : match maintenance {}
268 : }
269 :
270 : /// ProxyConfig is created at proxy startup, and lives forever.
271 22 : fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
272 22 : let tls_config = match (&args.tls_key, &args.tls_cert) {
273 22 : (Some(key_path), Some(cert_path)) => Some(config::configure_tls(
274 22 : key_path,
275 22 : cert_path,
276 22 : args.certs_dir.as_ref(),
277 22 : )?),
278 UBC 0 : (None, None) => None,
279 0 : _ => bail!("either both or neither tls-key and tls-cert must be specified"),
280 : };
281 :
282 CBC 22 : if args.allow_self_signed_compute {
283 3 : warn!("allowing self-signed compute certificates");
284 19 : }
285 :
286 22 : let metric_collection = match (
287 22 : &args.metric_collection_endpoint,
288 22 : &args.metric_collection_interval,
289 : ) {
290 1 : (Some(endpoint), Some(interval)) => Some(config::MetricCollectionConfig {
291 1 : endpoint: endpoint.parse()?,
292 1 : interval: humantime::parse_duration(interval)?,
293 : }),
294 21 : (None, None) => None,
295 UBC 0 : _ => bail!(
296 0 : "either both or neither metric-collection-endpoint \
297 0 : and metric-collection-interval must be specified"
298 0 : ),
299 : };
300 CBC 22 : let rate_limiter_config = RateLimiterConfig {
301 22 : disable: args.disable_dynamic_rate_limiter,
302 22 : algorithm: args.rate_limit_algorithm,
303 22 : timeout: args.rate_limiter_timeout,
304 22 : initial_limit: args.initial_limit,
305 22 : aimd_config: Some(args.aimd_config),
306 22 : };
307 :
308 22 : let auth_backend = match &args.auth_backend {
309 : AuthBackend::Console => {
310 1 : let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
311 1 : let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?;
312 1 : let role_secret_cache_config: CacheOptions = args.role_secret_cache.parse()?;
313 :
314 1 : info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
315 1 : info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}");
316 1 : info!("Using RoleSecretCache (wake_compute) with options={role_secret_cache_config:?}");
317 1 : let caches = Box::leak(Box::new(console::caches::ApiCaches {
318 1 : node_info: NodeInfoCache::new(
319 1 : "node_info_cache",
320 1 : wake_compute_cache_config.size,
321 1 : wake_compute_cache_config.ttl,
322 1 : true,
323 1 : ),
324 1 : allowed_ips: AllowedIpsCache::new(
325 1 : "allowed_ips_cache",
326 1 : allowed_ips_cache_config.size,
327 1 : allowed_ips_cache_config.ttl,
328 1 : false,
329 1 : ),
330 1 : role_secret: RoleSecretCache::new(
331 1 : "role_secret_cache",
332 1 : role_secret_cache_config.size,
333 1 : role_secret_cache_config.ttl,
334 1 : false,
335 1 : ),
336 1 : }));
337 :
338 : let config::WakeComputeLockOptions {
339 1 : shards,
340 1 : permits,
341 1 : epoch,
342 1 : timeout,
343 1 : } = args.wake_compute_lock.parse()?;
344 1 : info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
345 1 : let locks = Box::leak(Box::new(
346 1 : console::locks::ApiLocks::new("wake_compute_lock", permits, shards, timeout)
347 1 : .unwrap(),
348 1 : ));
349 1 : tokio::spawn(locks.garbage_collect_worker(epoch));
350 :
351 1 : let url = args.auth_endpoint.parse()?;
352 1 : let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));
353 1 :
354 1 : let api = console::provider::neon::Api::new(endpoint, caches, locks);
355 1 : auth::BackendType::Console(Cow::Owned(api), ())
356 : }
357 : #[cfg(feature = "testing")]
358 : AuthBackend::Postgres => {
359 18 : let url = args.auth_endpoint.parse()?;
360 18 : let api = console::provider::mock::Api::new(url);
361 18 : auth::BackendType::Postgres(Cow::Owned(api), ())
362 : }
363 : AuthBackend::Link => {
364 3 : let url = args.uri.parse()?;
365 3 : auth::BackendType::Link(Cow::Owned(url))
366 : }
367 : };
368 22 : let http_config = HttpConfig {
369 22 : request_timeout: args.sql_over_http.sql_over_http_timeout,
370 22 : pool_options: GlobalConnPoolOptions {
371 22 : max_conns_per_endpoint: args.sql_over_http.sql_over_http_pool_max_conns_per_endpoint,
372 22 : gc_epoch: args.sql_over_http.sql_over_http_pool_gc_epoch,
373 22 : pool_shards: args.sql_over_http.sql_over_http_pool_shards,
374 22 : idle_timeout: args.sql_over_http.sql_over_http_idle_timeout,
375 22 : opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
376 22 : },
377 22 : };
378 22 : let authentication_config = AuthenticationConfig {
379 22 : scram_protocol_timeout: args.scram_protocol_timeout,
380 22 : };
381 22 :
382 22 : let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
383 22 : RateBucketInfo::validate(&mut endpoint_rps_limit)?;
384 :
385 22 : let config = Box::leak(Box::new(ProxyConfig {
386 22 : tls_config,
387 22 : auth_backend,
388 22 : metric_collection,
389 22 : allow_self_signed_compute: args.allow_self_signed_compute,
390 22 : http_config,
391 22 : authentication_config,
392 22 : require_client_ip: args.require_client_ip,
393 22 : disable_ip_check_for_http: args.disable_ip_check_for_http,
394 22 : endpoint_rps_limit,
395 22 : // TODO: add this argument
396 22 : region: args.region.clone(),
397 22 : }));
398 22 :
399 22 : Ok(config)
400 22 : }
401 :
#[cfg(test)]
mod tests {
    use std::time::Duration;

    use clap::Parser;
    use proxy::rate_limiter::RateBucketInfo;

    use super::ProxyCliArgs;

    /// Passing `--endpoint-rps-limit` twice yields one bucket per occurrence,
    /// in the order given on the command line.
    #[test]
    fn parse_endpoint_rps_limit() {
        let expected = vec![
            RateBucketInfo::new(100, Duration::from_secs(1)),
            RateBucketInfo::new(20, Duration::from_secs(30)),
        ];

        let argv = [
            "proxy",
            "--endpoint-rps-limit",
            "100@1s",
            "--endpoint-rps-limit",
            "20@30s",
        ];
        let parsed = ProxyCliArgs::parse_from(argv);

        assert_eq!(parsed.endpoint_rps_limit, expected);
    }
}
|