Line data Source code
1 : use std::collections::HashMap;
2 : use std::fmt::Write;
3 : use std::fs;
4 : use std::fs::File;
5 : use std::io::{BufRead, BufReader};
6 : use std::os::unix::fs::PermissionsExt;
7 : use std::path::Path;
8 : use std::process::Child;
9 : use std::str::FromStr;
10 : use std::time::{Duration, Instant};
11 :
12 : use anyhow::{Result, bail};
13 : use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
14 : use futures::StreamExt;
15 : use ini::Ini;
16 : use notify::{RecursiveMode, Watcher};
17 : use postgres::config::Config;
18 : use tokio::io::AsyncBufReadExt;
19 : use tokio::task::JoinHandle;
20 : use tokio::time::timeout;
21 : use tokio_postgres;
22 : use tokio_postgres::NoTls;
23 : use tracing::{debug, error, info, instrument};
24 :
/// Maximum time to wait for Postgres to report that it is ready (one minute).
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_secs(60);
26 :
/// Turn `s` into a complete SQL string literal.
///
/// The returned value already carries its own quotes, so callers must not wrap
/// it in `'{}'` or `E'{}'` again. Single quotes and backslashes are doubled,
/// and the `E` prefix is added only when the input contained a backslash,
/// e.g. `db'` becomes `'db'''` and `db\` becomes `E'db\\'`.
///
/// Modelled after Postgres' `quote_literal`, see
/// <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>.
pub fn escape_literal(s: &str) -> String {
    let mut saw_backslash = false;
    let mut body = String::with_capacity(s.len() + 2);

    for ch in s.chars() {
        match ch {
            '\'' => body.push_str("''"),
            '\\' => {
                saw_backslash = true;
                body.push_str("\\\\");
            }
            other => body.push(other),
        }
    }

    // Backslashes are only special inside the escape-string (`E'...'`) syntax.
    let prefix = if saw_backslash { "E" } else { "" };
    format!("{prefix}'{body}'")
}
42 :
/// Turn `s` into a quoted value suitable for `postgresql.conf`.
///
/// Every single quote and backslash is doubled and the result is wrapped in
/// single quotes, so callers must not add `'{}'` around it themselves.
pub fn escape_conf_value(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        // Special characters are escaped by emitting them twice.
        if matches!(ch, '\'' | '\\') {
            quoted.push(ch);
        }
        quoted.push(ch);
    }
    quoted.push('\'');
    quoted
}
49 :
50 : pub trait GenericOptionExt {
51 : fn to_pg_option(&self) -> String;
52 : fn to_pg_setting(&self) -> String;
53 : }
54 :
55 : impl GenericOptionExt for GenericOption {
56 : /// Represent `GenericOption` as SQL statement parameter.
57 3 : fn to_pg_option(&self) -> String {
58 3 : if let Some(val) = &self.value {
59 3 : match self.vartype.as_ref() {
60 3 : "string" => format!("{} {}", self.name, escape_literal(val)),
61 1 : _ => format!("{} {}", self.name, val),
62 : }
63 : } else {
64 0 : self.name.to_owned()
65 : }
66 3 : }
67 :
68 : /// Represent `GenericOption` as configuration option.
69 23 : fn to_pg_setting(&self) -> String {
70 23 : if let Some(val) = &self.value {
71 23 : match self.vartype.as_ref() {
72 23 : "string" => format!("{} = {}", self.name, escape_conf_value(val)),
73 15 : _ => format!("{} = {}", self.name, val),
74 : }
75 : } else {
76 0 : self.name.to_owned()
77 : }
78 23 : }
79 : }
80 :
81 : pub trait PgOptionsSerialize {
82 : fn as_pg_options(&self) -> String;
83 : fn as_pg_settings(&self) -> String;
84 : }
85 :
86 : impl PgOptionsSerialize for GenericOptions {
87 : /// Serialize an optional collection of `GenericOption`'s to
88 : /// Postgres SQL statement arguments.
89 2 : fn as_pg_options(&self) -> String {
90 2 : if let Some(ops) = &self {
91 1 : ops.iter()
92 3 : .map(|op| op.to_pg_option())
93 1 : .collect::<Vec<String>>()
94 1 : .join(" ")
95 : } else {
96 1 : "".to_string()
97 : }
98 2 : }
99 :
100 : /// Serialize an optional collection of `GenericOption`'s to
101 : /// `postgresql.conf` compatible format.
102 1 : fn as_pg_settings(&self) -> String {
103 1 : if let Some(ops) = &self {
104 1 : ops.iter()
105 23 : .map(|op| op.to_pg_setting())
106 1 : .collect::<Vec<String>>()
107 1 : .join("\n")
108 1 : + "\n" // newline after last setting
109 : } else {
110 0 : "".to_string()
111 : }
112 1 : }
113 : }
114 :
115 : pub trait GenericOptionsSearch {
116 : fn find(&self, name: &str) -> Option<String>;
117 : fn find_ref(&self, name: &str) -> Option<&GenericOption>;
118 : }
119 :
120 : impl GenericOptionsSearch for GenericOptions {
121 : /// Lookup option by name
122 9 : fn find(&self, name: &str) -> Option<String> {
123 9 : let ops = self.as_ref()?;
124 6 : let op = ops.iter().find(|s| s.name == name)?;
125 2 : op.value.clone()
126 9 : }
127 :
128 : /// Lookup option by name, returning ref
129 0 : fn find_ref(&self, name: &str) -> Option<&GenericOption> {
130 0 : let ops = self.as_ref()?;
131 0 : ops.iter().find(|s| s.name == name)
132 0 : }
133 : }
134 :
135 : pub trait RoleExt {
136 : fn to_pg_options(&self) -> String;
137 : }
138 :
139 : impl RoleExt for Role {
140 : /// Serialize a list of role parameters into a Postgres-acceptable
141 : /// string of arguments.
142 1 : fn to_pg_options(&self) -> String {
143 1 : // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
144 1 : let mut params: String = self.options.as_pg_options();
145 1 : params.push_str(" LOGIN");
146 :
147 1 : if let Some(pass) = &self.encrypted_password {
148 : // Some time ago we supported only md5 and treated all encrypted_password as md5.
149 : // Now we also support SCRAM-SHA-256 and to preserve compatibility
150 : // we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
151 1 : if pass.starts_with("SCRAM-SHA-256") {
152 0 : write!(params, " PASSWORD '{pass}'")
153 0 : .expect("String is documented to not to error during write operations");
154 1 : } else {
155 1 : write!(params, " PASSWORD 'md5{pass}'")
156 1 : .expect("String is documented to not to error during write operations");
157 1 : }
158 0 : } else {
159 0 : params.push_str(" PASSWORD NULL");
160 0 : }
161 :
162 1 : params
163 1 : }
164 : }
165 :
166 : pub trait DatabaseExt {
167 : fn to_pg_options(&self) -> String;
168 : }
169 :
170 : impl DatabaseExt for Database {
171 : /// Serialize a list of database parameters into a Postgres-acceptable
172 : /// string of arguments.
173 : /// NB: `TEMPLATE` is actually also an identifier, but so far we only need
174 : /// to use `template0` and `template1`, so it is not a problem. Yet in the future
175 : /// it may require a proper quoting too.
176 1 : fn to_pg_options(&self) -> String {
177 1 : let mut params: String = self.options.as_pg_options();
178 1 : write!(params, " OWNER {}", &self.owner.pg_quote())
179 1 : .expect("String is documented to not to error during write operations");
180 1 :
181 1 : params
182 1 : }
183 : }
184 :
185 : /// Generic trait used to provide quoting / encoding for strings used in the
186 : /// Postgres SQL queries and DATABASE_URL.
187 : pub trait Escaping {
188 : fn pg_quote(&self) -> String;
189 : }
190 :
191 : impl Escaping for PgIdent {
192 : /// This is intended to mimic Postgres quote_ident(), but for simplicity it
193 : /// always quotes provided string with `""` and escapes every `"`.
194 : /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
195 2 : fn pg_quote(&self) -> String {
196 2 : let result = format!("\"{}\"", self.replace('"', "\"\""));
197 2 : result
198 2 : }
199 : }
200 :
201 : /// Build a list of existing Postgres roles
202 0 : pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {
203 0 : let postgres_roles = client
204 0 : .query_raw::<str, &String, &[String; 0]>(
205 0 : "SELECT rolname, rolpassword FROM pg_catalog.pg_authid",
206 0 : &[],
207 0 : )
208 0 : .await?
209 0 : .filter_map(|row| async { row.ok() })
210 0 : .map(|row| Role {
211 0 : name: row.get("rolname"),
212 0 : encrypted_password: row.get("rolpassword"),
213 0 : options: None,
214 0 : })
215 0 : .collect()
216 0 : .await;
217 :
218 0 : Ok(postgres_roles)
219 0 : }
220 :
221 : /// Build a list of existing Postgres databases
222 0 : pub async fn get_existing_dbs_async(
223 0 : client: &tokio_postgres::Client,
224 0 : ) -> Result<HashMap<String, Database>> {
225 : // `pg_database.datconnlimit = -2` means that the database is in the
226 : // invalid state. See:
227 : // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
228 0 : let rowstream = client
229 0 : .query_raw::<str, &String, &[String; 0]>(
230 0 : "SELECT
231 0 : datname AS name,
232 0 : datdba::regrole::text AS owner,
233 0 : NOT datallowconn AS restrict_conn,
234 0 : datconnlimit = - 2 AS invalid
235 0 : FROM
236 0 : pg_catalog.pg_database;",
237 0 : &[],
238 0 : )
239 0 : .await?;
240 :
241 0 : let dbs_map = rowstream
242 0 : .filter_map(|r| async { r.ok() })
243 0 : .map(|row| Database {
244 0 : name: row.get("name"),
245 0 : owner: row.get("owner"),
246 0 : restrict_conn: row.get("restrict_conn"),
247 0 : invalid: row.get("invalid"),
248 0 : options: None,
249 0 : })
250 0 : .map(|db| (db.name.clone(), db.clone()))
251 0 : .collect::<HashMap<_, _>>()
252 0 : .await;
253 :
254 0 : Ok(dbs_map)
255 0 : }
256 :
257 : /// Wait for Postgres to become ready to accept connections. It's ready to
258 : /// accept connections when the state-field in `pgdata/postmaster.pid` says
259 : /// 'ready'.
260 : #[instrument(skip_all, fields(pgdata = %pgdata.display()))]
261 : pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
262 : let pid_path = pgdata.join("postmaster.pid");
263 :
264 : // PostgreSQL writes line "ready" to the postmaster.pid file, when it has
265 : // completed initialization and is ready to accept connections. We want to
266 : // react quickly and perform the rest of our initialization as soon as
267 : // PostgreSQL starts accepting connections. Use 'notify' to be notified
268 : // whenever the PID file is changed, and whenever it changes, read it to
269 : // check if it's now "ready".
270 : //
271 : // You cannot actually watch a file before it exists, so we first watch the
272 : // data directory, and once the postmaster.pid file appears, we switch to
273 : // watch the file instead. We also wake up every 100 ms to poll, just in
274 : // case we miss some events for some reason. Not strictly necessary, but
275 : // better safe than sorry.
276 : let (tx, rx) = std::sync::mpsc::channel();
277 0 : let watcher_res = notify::recommended_watcher(move |res| {
278 0 : let _ = tx.send(res);
279 0 : });
280 : let (mut watcher, rx): (Box<dyn Watcher>, _) = match watcher_res {
281 : Ok(watcher) => (Box::new(watcher), rx),
282 : Err(e) => {
283 : match e.kind {
284 : notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
285 : // docker on m1 macs does not support recommended_watcher
286 : // but return "Function not implemented (os error 38)"
287 : // see https://github.com/notify-rs/notify/issues/423
288 : let (tx, rx) = std::sync::mpsc::channel();
289 :
290 : // let's poll it faster than what we check the results for (100ms)
291 : let config =
292 : notify::Config::default().with_poll_interval(Duration::from_millis(50));
293 :
294 : let watcher = notify::PollWatcher::new(
295 0 : move |res| {
296 0 : let _ = tx.send(res);
297 0 : },
298 : config,
299 : )?;
300 :
301 : (Box::new(watcher), rx)
302 : }
303 : _ => return Err(e.into()),
304 : }
305 : }
306 : };
307 :
308 : watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
309 :
310 : let started_at = Instant::now();
311 : let mut postmaster_pid_seen = false;
312 : loop {
313 : if let Ok(Some(status)) = pg.try_wait() {
314 : // Postgres exited, that is not what we expected, bail out earlier.
315 : let code = status.code().unwrap_or(-1);
316 : bail!("Postgres exited unexpectedly with code {}", code);
317 : }
318 :
319 : let res = rx.recv_timeout(Duration::from_millis(100));
320 : debug!("woken up by notify: {res:?}");
321 : // If there are multiple events in the channel already, we only need to be
322 : // check once. Swallow the extra events before we go ahead to check the
323 : // pid file.
324 : while let Ok(res) = rx.try_recv() {
325 : debug!("swallowing extra event: {res:?}");
326 : }
327 :
328 : // Check that we can open pid file first.
329 : if let Ok(file) = File::open(&pid_path) {
330 : if !postmaster_pid_seen {
331 : debug!("postmaster.pid appeared");
332 : watcher
333 : .unwatch(pgdata)
334 : .expect("Failed to remove pgdata dir watch");
335 : watcher
336 : .watch(&pid_path, RecursiveMode::NonRecursive)
337 : .expect("Failed to add postmaster.pid file watch");
338 : postmaster_pid_seen = true;
339 : }
340 :
341 : let file = BufReader::new(file);
342 : let last_line = file.lines().last();
343 :
344 : // Pid file could be there and we could read it, but it could be empty, for example.
345 : if let Some(Ok(line)) = last_line {
346 : let status = line.trim();
347 : debug!("last line of postmaster.pid: {status:?}");
348 :
349 : // Now Postgres is ready to accept connections
350 : if status == "ready" {
351 : break;
352 : }
353 : }
354 : }
355 :
356 : // Give up after POSTGRES_WAIT_TIMEOUT.
357 : let duration = started_at.elapsed();
358 : if duration >= POSTGRES_WAIT_TIMEOUT {
359 : bail!("timed out while waiting for Postgres to start");
360 : }
361 : }
362 :
363 : tracing::info!("PostgreSQL is now running, continuing to configure it");
364 :
365 : Ok(())
366 : }
367 :
368 : /// Remove `pgdata` directory and create it again with right permissions.
369 0 : pub fn create_pgdata(pgdata: &str) -> Result<()> {
370 0 : // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
371 0 : // If it is something different then create_dir() will error out anyway.
372 0 : let _ok = fs::remove_dir_all(pgdata);
373 0 : fs::create_dir(pgdata)?;
374 0 : fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
375 :
376 0 : Ok(())
377 0 : }
378 :
379 : /// Update pgbouncer.ini with provided options
380 0 : fn update_pgbouncer_ini(
381 0 : pgbouncer_config: HashMap<String, String>,
382 0 : pgbouncer_ini_path: &str,
383 0 : ) -> Result<()> {
384 0 : let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
385 0 : let section = conf.section_mut(Some("pgbouncer")).unwrap();
386 :
387 0 : for (option_name, value) in pgbouncer_config.iter() {
388 0 : section.insert(option_name, value);
389 0 : debug!(
390 0 : "Updating pgbouncer.ini with new values {}={}",
391 : option_name, value
392 : );
393 : }
394 :
395 0 : conf.write_to_file(pgbouncer_ini_path)?;
396 0 : Ok(())
397 0 : }
398 :
399 : /// Tune pgbouncer.
400 : /// 1. Apply new config using pgbouncer admin console
401 : /// 2. Add new values to pgbouncer.ini to preserve them after restart
402 0 : pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
403 0 : let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
404 : // for VMs use pgbouncer specific way to connect to
405 : // pgbouncer admin console without password
406 : // when pgbouncer is running under the same user.
407 0 : "host=/tmp port=6432 dbname=pgbouncer user=pgbouncer".to_string()
408 : } else {
409 : // for k8s use normal connection string with password
410 : // to connect to pgbouncer admin console
411 0 : let mut pgbouncer_connstr =
412 0 : "host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
413 0 : if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
414 0 : pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
415 0 : }
416 0 : pgbouncer_connstr
417 : };
418 :
419 0 : info!(
420 0 : "Connecting to pgbouncer with connection string: {}",
421 : pgbouncer_connstr
422 : );
423 :
424 : // connect to pgbouncer, retrying several times
425 : // because pgbouncer may not be ready yet
426 0 : let mut retries = 3;
427 0 : let client = loop {
428 0 : match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {
429 0 : Ok((client, connection)) => {
430 0 : tokio::spawn(async move {
431 0 : if let Err(e) = connection.await {
432 0 : eprintln!("connection error: {}", e);
433 0 : }
434 0 : });
435 0 : break client;
436 : }
437 0 : Err(e) => {
438 0 : if retries == 0 {
439 0 : return Err(e.into());
440 0 : }
441 0 : error!("Failed to connect to pgbouncer: pgbouncer_connstr {}", e);
442 0 : retries -= 1;
443 0 : tokio::time::sleep(Duration::from_secs(1)).await;
444 : }
445 : }
446 : };
447 :
448 : // Apply new config
449 0 : for (option_name, value) in pgbouncer_config.iter() {
450 0 : let query = format!("SET {}={}", option_name, value);
451 0 : // keep this log line for debugging purposes
452 0 : info!("Applying pgbouncer setting change: {}", query);
453 :
454 0 : if let Err(err) = client.simple_query(&query).await {
455 : // Don't fail on error, just print it into log
456 0 : error!(
457 0 : "Failed to apply pgbouncer setting change: {}, {}",
458 : query, err
459 : );
460 0 : };
461 : }
462 :
463 : // save values to pgbouncer.ini
464 : // so that they are preserved after pgbouncer restart
465 0 : let pgbouncer_ini_path = if std::env::var_os("AUTOSCALING").is_some() {
466 : // in VMs we use /etc/pgbouncer.ini
467 0 : "/etc/pgbouncer.ini".to_string()
468 : } else {
469 : // in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini
470 : // this is a shared volume between pgbouncer and postgres containers
471 : // FIXME: fix permissions for this file
472 0 : "/var/db/postgres/pgbouncer/pgbouncer.ini".to_string()
473 : };
474 0 : update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
475 :
476 0 : Ok(())
477 0 : }
478 :
479 : /// Spawn a task that will read Postgres logs from `stderr`, join multiline logs
480 : /// and send them to the logger. In the future we may also want to add context to
481 : /// these logs.
482 0 : pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<Result<()>> {
483 0 : tokio::spawn(async move {
484 0 : let stderr = tokio::process::ChildStderr::from_std(stderr)?;
485 0 : handle_postgres_logs_async(stderr).await
486 0 : })
487 0 : }
488 :
489 : /// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
490 : /// - next line starts with timestamp
491 : /// - EOF
492 : /// - no new lines were written for the last 100 milliseconds
493 0 : async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
494 0 : let mut lines = tokio::io::BufReader::new(stderr).lines();
495 0 : let timeout_duration = Duration::from_millis(100);
496 0 : let ts_regex =
497 0 : regex::Regex::new(r"^\d+-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("regex is valid");
498 0 :
499 0 : let mut buf = vec![];
500 : loop {
501 0 : let next_line = timeout(timeout_duration, lines.next_line()).await;
502 :
503 : // we should flush lines from the buffer if we cannot continue reading multiline message
504 0 : let should_flush_buf = match next_line {
505 : // Flushing if new line starts with timestamp
506 0 : Ok(Ok(Some(ref line))) => ts_regex.is_match(line),
507 : // Flushing on EOF, timeout or error
508 0 : _ => true,
509 : };
510 :
511 0 : if !buf.is_empty() && should_flush_buf {
512 : // join multiline message into a single line, separated by unicode Zero Width Space.
513 : // "PG:" suffix is used to distinguish postgres logs from other logs.
514 0 : let combined = format!("PG:{}\n", buf.join("\u{200B}"));
515 0 : buf.clear();
516 :
517 : // sync write to stderr to avoid interleaving with other logs
518 : use std::io::Write;
519 0 : let res = std::io::stderr().lock().write_all(combined.as_bytes());
520 0 : if let Err(e) = res {
521 0 : tracing::error!("error while writing to stderr: {}", e);
522 0 : }
523 0 : }
524 :
525 : // if not timeout, append line to the buffer
526 0 : if next_line.is_ok() {
527 0 : match next_line?? {
528 0 : Some(line) => buf.push(line),
529 : // EOF
530 0 : None => break,
531 : };
532 0 : }
533 : }
534 :
535 0 : Ok(())
536 0 : }
537 :
538 : /// `Postgres::config::Config` handles database names with whitespaces
539 : /// and special characters properly.
540 0 : pub fn postgres_conf_for_db(connstr: &url::Url, dbname: &str) -> Result<Config> {
541 0 : let mut conf = Config::from_str(connstr.as_str())?;
542 0 : conf.dbname(dbname);
543 0 : Ok(conf)
544 0 : }
|