Line data Source code
1 : use std::collections::HashMap;
2 : use std::fmt::Write;
3 : use std::fs;
4 : use std::fs::File;
5 : use std::io::{BufRead, BufReader};
6 : use std::os::unix::fs::PermissionsExt;
7 : use std::path::Path;
8 : use std::process::Child;
9 : use std::str::FromStr;
10 : use std::time::{Duration, Instant};
11 :
12 : use anyhow::{bail, Result};
13 : use futures::StreamExt;
14 : use ini::Ini;
15 : use notify::{RecursiveMode, Watcher};
16 : use postgres::config::Config;
17 : use tokio::io::AsyncBufReadExt;
18 : use tokio::task::JoinHandle;
19 : use tokio::time::timeout;
20 : use tokio_postgres;
21 : use tokio_postgres::NoTls;
22 : use tracing::{debug, error, info, instrument};
23 :
24 : use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
25 :
/// Upper bound on how long `wait_for_postgres` keeps waiting for Postgres to
/// report that it is ready (60 seconds).
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_secs(60);
27 :
/// Turn `s` into a complete SQL string literal.
///
/// Single quotes are doubled and backslashes are doubled. The returned value
/// already carries its surrounding quotes, so callers must not wrap it again:
/// a plain `'...'` literal is produced when the input has no backslash, and
/// an `E'...'` literal when it has at least one (e.g. `'db'''` or `E'db\\'`).
///
/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
/// for the original implementation.
pub fn escape_literal(s: &str) -> String {
    let mut body = String::with_capacity(s.len() + 2);
    let mut has_backslash = false;

    for ch in s.chars() {
        match ch {
            '\'' => body.push_str("''"),
            '\\' => {
                has_backslash = true;
                body.push_str("\\\\");
            }
            other => body.push(other),
        }
    }

    // Escaped backslashes are only interpreted inside an `E'...'` literal.
    let opening = if has_backslash { "E'" } else { "'" };
    format!("{opening}{body}'")
}
43 :
/// Turn `s` into a quoted value suitable for `postgresql.conf`.
///
/// Single quotes and backslashes are doubled, and the result is already
/// wrapped in `'...'`, so callers must not add quotes of their own.
pub fn escape_conf_value(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        match ch {
            '\'' => quoted.push_str("''"),
            '\\' => quoted.push_str("\\\\"),
            other => quoted.push(other),
        }
    }
    quoted.push('\'');
    quoted
}
50 :
51 : pub trait GenericOptionExt {
52 : fn to_pg_option(&self) -> String;
53 : fn to_pg_setting(&self) -> String;
54 : }
55 :
56 : impl GenericOptionExt for GenericOption {
57 : /// Represent `GenericOption` as SQL statement parameter.
58 3 : fn to_pg_option(&self) -> String {
59 3 : if let Some(val) = &self.value {
60 3 : match self.vartype.as_ref() {
61 3 : "string" => format!("{} {}", self.name, escape_literal(val)),
62 1 : _ => format!("{} {}", self.name, val),
63 : }
64 : } else {
65 0 : self.name.to_owned()
66 : }
67 3 : }
68 :
69 : /// Represent `GenericOption` as configuration option.
70 23 : fn to_pg_setting(&self) -> String {
71 23 : if let Some(val) = &self.value {
72 23 : match self.vartype.as_ref() {
73 23 : "string" => format!("{} = {}", self.name, escape_conf_value(val)),
74 15 : _ => format!("{} = {}", self.name, val),
75 : }
76 : } else {
77 0 : self.name.to_owned()
78 : }
79 23 : }
80 : }
81 :
82 : pub trait PgOptionsSerialize {
83 : fn as_pg_options(&self) -> String;
84 : fn as_pg_settings(&self) -> String;
85 : }
86 :
87 : impl PgOptionsSerialize for GenericOptions {
88 : /// Serialize an optional collection of `GenericOption`'s to
89 : /// Postgres SQL statement arguments.
90 2 : fn as_pg_options(&self) -> String {
91 2 : if let Some(ops) = &self {
92 1 : ops.iter()
93 3 : .map(|op| op.to_pg_option())
94 1 : .collect::<Vec<String>>()
95 1 : .join(" ")
96 : } else {
97 1 : "".to_string()
98 : }
99 2 : }
100 :
101 : /// Serialize an optional collection of `GenericOption`'s to
102 : /// `postgresql.conf` compatible format.
103 1 : fn as_pg_settings(&self) -> String {
104 1 : if let Some(ops) = &self {
105 1 : ops.iter()
106 23 : .map(|op| op.to_pg_setting())
107 1 : .collect::<Vec<String>>()
108 1 : .join("\n")
109 1 : + "\n" // newline after last setting
110 : } else {
111 0 : "".to_string()
112 : }
113 1 : }
114 : }
115 :
116 : pub trait GenericOptionsSearch {
117 : fn find(&self, name: &str) -> Option<String>;
118 : fn find_ref(&self, name: &str) -> Option<&GenericOption>;
119 : }
120 :
121 : impl GenericOptionsSearch for GenericOptions {
122 : /// Lookup option by name
123 9 : fn find(&self, name: &str) -> Option<String> {
124 9 : let ops = self.as_ref()?;
125 6 : let op = ops.iter().find(|s| s.name == name)?;
126 2 : op.value.clone()
127 9 : }
128 :
129 : /// Lookup option by name, returning ref
130 0 : fn find_ref(&self, name: &str) -> Option<&GenericOption> {
131 0 : let ops = self.as_ref()?;
132 0 : ops.iter().find(|s| s.name == name)
133 0 : }
134 : }
135 :
136 : pub trait RoleExt {
137 : fn to_pg_options(&self) -> String;
138 : }
139 :
140 : impl RoleExt for Role {
141 : /// Serialize a list of role parameters into a Postgres-acceptable
142 : /// string of arguments.
143 1 : fn to_pg_options(&self) -> String {
144 1 : // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
145 1 : let mut params: String = self.options.as_pg_options();
146 1 : params.push_str(" LOGIN");
147 :
148 1 : if let Some(pass) = &self.encrypted_password {
149 : // Some time ago we supported only md5 and treated all encrypted_password as md5.
150 : // Now we also support SCRAM-SHA-256 and to preserve compatibility
151 : // we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
152 1 : if pass.starts_with("SCRAM-SHA-256") {
153 0 : write!(params, " PASSWORD '{pass}'")
154 0 : .expect("String is documented to not to error during write operations");
155 1 : } else {
156 1 : write!(params, " PASSWORD 'md5{pass}'")
157 1 : .expect("String is documented to not to error during write operations");
158 1 : }
159 0 : } else {
160 0 : params.push_str(" PASSWORD NULL");
161 0 : }
162 :
163 1 : params
164 1 : }
165 : }
166 :
167 : pub trait DatabaseExt {
168 : fn to_pg_options(&self) -> String;
169 : }
170 :
171 : impl DatabaseExt for Database {
172 : /// Serialize a list of database parameters into a Postgres-acceptable
173 : /// string of arguments.
174 : /// NB: `TEMPLATE` is actually also an identifier, but so far we only need
175 : /// to use `template0` and `template1`, so it is not a problem. Yet in the future
176 : /// it may require a proper quoting too.
177 1 : fn to_pg_options(&self) -> String {
178 1 : let mut params: String = self.options.as_pg_options();
179 1 : write!(params, " OWNER {}", &self.owner.pg_quote())
180 1 : .expect("String is documented to not to error during write operations");
181 1 :
182 1 : params
183 1 : }
184 : }
185 :
186 : /// Generic trait used to provide quoting / encoding for strings used in the
187 : /// Postgres SQL queries and DATABASE_URL.
188 : pub trait Escaping {
189 : fn pg_quote(&self) -> String;
190 : }
191 :
192 : impl Escaping for PgIdent {
193 : /// This is intended to mimic Postgres quote_ident(), but for simplicity it
194 : /// always quotes provided string with `""` and escapes every `"`.
195 : /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
196 2 : fn pg_quote(&self) -> String {
197 2 : let result = format!("\"{}\"", self.replace('"', "\"\""));
198 2 : result
199 2 : }
200 : }
201 :
202 : /// Build a list of existing Postgres roles
203 0 : pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {
204 0 : let postgres_roles = client
205 0 : .query_raw::<str, &String, &[String; 0]>(
206 0 : "SELECT rolname, rolpassword FROM pg_catalog.pg_authid",
207 0 : &[],
208 0 : )
209 0 : .await?
210 0 : .filter_map(|row| async { row.ok() })
211 0 : .map(|row| Role {
212 0 : name: row.get("rolname"),
213 0 : encrypted_password: row.get("rolpassword"),
214 0 : options: None,
215 0 : })
216 0 : .collect()
217 0 : .await;
218 :
219 0 : Ok(postgres_roles)
220 0 : }
221 :
222 : /// Build a list of existing Postgres databases
223 0 : pub async fn get_existing_dbs_async(
224 0 : client: &tokio_postgres::Client,
225 0 : ) -> Result<HashMap<String, Database>> {
226 : // `pg_database.datconnlimit = -2` means that the database is in the
227 : // invalid state. See:
228 : // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
229 0 : let rowstream = client
230 0 : .query_raw::<str, &String, &[String; 0]>(
231 0 : "SELECT
232 0 : datname AS name,
233 0 : datdba::regrole::text AS owner,
234 0 : NOT datallowconn AS restrict_conn,
235 0 : datconnlimit = - 2 AS invalid
236 0 : FROM
237 0 : pg_catalog.pg_database;",
238 0 : &[],
239 0 : )
240 0 : .await?;
241 :
242 0 : let dbs_map = rowstream
243 0 : .filter_map(|r| async { r.ok() })
244 0 : .map(|row| Database {
245 0 : name: row.get("name"),
246 0 : owner: row.get("owner"),
247 0 : restrict_conn: row.get("restrict_conn"),
248 0 : invalid: row.get("invalid"),
249 0 : options: None,
250 0 : })
251 0 : .map(|db| (db.name.clone(), db.clone()))
252 0 : .collect::<HashMap<_, _>>()
253 0 : .await;
254 :
255 0 : Ok(dbs_map)
256 0 : }
257 :
258 : /// Wait for Postgres to become ready to accept connections. It's ready to
259 : /// accept connections when the state-field in `pgdata/postmaster.pid` says
260 : /// 'ready'.
261 : #[instrument(skip_all, fields(pgdata = %pgdata.display()))]
262 : pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
263 : let pid_path = pgdata.join("postmaster.pid");
264 :
265 : // PostgreSQL writes line "ready" to the postmaster.pid file, when it has
266 : // completed initialization and is ready to accept connections. We want to
267 : // react quickly and perform the rest of our initialization as soon as
268 : // PostgreSQL starts accepting connections. Use 'notify' to be notified
269 : // whenever the PID file is changed, and whenever it changes, read it to
270 : // check if it's now "ready".
271 : //
272 : // You cannot actually watch a file before it exists, so we first watch the
273 : // data directory, and once the postmaster.pid file appears, we switch to
274 : // watch the file instead. We also wake up every 100 ms to poll, just in
275 : // case we miss some events for some reason. Not strictly necessary, but
276 : // better safe than sorry.
277 : let (tx, rx) = std::sync::mpsc::channel();
278 0 : let watcher_res = notify::recommended_watcher(move |res| {
279 0 : let _ = tx.send(res);
280 0 : });
281 : let (mut watcher, rx): (Box<dyn Watcher>, _) = match watcher_res {
282 : Ok(watcher) => (Box::new(watcher), rx),
283 : Err(e) => {
284 : match e.kind {
285 : notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
286 : // docker on m1 macs does not support recommended_watcher
287 : // but return "Function not implemented (os error 38)"
288 : // see https://github.com/notify-rs/notify/issues/423
289 : let (tx, rx) = std::sync::mpsc::channel();
290 :
291 : // let's poll it faster than what we check the results for (100ms)
292 : let config =
293 : notify::Config::default().with_poll_interval(Duration::from_millis(50));
294 :
295 : let watcher = notify::PollWatcher::new(
296 0 : move |res| {
297 0 : let _ = tx.send(res);
298 0 : },
299 : config,
300 : )?;
301 :
302 : (Box::new(watcher), rx)
303 : }
304 : _ => return Err(e.into()),
305 : }
306 : }
307 : };
308 :
309 : watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
310 :
311 : let started_at = Instant::now();
312 : let mut postmaster_pid_seen = false;
313 : loop {
314 : if let Ok(Some(status)) = pg.try_wait() {
315 : // Postgres exited, that is not what we expected, bail out earlier.
316 : let code = status.code().unwrap_or(-1);
317 : bail!("Postgres exited unexpectedly with code {}", code);
318 : }
319 :
320 : let res = rx.recv_timeout(Duration::from_millis(100));
321 : debug!("woken up by notify: {res:?}");
322 : // If there are multiple events in the channel already, we only need to be
323 : // check once. Swallow the extra events before we go ahead to check the
324 : // pid file.
325 : while let Ok(res) = rx.try_recv() {
326 : debug!("swallowing extra event: {res:?}");
327 : }
328 :
329 : // Check that we can open pid file first.
330 : if let Ok(file) = File::open(&pid_path) {
331 : if !postmaster_pid_seen {
332 : debug!("postmaster.pid appeared");
333 : watcher
334 : .unwatch(pgdata)
335 : .expect("Failed to remove pgdata dir watch");
336 : watcher
337 : .watch(&pid_path, RecursiveMode::NonRecursive)
338 : .expect("Failed to add postmaster.pid file watch");
339 : postmaster_pid_seen = true;
340 : }
341 :
342 : let file = BufReader::new(file);
343 : let last_line = file.lines().last();
344 :
345 : // Pid file could be there and we could read it, but it could be empty, for example.
346 : if let Some(Ok(line)) = last_line {
347 : let status = line.trim();
348 : debug!("last line of postmaster.pid: {status:?}");
349 :
350 : // Now Postgres is ready to accept connections
351 : if status == "ready" {
352 : break;
353 : }
354 : }
355 : }
356 :
357 : // Give up after POSTGRES_WAIT_TIMEOUT.
358 : let duration = started_at.elapsed();
359 : if duration >= POSTGRES_WAIT_TIMEOUT {
360 : bail!("timed out while waiting for Postgres to start");
361 : }
362 : }
363 :
364 : tracing::info!("PostgreSQL is now running, continuing to configure it");
365 :
366 : Ok(())
367 : }
368 :
369 : /// Remove `pgdata` directory and create it again with right permissions.
370 0 : pub fn create_pgdata(pgdata: &str) -> Result<()> {
371 0 : // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
372 0 : // If it is something different then create_dir() will error out anyway.
373 0 : let _ok = fs::remove_dir_all(pgdata);
374 0 : fs::create_dir(pgdata)?;
375 0 : fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
376 :
377 0 : Ok(())
378 0 : }
379 :
380 : /// Update pgbouncer.ini with provided options
381 0 : fn update_pgbouncer_ini(
382 0 : pgbouncer_config: HashMap<String, String>,
383 0 : pgbouncer_ini_path: &str,
384 0 : ) -> Result<()> {
385 0 : let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
386 0 : let section = conf.section_mut(Some("pgbouncer")).unwrap();
387 :
388 0 : for (option_name, value) in pgbouncer_config.iter() {
389 0 : section.insert(option_name, value);
390 0 : debug!(
391 0 : "Updating pgbouncer.ini with new values {}={}",
392 : option_name, value
393 : );
394 : }
395 :
396 0 : conf.write_to_file(pgbouncer_ini_path)?;
397 0 : Ok(())
398 0 : }
399 :
400 : /// Tune pgbouncer.
401 : /// 1. Apply new config using pgbouncer admin console
402 : /// 2. Add new values to pgbouncer.ini to preserve them after restart
403 0 : pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
404 0 : let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
405 : // for VMs use pgbouncer specific way to connect to
406 : // pgbouncer admin console without password
407 : // when pgbouncer is running under the same user.
408 0 : "host=/tmp port=6432 dbname=pgbouncer user=pgbouncer".to_string()
409 : } else {
410 : // for k8s use normal connection string with password
411 : // to connect to pgbouncer admin console
412 0 : let mut pgbouncer_connstr =
413 0 : "host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
414 0 : if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
415 0 : pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
416 0 : }
417 0 : pgbouncer_connstr
418 : };
419 :
420 0 : info!(
421 0 : "Connecting to pgbouncer with connection string: {}",
422 : pgbouncer_connstr
423 : );
424 :
425 : // connect to pgbouncer, retrying several times
426 : // because pgbouncer may not be ready yet
427 0 : let mut retries = 3;
428 0 : let client = loop {
429 0 : match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {
430 0 : Ok((client, connection)) => {
431 0 : tokio::spawn(async move {
432 0 : if let Err(e) = connection.await {
433 0 : eprintln!("connection error: {}", e);
434 0 : }
435 0 : });
436 0 : break client;
437 : }
438 0 : Err(e) => {
439 0 : if retries == 0 {
440 0 : return Err(e.into());
441 0 : }
442 0 : error!("Failed to connect to pgbouncer: pgbouncer_connstr {}", e);
443 0 : retries -= 1;
444 0 : tokio::time::sleep(Duration::from_secs(1)).await;
445 : }
446 : }
447 : };
448 :
449 : // Apply new config
450 0 : for (option_name, value) in pgbouncer_config.iter() {
451 0 : let query = format!("SET {}={}", option_name, value);
452 0 : // keep this log line for debugging purposes
453 0 : info!("Applying pgbouncer setting change: {}", query);
454 :
455 0 : if let Err(err) = client.simple_query(&query).await {
456 : // Don't fail on error, just print it into log
457 0 : error!(
458 0 : "Failed to apply pgbouncer setting change: {}, {}",
459 : query, err
460 : );
461 0 : };
462 : }
463 :
464 : // save values to pgbouncer.ini
465 : // so that they are preserved after pgbouncer restart
466 0 : let pgbouncer_ini_path = if std::env::var_os("AUTOSCALING").is_some() {
467 : // in VMs we use /etc/pgbouncer.ini
468 0 : "/etc/pgbouncer.ini".to_string()
469 : } else {
470 : // in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini
471 : // this is a shared volume between pgbouncer and postgres containers
472 : // FIXME: fix permissions for this file
473 0 : "/var/db/postgres/pgbouncer/pgbouncer.ini".to_string()
474 : };
475 0 : update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
476 :
477 0 : Ok(())
478 0 : }
479 :
480 : /// Spawn a task that will read Postgres logs from `stderr`, join multiline logs
481 : /// and send them to the logger. In the future we may also want to add context to
482 : /// these logs.
483 0 : pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<Result<()>> {
484 0 : tokio::spawn(async move {
485 0 : let stderr = tokio::process::ChildStderr::from_std(stderr)?;
486 0 : handle_postgres_logs_async(stderr).await
487 0 : })
488 0 : }
489 :
490 : /// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
491 : /// - next line starts with timestamp
492 : /// - EOF
493 : /// - no new lines were written for the last 100 milliseconds
494 0 : async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
495 0 : let mut lines = tokio::io::BufReader::new(stderr).lines();
496 0 : let timeout_duration = Duration::from_millis(100);
497 0 : let ts_regex =
498 0 : regex::Regex::new(r"^\d+-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("regex is valid");
499 0 :
500 0 : let mut buf = vec![];
501 : loop {
502 0 : let next_line = timeout(timeout_duration, lines.next_line()).await;
503 :
504 : // we should flush lines from the buffer if we cannot continue reading multiline message
505 0 : let should_flush_buf = match next_line {
506 : // Flushing if new line starts with timestamp
507 0 : Ok(Ok(Some(ref line))) => ts_regex.is_match(line),
508 : // Flushing on EOF, timeout or error
509 0 : _ => true,
510 : };
511 :
512 0 : if !buf.is_empty() && should_flush_buf {
513 : // join multiline message into a single line, separated by unicode Zero Width Space.
514 : // "PG:" suffix is used to distinguish postgres logs from other logs.
515 0 : let combined = format!("PG:{}\n", buf.join("\u{200B}"));
516 0 : buf.clear();
517 :
518 : // sync write to stderr to avoid interleaving with other logs
519 : use std::io::Write;
520 0 : let res = std::io::stderr().lock().write_all(combined.as_bytes());
521 0 : if let Err(e) = res {
522 0 : tracing::error!("error while writing to stderr: {}", e);
523 0 : }
524 0 : }
525 :
526 : // if not timeout, append line to the buffer
527 0 : if next_line.is_ok() {
528 0 : match next_line?? {
529 0 : Some(line) => buf.push(line),
530 : // EOF
531 0 : None => break,
532 : };
533 0 : }
534 : }
535 :
536 0 : Ok(())
537 0 : }
538 :
539 : /// `Postgres::config::Config` handles database names with whitespaces
540 : /// and special characters properly.
541 0 : pub fn postgres_conf_for_db(connstr: &url::Url, dbname: &str) -> Result<Config> {
542 0 : let mut conf = Config::from_str(connstr.as_str())?;
543 0 : conf.dbname(dbname);
544 0 : Ok(conf)
545 0 : }
|