Line data Source code
1 : use std::collections::HashMap;
2 : use std::fmt::Write;
3 : use std::fs;
4 : use std::fs::File;
5 : use std::io::{BufRead, BufReader};
6 : use std::os::unix::fs::PermissionsExt;
7 : use std::path::Path;
8 : use std::process::Child;
9 : use std::thread::JoinHandle;
10 : use std::time::{Duration, Instant};
11 :
12 : use anyhow::{bail, Result};
13 : use futures::StreamExt;
14 : use ini::Ini;
15 : use notify::{RecursiveMode, Watcher};
16 : use tokio::io::AsyncBufReadExt;
17 : use tokio::time::timeout;
18 : use tokio_postgres::NoTls;
19 : use tracing::{debug, error, info, instrument};
20 :
21 : use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
22 :
/// Upper bound on how long `wait_for_postgres` keeps waiting for Postgres to
/// report that it is ready (60 seconds).
const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_secs(60);
24 :
/// Turn `s` into a ready-to-use SQL string literal.
///
/// Every single quote and every backslash in `s` is doubled. The result is
/// already wrapped in quotes, so callers must not add `'{}'` or `E'{}'`
/// themselves: plain input becomes `'...'`, while input containing a backslash
/// becomes an escape-string literal `E'...'` (e.g. `'db'''` or `E'db\\'`).
///
/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
/// for the original implementation.
pub fn escape_literal(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len() + 3);
    let mut saw_backslash = false;

    for ch in s.chars() {
        match ch {
            '\'' => escaped.push_str("''"),
            '\\' => {
                saw_backslash = true;
                escaped.push_str("\\\\");
            }
            other => escaped.push(other),
        }
    }

    // Backslashes are only special inside E'' literals, so the prefix is
    // needed exactly when at least one backslash was emitted.
    if saw_backslash {
        format!("E'{}'", escaped)
    } else {
        format!("'{}'", escaped)
    }
}
40 :
/// Quote `s` so that it can be used as a value in `postgresql.conf`.
///
/// Single quotes and backslashes are doubled and the result is wrapped in
/// single quotes, so callers must not add `'{}'` themselves.
pub fn escape_conf_value(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        // Quote and backslash are escaped by repeating the character.
        if ch == '\'' || ch == '\\' {
            quoted.push(ch);
        }
        quoted.push(ch);
    }
    quoted.push('\'');
    quoted
}
47 :
48 : pub trait GenericOptionExt {
49 : fn to_pg_option(&self) -> String;
50 : fn to_pg_setting(&self) -> String;
51 : }
52 :
53 : impl GenericOptionExt for GenericOption {
54 : /// Represent `GenericOption` as SQL statement parameter.
55 3 : fn to_pg_option(&self) -> String {
56 3 : if let Some(val) = &self.value {
57 3 : match self.vartype.as_ref() {
58 3 : "string" => format!("{} {}", self.name, escape_literal(val)),
59 1 : _ => format!("{} {}", self.name, val),
60 : }
61 : } else {
62 0 : self.name.to_owned()
63 : }
64 3 : }
65 :
66 : /// Represent `GenericOption` as configuration option.
67 23 : fn to_pg_setting(&self) -> String {
68 23 : if let Some(val) = &self.value {
69 23 : match self.vartype.as_ref() {
70 23 : "string" => format!("{} = {}", self.name, escape_conf_value(val)),
71 15 : _ => format!("{} = {}", self.name, val),
72 : }
73 : } else {
74 0 : self.name.to_owned()
75 : }
76 23 : }
77 : }
78 :
79 : pub trait PgOptionsSerialize {
80 : fn as_pg_options(&self) -> String;
81 : fn as_pg_settings(&self) -> String;
82 : }
83 :
84 : impl PgOptionsSerialize for GenericOptions {
85 : /// Serialize an optional collection of `GenericOption`'s to
86 : /// Postgres SQL statement arguments.
87 2 : fn as_pg_options(&self) -> String {
88 2 : if let Some(ops) = &self {
89 1 : ops.iter()
90 3 : .map(|op| op.to_pg_option())
91 1 : .collect::<Vec<String>>()
92 1 : .join(" ")
93 : } else {
94 1 : "".to_string()
95 : }
96 2 : }
97 :
98 : /// Serialize an optional collection of `GenericOption`'s to
99 : /// `postgresql.conf` compatible format.
100 1 : fn as_pg_settings(&self) -> String {
101 1 : if let Some(ops) = &self {
102 1 : ops.iter()
103 23 : .map(|op| op.to_pg_setting())
104 1 : .collect::<Vec<String>>()
105 1 : .join("\n")
106 1 : + "\n" // newline after last setting
107 : } else {
108 0 : "".to_string()
109 : }
110 1 : }
111 : }
112 :
113 : pub trait GenericOptionsSearch {
114 : fn find(&self, name: &str) -> Option<String>;
115 : fn find_ref(&self, name: &str) -> Option<&GenericOption>;
116 : }
117 :
118 : impl GenericOptionsSearch for GenericOptions {
119 : /// Lookup option by name
120 9 : fn find(&self, name: &str) -> Option<String> {
121 9 : let ops = self.as_ref()?;
122 6 : let op = ops.iter().find(|s| s.name == name)?;
123 2 : op.value.clone()
124 9 : }
125 :
126 : /// Lookup option by name, returning ref
127 0 : fn find_ref(&self, name: &str) -> Option<&GenericOption> {
128 0 : let ops = self.as_ref()?;
129 0 : ops.iter().find(|s| s.name == name)
130 0 : }
131 : }
132 :
133 : pub trait RoleExt {
134 : fn to_pg_options(&self) -> String;
135 : }
136 :
137 : impl RoleExt for Role {
138 : /// Serialize a list of role parameters into a Postgres-acceptable
139 : /// string of arguments.
140 1 : fn to_pg_options(&self) -> String {
141 1 : // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
142 1 : let mut params: String = self.options.as_pg_options();
143 1 : params.push_str(" LOGIN");
144 :
145 1 : if let Some(pass) = &self.encrypted_password {
146 : // Some time ago we supported only md5 and treated all encrypted_password as md5.
147 : // Now we also support SCRAM-SHA-256 and to preserve compatibility
148 : // we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
149 1 : if pass.starts_with("SCRAM-SHA-256") {
150 0 : write!(params, " PASSWORD '{pass}'")
151 0 : .expect("String is documented to not to error during write operations");
152 1 : } else {
153 1 : write!(params, " PASSWORD 'md5{pass}'")
154 1 : .expect("String is documented to not to error during write operations");
155 1 : }
156 0 : } else {
157 0 : params.push_str(" PASSWORD NULL");
158 0 : }
159 :
160 1 : params
161 1 : }
162 : }
163 :
164 : pub trait DatabaseExt {
165 : fn to_pg_options(&self) -> String;
166 : }
167 :
168 : impl DatabaseExt for Database {
169 : /// Serialize a list of database parameters into a Postgres-acceptable
170 : /// string of arguments.
171 : /// NB: `TEMPLATE` is actually also an identifier, but so far we only need
172 : /// to use `template0` and `template1`, so it is not a problem. Yet in the future
173 : /// it may require a proper quoting too.
174 1 : fn to_pg_options(&self) -> String {
175 1 : let mut params: String = self.options.as_pg_options();
176 1 : write!(params, " OWNER {}", &self.owner.pg_quote())
177 1 : .expect("String is documented to not to error during write operations");
178 1 :
179 1 : params
180 1 : }
181 : }
182 :
183 : /// Generic trait used to provide quoting / encoding for strings used in the
184 : /// Postgres SQL queries and DATABASE_URL.
185 : pub trait Escaping {
186 : fn pg_quote(&self) -> String;
187 : }
188 :
189 : impl Escaping for PgIdent {
190 : /// This is intended to mimic Postgres quote_ident(), but for simplicity it
191 : /// always quotes provided string with `""` and escapes every `"`.
192 : /// **Not idempotent**, i.e. if string is already escaped it will be escaped again.
193 2 : fn pg_quote(&self) -> String {
194 2 : let result = format!("\"{}\"", self.replace('"', "\"\""));
195 2 : result
196 2 : }
197 : }
198 :
199 : /// Build a list of existing Postgres roles
200 0 : pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {
201 0 : let postgres_roles = client
202 0 : .query_raw::<str, &String, &[String; 0]>(
203 0 : "SELECT rolname, rolpassword FROM pg_catalog.pg_authid",
204 0 : &[],
205 0 : )
206 0 : .await?
207 0 : .filter_map(|row| async { row.ok() })
208 0 : .map(|row| Role {
209 0 : name: row.get("rolname"),
210 0 : encrypted_password: row.get("rolpassword"),
211 0 : options: None,
212 0 : })
213 0 : .collect()
214 0 : .await;
215 :
216 0 : Ok(postgres_roles)
217 0 : }
218 :
219 : /// Build a list of existing Postgres databases
220 0 : pub async fn get_existing_dbs_async(
221 0 : client: &tokio_postgres::Client,
222 0 : ) -> Result<HashMap<String, Database>> {
223 : // `pg_database.datconnlimit = -2` means that the database is in the
224 : // invalid state. See:
225 : // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
226 0 : let rowstream = client
227 0 : .query_raw::<str, &String, &[String; 0]>(
228 0 : "SELECT
229 0 : datname AS name,
230 0 : datdba::regrole::text AS owner,
231 0 : NOT datallowconn AS restrict_conn,
232 0 : datconnlimit = - 2 AS invalid
233 0 : FROM
234 0 : pg_catalog.pg_database;",
235 0 : &[],
236 0 : )
237 0 : .await?;
238 :
239 0 : let dbs_map = rowstream
240 0 : .filter_map(|r| async { r.ok() })
241 0 : .map(|row| Database {
242 0 : name: row.get("name"),
243 0 : owner: row.get("owner"),
244 0 : restrict_conn: row.get("restrict_conn"),
245 0 : invalid: row.get("invalid"),
246 0 : options: None,
247 0 : })
248 0 : .map(|db| (db.name.clone(), db.clone()))
249 0 : .collect::<HashMap<_, _>>()
250 0 : .await;
251 :
252 0 : Ok(dbs_map)
253 0 : }
254 :
255 : /// Wait for Postgres to become ready to accept connections. It's ready to
256 : /// accept connections when the state-field in `pgdata/postmaster.pid` says
257 : /// 'ready'.
258 0 : #[instrument(skip_all, fields(pgdata = %pgdata.display()))]
259 : pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
260 : let pid_path = pgdata.join("postmaster.pid");
261 :
262 : // PostgreSQL writes line "ready" to the postmaster.pid file, when it has
263 : // completed initialization and is ready to accept connections. We want to
264 : // react quickly and perform the rest of our initialization as soon as
265 : // PostgreSQL starts accepting connections. Use 'notify' to be notified
266 : // whenever the PID file is changed, and whenever it changes, read it to
267 : // check if it's now "ready".
268 : //
269 : // You cannot actually watch a file before it exists, so we first watch the
270 : // data directory, and once the postmaster.pid file appears, we switch to
271 : // watch the file instead. We also wake up every 100 ms to poll, just in
272 : // case we miss some events for some reason. Not strictly necessary, but
273 : // better safe than sorry.
274 : let (tx, rx) = std::sync::mpsc::channel();
275 0 : let watcher_res = notify::recommended_watcher(move |res| {
276 0 : let _ = tx.send(res);
277 0 : });
278 : let (mut watcher, rx): (Box<dyn Watcher>, _) = match watcher_res {
279 : Ok(watcher) => (Box::new(watcher), rx),
280 : Err(e) => {
281 : match e.kind {
282 : notify::ErrorKind::Io(os) if os.raw_os_error() == Some(38) => {
283 : // docker on m1 macs does not support recommended_watcher
284 : // but return "Function not implemented (os error 38)"
285 : // see https://github.com/notify-rs/notify/issues/423
286 : let (tx, rx) = std::sync::mpsc::channel();
287 :
288 : // let's poll it faster than what we check the results for (100ms)
289 : let config =
290 : notify::Config::default().with_poll_interval(Duration::from_millis(50));
291 :
292 : let watcher = notify::PollWatcher::new(
293 0 : move |res| {
294 0 : let _ = tx.send(res);
295 0 : },
296 : config,
297 : )?;
298 :
299 : (Box::new(watcher), rx)
300 : }
301 : _ => return Err(e.into()),
302 : }
303 : }
304 : };
305 :
306 : watcher.watch(pgdata, RecursiveMode::NonRecursive)?;
307 :
308 : let started_at = Instant::now();
309 : let mut postmaster_pid_seen = false;
310 : loop {
311 : if let Ok(Some(status)) = pg.try_wait() {
312 : // Postgres exited, that is not what we expected, bail out earlier.
313 : let code = status.code().unwrap_or(-1);
314 : bail!("Postgres exited unexpectedly with code {}", code);
315 : }
316 :
317 : let res = rx.recv_timeout(Duration::from_millis(100));
318 : debug!("woken up by notify: {res:?}");
319 : // If there are multiple events in the channel already, we only need to be
320 : // check once. Swallow the extra events before we go ahead to check the
321 : // pid file.
322 : while let Ok(res) = rx.try_recv() {
323 : debug!("swallowing extra event: {res:?}");
324 : }
325 :
326 : // Check that we can open pid file first.
327 : if let Ok(file) = File::open(&pid_path) {
328 : if !postmaster_pid_seen {
329 : debug!("postmaster.pid appeared");
330 : watcher
331 : .unwatch(pgdata)
332 : .expect("Failed to remove pgdata dir watch");
333 : watcher
334 : .watch(&pid_path, RecursiveMode::NonRecursive)
335 : .expect("Failed to add postmaster.pid file watch");
336 : postmaster_pid_seen = true;
337 : }
338 :
339 : let file = BufReader::new(file);
340 : let last_line = file.lines().last();
341 :
342 : // Pid file could be there and we could read it, but it could be empty, for example.
343 : if let Some(Ok(line)) = last_line {
344 : let status = line.trim();
345 : debug!("last line of postmaster.pid: {status:?}");
346 :
347 : // Now Postgres is ready to accept connections
348 : if status == "ready" {
349 : break;
350 : }
351 : }
352 : }
353 :
354 : // Give up after POSTGRES_WAIT_TIMEOUT.
355 : let duration = started_at.elapsed();
356 : if duration >= POSTGRES_WAIT_TIMEOUT {
357 : bail!("timed out while waiting for Postgres to start");
358 : }
359 : }
360 :
361 : tracing::info!("PostgreSQL is now running, continuing to configure it");
362 :
363 : Ok(())
364 : }
365 :
366 : /// Remove `pgdata` directory and create it again with right permissions.
367 0 : pub fn create_pgdata(pgdata: &str) -> Result<()> {
368 0 : // Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
369 0 : // If it is something different then create_dir() will error out anyway.
370 0 : let _ok = fs::remove_dir_all(pgdata);
371 0 : fs::create_dir(pgdata)?;
372 0 : fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?;
373 :
374 0 : Ok(())
375 0 : }
376 :
377 : /// Update pgbouncer.ini with provided options
378 0 : fn update_pgbouncer_ini(
379 0 : pgbouncer_config: HashMap<String, String>,
380 0 : pgbouncer_ini_path: &str,
381 0 : ) -> Result<()> {
382 0 : let mut conf = Ini::load_from_file(pgbouncer_ini_path)?;
383 0 : let section = conf.section_mut(Some("pgbouncer")).unwrap();
384 :
385 0 : for (option_name, value) in pgbouncer_config.iter() {
386 0 : section.insert(option_name, value);
387 0 : debug!(
388 0 : "Updating pgbouncer.ini with new values {}={}",
389 : option_name, value
390 : );
391 : }
392 :
393 0 : conf.write_to_file(pgbouncer_ini_path)?;
394 0 : Ok(())
395 0 : }
396 :
397 : /// Tune pgbouncer.
398 : /// 1. Apply new config using pgbouncer admin console
399 : /// 2. Add new values to pgbouncer.ini to preserve them after restart
400 0 : pub async fn tune_pgbouncer(pgbouncer_config: HashMap<String, String>) -> Result<()> {
401 0 : let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() {
402 : // for VMs use pgbouncer specific way to connect to
403 : // pgbouncer admin console without password
404 : // when pgbouncer is running under the same user.
405 0 : "host=/tmp port=6432 dbname=pgbouncer user=pgbouncer".to_string()
406 : } else {
407 : // for k8s use normal connection string with password
408 : // to connect to pgbouncer admin console
409 0 : let mut pgbouncer_connstr =
410 0 : "host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
411 0 : if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
412 0 : pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
413 0 : }
414 0 : pgbouncer_connstr
415 : };
416 :
417 0 : info!(
418 0 : "Connecting to pgbouncer with connection string: {}",
419 : pgbouncer_connstr
420 : );
421 :
422 : // connect to pgbouncer, retrying several times
423 : // because pgbouncer may not be ready yet
424 0 : let mut retries = 3;
425 0 : let client = loop {
426 0 : match tokio_postgres::connect(&pgbouncer_connstr, NoTls).await {
427 0 : Ok((client, connection)) => {
428 0 : tokio::spawn(async move {
429 0 : if let Err(e) = connection.await {
430 0 : eprintln!("connection error: {}", e);
431 0 : }
432 0 : });
433 0 : break client;
434 : }
435 0 : Err(e) => {
436 0 : if retries == 0 {
437 0 : return Err(e.into());
438 0 : }
439 0 : error!("Failed to connect to pgbouncer: pgbouncer_connstr {}", e);
440 0 : retries -= 1;
441 0 : tokio::time::sleep(Duration::from_secs(1)).await;
442 : }
443 : }
444 : };
445 :
446 : // Apply new config
447 0 : for (option_name, value) in pgbouncer_config.iter() {
448 0 : let query = format!("SET {}={}", option_name, value);
449 0 : // keep this log line for debugging purposes
450 0 : info!("Applying pgbouncer setting change: {}", query);
451 :
452 0 : if let Err(err) = client.simple_query(&query).await {
453 : // Don't fail on error, just print it into log
454 0 : error!(
455 0 : "Failed to apply pgbouncer setting change: {}, {}",
456 : query, err
457 : );
458 0 : };
459 : }
460 :
461 : // save values to pgbouncer.ini
462 : // so that they are preserved after pgbouncer restart
463 0 : let pgbouncer_ini_path = if std::env::var_os("AUTOSCALING").is_some() {
464 : // in VMs we use /etc/pgbouncer.ini
465 0 : "/etc/pgbouncer.ini".to_string()
466 : } else {
467 : // in pods we use /var/db/postgres/pgbouncer/pgbouncer.ini
468 : // this is a shared volume between pgbouncer and postgres containers
469 : // FIXME: fix permissions for this file
470 0 : "/var/db/postgres/pgbouncer/pgbouncer.ini".to_string()
471 : };
472 0 : update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?;
473 :
474 0 : Ok(())
475 0 : }
476 :
477 : /// Spawn a thread that will read Postgres logs from `stderr`, join multiline logs
478 : /// and send them to the logger. In the future we may also want to add context to
479 : /// these logs.
480 0 : pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()> {
481 0 : std::thread::spawn(move || {
482 0 : let runtime = tokio::runtime::Builder::new_current_thread()
483 0 : .enable_all()
484 0 : .build()
485 0 : .expect("failed to build tokio runtime");
486 0 :
487 0 : let res = runtime.block_on(async move {
488 0 : let stderr = tokio::process::ChildStderr::from_std(stderr)?;
489 0 : handle_postgres_logs_async(stderr).await
490 0 : });
491 0 : if let Err(e) = res {
492 0 : tracing::error!("error while processing postgres logs: {}", e);
493 0 : }
494 0 : })
495 0 : }
496 :
497 : /// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
498 : /// - next line starts with timestamp
499 : /// - EOF
500 : /// - no new lines were written for the last 100 milliseconds
501 0 : async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
502 0 : let mut lines = tokio::io::BufReader::new(stderr).lines();
503 0 : let timeout_duration = Duration::from_millis(100);
504 0 : let ts_regex =
505 0 : regex::Regex::new(r"^\d+-\d{2}-\d{2} \d{2}:\d{2}:\d{2}").expect("regex is valid");
506 0 :
507 0 : let mut buf = vec![];
508 : loop {
509 0 : let next_line = timeout(timeout_duration, lines.next_line()).await;
510 :
511 : // we should flush lines from the buffer if we cannot continue reading multiline message
512 0 : let should_flush_buf = match next_line {
513 : // Flushing if new line starts with timestamp
514 0 : Ok(Ok(Some(ref line))) => ts_regex.is_match(line),
515 : // Flushing on EOF, timeout or error
516 0 : _ => true,
517 : };
518 :
519 0 : if !buf.is_empty() && should_flush_buf {
520 : // join multiline message into a single line, separated by unicode Zero Width Space.
521 : // "PG:" suffix is used to distinguish postgres logs from other logs.
522 0 : let combined = format!("PG:{}\n", buf.join("\u{200B}"));
523 0 : buf.clear();
524 :
525 : // sync write to stderr to avoid interleaving with other logs
526 : use std::io::Write;
527 0 : let res = std::io::stderr().lock().write_all(combined.as_bytes());
528 0 : if let Err(e) = res {
529 0 : tracing::error!("error while writing to stderr: {}", e);
530 0 : }
531 0 : }
532 :
533 : // if not timeout, append line to the buffer
534 0 : if next_line.is_ok() {
535 0 : match next_line?? {
536 0 : Some(line) => buf.push(line),
537 : // EOF
538 0 : None => break,
539 : };
540 0 : }
541 : }
542 :
543 0 : Ok(())
544 0 : }
|