LCOV - differential code coverage report
Current view: top level - compute_tools/src - extension_server.rs (source / functions) Coverage Total Hit UBC CBC
Current: f6946e90941b557c917ac98cd5a7e9506d180f3e.info Lines: 43.4 % 152 66 86 66
Current Date: 2023-10-19 02:04:12 Functions: 30.0 % 30 9 21 9
Baseline: c8637f37369098875162f194f92736355783b050.info
Baseline Date: 2023-10-18 20:25:20

           TLA  Line data    Source code
       1                 : // Download extension files from the extension store
       2                 : // and put them in the right place in the postgres directory (share / lib)
       3                 : /*
       4                 : The layout of the S3 bucket is as follows:
       5                 : 5615610098 // this is an extension build number
       6                 : ├── v14
       7                 : │   ├── extensions
       8                 : │   │   ├── anon.tar.zst
       9                 : │   │   └── embedding.tar.zst
      10                 : │   └── ext_index.json
      11                 : └── v15
      12                 :     ├── extensions
      13                 :     │   ├── anon.tar.zst
      14                 :     │   └── embedding.tar.zst
      15                 :     └── ext_index.json
      16                 : 5615261079
      17                 : ├── v14
      18                 : │   ├── extensions
      19                 : │   │   └── anon.tar.zst
      20                 : │   └── ext_index.json
      21                 : └── v15
      22                 :     ├── extensions
      23                 :     │   └── anon.tar.zst
      24                 :     └── ext_index.json
      25                 : 5623261088
      26                 : ├── v14
      27                 : │   ├── extensions
      28                 : │   │   └── embedding.tar.zst
      29                 : │   └── ext_index.json
      30                 : └── v15
      31                 :     ├── extensions
      32                 :     │   └── embedding.tar.zst
      33                 :     └── ext_index.json
      34                 : 
      35                 : Note that build number cannot be part of prefix because we might need extensions
      36                 : from other build numbers.
      37                 : 
      38                 : ext_index.json stores the control files and location of extension archives
      39                 : It also stores a list of public extensions and a library_index
      40                 : 
      41                 : We don't need to duplicate extension.tar.zst files.
      42                 : We only need to upload a new one if it is updated.
      43                 : (Currently we still upload every time anyway; hopefully this will change
      44                 : at some point.)
      45                 : 
      46                 : *access* is controlled by spec
      47                 : 
      48                 : More specifically, here is an example ext_index.json
      49                 : {
      50                 :     "public_extensions": [
      51                 :         "anon",
      52                 :         "pg_buffercache"
      53                 :     ],
      54                 :     "library_index": {
      55                 :         "anon": "anon",
      56                 :         "pg_buffercache": "pg_buffercache"
      57                 :     },
      58                 :     "extension_data": {
      59                 :         "pg_buffercache": {
      60                 :             "control_data": {
      61                 :                 "pg_buffercache.control": "# pg_buffercache extension \ncomment = 'examine the shared buffer cache' \ndefault_version = '1.3' \nmodule_pathname = '$libdir/pg_buffercache' \nrelocatable = true \ntrusted=true"
      62                 :             },
      63                 :             "archive_path": "5670669815/v14/extensions/pg_buffercache.tar.zst"
      64                 :         },
      65                 :         "anon": {
      66                 :             "control_data": {
      67                 :                 "anon.control": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n"
      68                 :             },
      69                 :             "archive_path": "5670669815/v14/extensions/anon.tar.zst"
      70                 :         }
      71                 :     }
      72                 : }
      73                 : */
      74                 : use anyhow::Context;
      75                 : use anyhow::{self, Result};
      76                 : use compute_api::spec::RemoteExtSpec;
      77                 : use regex::Regex;
      78                 : use remote_storage::*;
      79                 : use serde_json;
      80                 : use std::io::Read;
      81                 : use std::num::{NonZeroU32, NonZeroUsize};
      82                 : use std::path::Path;
      83                 : use std::str;
      84                 : use tar::Archive;
      85                 : use tokio::io::AsyncReadExt;
      86                 : use tracing::info;
      87                 : use tracing::log::warn;
      88                 : use zstd::stream::read::Decoder;
      89                 : 
      90 CBC         641 : fn get_pg_config(argument: &str, pgbin: &str) -> String {
      91             641 :     // gives the result of `pg_config [argument]`
      92             641 :     // where argument is a flag like `--version` or `--sharedir`
      93             641 :     let pgconfig = pgbin
      94             641 :         .strip_suffix("postgres")
      95             641 :         .expect("bad pgbin")
      96             641 :         .to_owned()
      97             641 :         + "/pg_config";
      98             641 :     let config_output = std::process::Command::new(pgconfig)
      99             641 :         .arg(argument)
     100             641 :         .output()
     101             641 :         .expect("pg_config error");
     102             641 :     std::str::from_utf8(&config_output.stdout)
     103             641 :         .expect("pg_config error")
     104             641 :         .trim()
     105             641 :         .to_string()
     106             641 : }
     107                 : 
     108             641 : pub fn get_pg_version(pgbin: &str) -> String {
     109             641 :     // pg_config --version returns a (platform specific) human readable string
     110             641 :     // such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
     111             641 :     let human_version = get_pg_config("--version", pgbin);
     112             641 :     return parse_pg_version(&human_version).to_string();
     113             641 : }
     114                 : 
     115             653 : fn parse_pg_version(human_version: &str) -> &str {
     116             653 :     // Normal releases have version strings like "PostgreSQL 15.4". But there
     117             653 :     // are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
     118             653 :     // 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
     119             653 :     // configure option, you can tack any string to the version number,
     120             653 :     // e.g. "PostgreSQL 15.4foobar".
     121             653 :     match Regex::new(r"^PostgreSQL (?<major>\d+).+")
     122             653 :         .unwrap()
     123             653 :         .captures(human_version)
     124                 :     {
     125             653 :         Some(captures) if captures.len() == 2 => match &captures["major"] {
     126             653 :             "14" => return "v14",
     127               9 :             "15" => return "v15",
     128               6 :             "16" => return "v16",
     129               2 :             _ => {}
     130                 :         },
     131 UBC           0 :         _ => {}
     132                 :     }
     133 CBC           2 :     panic!("Unsuported postgres version {human_version}");
     134             651 : }
     135                 : 
     136                 : #[cfg(test)]
     137                 : mod tests {
     138                 :     use super::parse_pg_version;
     139                 : 
     140               1 :     #[test]
     141               1 :     fn test_parse_pg_version() {
     142               1 :         assert_eq!(parse_pg_version("PostgreSQL 15.4"), "v15");
     143               1 :         assert_eq!(parse_pg_version("PostgreSQL 15.14"), "v15");
     144               1 :         assert_eq!(
     145               1 :             parse_pg_version("PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)"),
     146               1 :             "v15"
     147               1 :         );
     148                 : 
     149               1 :         assert_eq!(parse_pg_version("PostgreSQL 14.15"), "v14");
     150               1 :         assert_eq!(parse_pg_version("PostgreSQL 14.0"), "v14");
     151               1 :         assert_eq!(
     152               1 :             parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
     153               1 :             "v14"
     154               1 :         );
     155                 : 
     156               1 :         assert_eq!(parse_pg_version("PostgreSQL 16devel"), "v16");
     157               1 :         assert_eq!(parse_pg_version("PostgreSQL 16beta1"), "v16");
     158               1 :         assert_eq!(parse_pg_version("PostgreSQL 16rc2"), "v16");
     159               1 :         assert_eq!(parse_pg_version("PostgreSQL 16extra"), "v16");
     160               1 :     }
     161                 : 
     162               1 :     #[test]
     163                 :     #[should_panic]
     164               1 :     fn test_parse_pg_unsupported_version() {
     165               1 :         parse_pg_version("PostgreSQL 13.14");
     166               1 :     }
     167                 : 
     168               1 :     #[test]
     169                 :     #[should_panic]
     170               1 :     fn test_parse_pg_incorrect_version_format() {
     171               1 :         parse_pg_version("PostgreSQL 14");
     172               1 :     }
     173                 : }
     174                 : 
     175                 : // download the archive for a given extension,
     176                 : // unzip it, and place files in the appropriate locations (share/lib)
     177 UBC           0 : pub async fn download_extension(
     178               0 :     ext_name: &str,
     179               0 :     ext_path: &RemotePath,
     180               0 :     remote_storage: &GenericRemoteStorage,
     181               0 :     pgbin: &str,
     182               0 : ) -> Result<u64> {
     183               0 :     info!("Download extension {:?} from {:?}", ext_name, ext_path);
     184               0 :     let mut download = remote_storage.download(ext_path).await?;
     185               0 :     let mut download_buffer = Vec::new();
     186               0 :     download
     187               0 :         .download_stream
     188               0 :         .read_to_end(&mut download_buffer)
     189               0 :         .await?;
     190               0 :     let download_size = download_buffer.len() as u64;
     191                 :     // it's unclear whether it is more performant to decompress into memory or not
     192                 :     // TODO: decompressing into memory can be avoided
     193               0 :     let mut decoder = Decoder::new(download_buffer.as_slice())?;
     194               0 :     let mut decompress_buffer = Vec::new();
     195               0 :     decoder.read_to_end(&mut decompress_buffer)?;
     196               0 :     let mut archive = Archive::new(decompress_buffer.as_slice());
     197               0 :     let unzip_dest = pgbin
     198               0 :         .strip_suffix("/bin/postgres")
     199               0 :         .expect("bad pgbin")
     200               0 :         .to_string()
     201               0 :         + "/download_extensions";
     202               0 :     archive.unpack(&unzip_dest)?;
     203               0 :     info!("Download + unzip {:?} completed successfully", &ext_path);
     204                 : 
     205               0 :     let sharedir_paths = (
     206               0 :         unzip_dest.to_string() + "/share/extension",
     207               0 :         Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"),
     208               0 :     );
     209               0 :     let libdir_paths = (
     210               0 :         unzip_dest.to_string() + "/lib",
     211               0 :         Path::new(&get_pg_config("--pkglibdir", pgbin)).to_path_buf(),
     212               0 :     );
     213                 :     // move contents of the libdir / sharedir in unzipped archive to the correct local paths
     214               0 :     for paths in [sharedir_paths, libdir_paths] {
     215               0 :         let (zip_dir, real_dir) = paths;
     216               0 :         info!("mv {zip_dir:?}/*  {real_dir:?}");
     217               0 :         for file in std::fs::read_dir(zip_dir)? {
     218               0 :             let old_file = file?.path();
     219               0 :             let new_file =
     220               0 :                 Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?);
     221               0 :             info!("moving {old_file:?} to {new_file:?}");
     222                 : 
     223                 :             // extension download failed: Directory not empty (os error 39)
     224               0 :             match std::fs::rename(old_file, new_file) {
     225               0 :                 Ok(()) => info!("move succeeded"),
     226               0 :                 Err(e) => {
     227               0 :                     warn!("move failed, probably because the extension already exists: {e}")
     228                 :                 }
     229                 :             }
     230                 :         }
     231                 :     }
     232               0 :     info!("done moving extension {ext_name}");
     233               0 :     Ok(download_size)
     234               0 : }
     235                 : 
     236                 : // Create extension control files from spec
     237               0 : pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
     238               0 :     let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
     239               0 :     for (ext_name, ext_data) in remote_extensions.extension_data.iter() {
     240                 :         // Check if extension is present in public or custom.
     241                 :         // If not, then it is not allowed to be used by this compute.
     242               0 :         if let Some(public_extensions) = &remote_extensions.public_extensions {
     243               0 :             if !public_extensions.contains(ext_name) {
     244               0 :                 if let Some(custom_extensions) = &remote_extensions.custom_extensions {
     245               0 :                     if !custom_extensions.contains(ext_name) {
     246               0 :                         continue; // skip this extension, it is not allowed
     247               0 :                     }
     248               0 :                 }
     249               0 :             }
     250               0 :         }
     251                 : 
     252               0 :         for (control_name, control_content) in &ext_data.control_data {
     253               0 :             let control_path = local_sharedir.join(control_name);
     254               0 :             if !control_path.exists() {
     255               0 :                 info!("writing file {:?}{:?}", control_path, control_content);
     256               0 :                 std::fs::write(control_path, control_content).unwrap();
     257                 :             } else {
     258               0 :                 warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path);
     259                 :             }
     260                 :         }
     261                 :     }
     262               0 : }
     263                 : 
     264                 : // This function initializes the necessary structs to use remote storage
     265               0 : pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRemoteStorage> {
     266               0 :     #[derive(Debug, serde::Deserialize)]
     267                 :     struct RemoteExtJson {
     268                 :         bucket: String,
     269                 :         region: String,
     270                 :         endpoint: Option<String>,
     271                 :         prefix: Option<String>,
     272                 :     }
     273               0 :     let remote_ext_json = serde_json::from_str::<RemoteExtJson>(remote_ext_config)?;
     274                 : 
     275               0 :     let config = S3Config {
     276               0 :         bucket_name: remote_ext_json.bucket,
     277               0 :         bucket_region: remote_ext_json.region,
     278               0 :         prefix_in_bucket: remote_ext_json.prefix,
     279               0 :         endpoint: remote_ext_json.endpoint,
     280               0 :         concurrency_limit: NonZeroUsize::new(100).expect("100 != 0"),
     281               0 :         max_keys_per_list_response: None,
     282               0 :     };
     283               0 :     let config = RemoteStorageConfig {
     284               0 :         max_concurrent_syncs: NonZeroUsize::new(100).expect("100 != 0"),
     285               0 :         max_sync_errors: NonZeroU32::new(100).expect("100 != 0"),
     286               0 :         storage: RemoteStorageKind::AwsS3(config),
     287               0 :     };
     288               0 :     GenericRemoteStorage::from_config(&config)
     289               0 : }
        

Generated by: LCOV version 2.1-beta