LCOV - code coverage report
Current view: top level - safekeeper/src - debug_dump.rs (source / functions) Coverage Total Hit
Test: 8ac049b474321fdc72ddcb56d7165153a1a900e8.info Lines: 90.9 % 110 100
Test Date: 2023-09-06 10:18:01 Functions: 43.0 % 93 40

            Line data    Source code
       1              : //! Utils for dumping full state of the safekeeper.
       2              : 
       3              : use std::fs;
       4              : use std::fs::DirEntry;
       5              : use std::io::BufReader;
       6              : use std::io::Read;
       7              : use std::path::PathBuf;
       8              : 
       9              : use anyhow::Result;
      10              : use chrono::{DateTime, Utc};
      11              : use postgres_ffi::XLogSegNo;
      12              : use serde::Deserialize;
      13              : use serde::Serialize;
      14              : 
      15              : use serde_with::{serde_as, DisplayFromStr};
      16              : use utils::id::NodeId;
      17              : use utils::id::TenantTimelineId;
      18              : use utils::id::{TenantId, TimelineId};
      19              : use utils::lsn::Lsn;
      20              : 
      21              : use crate::safekeeper::SafeKeeperState;
      22              : use crate::safekeeper::SafekeeperMemState;
      23              : use crate::safekeeper::TermHistory;
      24              : use crate::SafeKeeperConf;
      25              : 
      26              : use crate::send_wal::WalSenderState;
      27              : use crate::GlobalTimelines;
      28              : 
      29              : /// Various filters that influence the resulting JSON output.
      30            0 : #[derive(Debug, Serialize, Deserialize)]
      31              : pub struct Args {
      32              :     /// Dump all available safekeeper state. False by default.
      33              :     pub dump_all: bool,
      34              : 
      35              :     /// Dump control_file content. Uses value of `dump_all` by default.
      36              :     pub dump_control_file: bool,
      37              : 
      38              :     /// Dump in-memory state. Uses value of `dump_all` by default.
      39              :     pub dump_memory: bool,
      40              : 
      41              :     /// Dump all disk files in a timeline directory. Uses value of `dump_all` by default.
      42              :     pub dump_disk_content: bool,
      43              : 
      44              :     /// Dump full term history. True by default.
      45              :     pub dump_term_history: bool,
      46              : 
      47              :     /// Filter timelines by tenant_id.
      48              :     pub tenant_id: Option<TenantId>,
      49              : 
      50              :     /// Filter timelines by timeline_id.
      51              :     pub timeline_id: Option<TimelineId>,
      52              : }
      53              : 
      54              : /// Response for debug dump request.
      55           11 : #[derive(Debug, Serialize, Deserialize)]
      56              : pub struct Response {
      57              :     pub start_time: DateTime<Utc>,
      58              :     pub finish_time: DateTime<Utc>,
      59              :     pub timelines: Vec<Timeline>,
      60              :     pub timelines_count: usize,
      61              :     pub config: Config,
      62              : }
      63              : 
      64              : /// Safekeeper configuration.
      65           15 : #[derive(Debug, Serialize, Deserialize)]
      66              : pub struct Config {
      67              :     pub id: NodeId,
      68              :     pub workdir: PathBuf,
      69              :     pub listen_pg_addr: String,
      70              :     pub listen_http_addr: String,
      71              :     pub no_sync: bool,
      72              :     pub max_offloader_lag_bytes: u64,
      73              :     pub wal_backup_enabled: bool,
      74              : }
      75              : 
      76              : #[serde_as]
      77           13 : #[derive(Debug, Serialize, Deserialize)]
      78              : pub struct Timeline {
      79              :     #[serde_as(as = "DisplayFromStr")]
      80              :     pub tenant_id: TenantId,
      81              :     #[serde_as(as = "DisplayFromStr")]
      82              :     pub timeline_id: TimelineId,
      83              :     pub control_file: Option<SafeKeeperState>,
      84              :     pub memory: Option<Memory>,
      85              :     pub disk_content: Option<DiskContent>,
      86              : }
      87              : 
      88           27 : #[derive(Debug, Serialize, Deserialize)]
      89              : pub struct Memory {
      90              :     pub is_cancelled: bool,
      91              :     pub peers_info_len: usize,
      92              :     pub walsenders: Vec<WalSenderState>,
      93              :     pub wal_backup_active: bool,
      94              :     pub active: bool,
      95              :     pub num_computes: u32,
      96              :     pub last_removed_segno: XLogSegNo,
      97              :     pub epoch_start_lsn: Lsn,
      98              :     pub mem_state: SafekeeperMemState,
      99              : 
     100              :     // PhysicalStorage state.
     101              :     pub write_lsn: Lsn,
     102              :     pub write_record_lsn: Lsn,
     103              :     pub flush_lsn: Lsn,
     104              :     pub file_open: bool,
     105              : }
     106              : 
     107            5 : #[derive(Debug, Serialize, Deserialize)]
     108              : pub struct DiskContent {
     109              :     pub files: Vec<FileInfo>,
     110              : }
     111              : 
     112           39 : #[derive(Debug, Serialize, Deserialize)]
     113              : pub struct FileInfo {
     114              :     pub name: String,
     115              :     pub size: u64,
     116              :     pub created: DateTime<Utc>,
     117              :     pub modified: DateTime<Utc>,
     118              :     pub start_zeroes: u64,
     119              :     pub end_zeroes: u64,
     120              :     // TODO: add sha256 checksum
     121              : }
     122              : 
     123              : /// Build debug dump response, using the provided [`Args`] filters.
     124            5 : pub async fn build(args: Args) -> Result<Response> {
     125            5 :     let start_time = Utc::now();
     126            5 :     let timelines_count = GlobalTimelines::timelines_count();
     127              : 
     128            5 :     let ptrs_snapshot = if args.tenant_id.is_some() && args.timeline_id.is_some() {
     129              :         // If both tenant_id and timeline_id are specified, we can just get the
     130              :         // timeline directly, without taking a snapshot of the whole list.
     131            1 :         let ttid = TenantTimelineId::new(args.tenant_id.unwrap(), args.timeline_id.unwrap());
     132            1 :         if let Ok(tli) = GlobalTimelines::get(ttid) {
     133            1 :             vec![tli]
     134              :         } else {
     135            0 :             vec![]
     136              :         }
     137              :     } else {
     138              :         // Otherwise, take a snapshot of the whole list.
     139            4 :         GlobalTimelines::get_all()
     140              :     };
     141              : 
     142              :     // TODO: return Stream instead of Vec
     143            5 :     let mut timelines = Vec::new();
     144           10 :     for tli in ptrs_snapshot {
     145            5 :         let ttid = tli.ttid;
     146            5 :         if let Some(tenant_id) = args.tenant_id {
     147            1 :             if tenant_id != ttid.tenant_id {
     148            0 :                 continue;
     149            1 :             }
     150            4 :         }
     151            5 :         if let Some(timeline_id) = args.timeline_id {
     152            1 :             if timeline_id != ttid.timeline_id {
     153            0 :                 continue;
     154            1 :             }
     155            4 :         }
     156              : 
     157            5 :         let control_file = if args.dump_control_file {
     158            5 :             let mut state = tli.get_state().await.1;
     159            5 :             if !args.dump_term_history {
     160            0 :                 state.acceptor_state.term_history = TermHistory(vec![]);
     161            5 :             }
     162            5 :             Some(state)
     163              :         } else {
     164            0 :             None
     165              :         };
     166              : 
     167            5 :         let memory = if args.dump_memory {
     168            5 :             Some(tli.memory_dump().await)
     169              :         } else {
     170            0 :             None
     171              :         };
     172              : 
     173            5 :         let disk_content = if args.dump_disk_content {
     174              :             // build_disk_content can fail, but we don't want to fail the whole
     175              :             // request because of that.
     176            5 :             build_disk_content(&tli.timeline_dir).ok()
     177              :         } else {
     178            0 :             None
     179              :         };
     180              : 
     181            5 :         let timeline = Timeline {
     182            5 :             tenant_id: ttid.tenant_id,
     183            5 :             timeline_id: ttid.timeline_id,
     184            5 :             control_file,
     185            5 :             memory,
     186            5 :             disk_content,
     187            5 :         };
     188            5 :         timelines.push(timeline);
     189              :     }
     190              : 
     191            5 :     let config = GlobalTimelines::get_global_config();
     192            5 : 
     193            5 :     Ok(Response {
     194            5 :         start_time,
     195            5 :         finish_time: Utc::now(),
     196            5 :         timelines,
     197            5 :         timelines_count,
     198            5 :         config: build_config(config),
     199            5 :     })
     200            5 : }
     201              : 
     202              : /// Builds DiskContent from a directory path. It can fail if the directory
     203              : /// is deleted between the time we get the path and the time we try to open it.
     204            5 : fn build_disk_content(path: &std::path::Path) -> Result<DiskContent> {
     205            5 :     let mut files = Vec::new();
     206           11 :     for entry in fs::read_dir(path)? {
     207           11 :         if entry.is_err() {
     208            0 :             continue;
     209           11 :         }
     210           11 :         let file = build_file_info(entry?);
     211           11 :         if file.is_err() {
     212            0 :             continue;
     213           11 :         }
     214           11 :         files.push(file?);
     215              :     }
     216              : 
     217            5 :     Ok(DiskContent { files })
     218            5 : }
     219              : 
     220              : /// Builds FileInfo from DirEntry. Sometimes it can return an error
     221              : /// if the file is deleted between the time we get the DirEntry
     222              : /// and the time we try to open it.
     223           11 : fn build_file_info(entry: DirEntry) -> Result<FileInfo> {
     224           11 :     let metadata = entry.metadata()?;
     225           11 :     let path = entry.path();
     226           11 :     let name = path
     227           11 :         .file_name()
     228           11 :         .and_then(|x| x.to_str())
     229           11 :         .unwrap_or("")
     230           11 :         .to_owned();
     231           11 :     let mut file = fs::File::open(path)?;
     232    100664508 :     let mut reader = BufReader::new(&mut file).bytes().filter_map(|x| x.ok());
     233           11 : 
     234     54272705 :     let start_zeroes = reader.by_ref().take_while(|&x| x == 0).count() as u64;
     235           11 :     let mut end_zeroes = 0;
     236     46391814 :     for b in reader {
     237     46391803 :         if b == 0 {
     238     38630326 :             end_zeroes += 1;
     239     38630326 :         } else {
     240      7761477 :             end_zeroes = 0;
     241      7761477 :         }
     242              :     }
     243              : 
     244              :     Ok(FileInfo {
     245           11 :         name,
     246           11 :         size: metadata.len(),
     247           11 :         created: DateTime::from(metadata.created()?),
     248           11 :         modified: DateTime::from(metadata.modified()?),
     249           11 :         start_zeroes,
     250           11 :         end_zeroes,
     251              :     })
     252           11 : }
     253              : 
     254              : /// Converts SafeKeeperConf to Config, filtering out the fields that are not
     255              : /// supposed to be exposed.
     256            5 : fn build_config(config: SafeKeeperConf) -> Config {
     257            5 :     Config {
     258            5 :         id: config.my_id,
     259            5 :         workdir: config.workdir,
     260            5 :         listen_pg_addr: config.listen_pg_addr,
     261            5 :         listen_http_addr: config.listen_http_addr,
     262            5 :         no_sync: config.no_sync,
     263            5 :         max_offloader_lag_bytes: config.max_offloader_lag_bytes,
     264            5 :         wal_backup_enabled: config.wal_backup_enabled,
     265            5 :     }
     266            5 : }
        

Generated by: LCOV version 2.1-beta