Line data Source code
1 : //! Utils for dumping full state of the safekeeper.
2 :
3 : use std::fs;
4 : use std::fs::DirEntry;
5 : use std::io::BufReader;
6 : use std::io::Read;
7 : use std::path::PathBuf;
8 :
9 : use anyhow::Result;
10 : use chrono::{DateTime, Utc};
11 : use postgres_ffi::XLogSegNo;
12 : use serde::Deserialize;
13 : use serde::Serialize;
14 :
15 : use serde_with::{serde_as, DisplayFromStr};
16 : use utils::id::NodeId;
17 : use utils::id::TenantTimelineId;
18 : use utils::id::{TenantId, TimelineId};
19 : use utils::lsn::Lsn;
20 :
21 : use crate::safekeeper::SafeKeeperState;
22 : use crate::safekeeper::SafekeeperMemState;
23 : use crate::safekeeper::TermHistory;
24 : use crate::SafeKeeperConf;
25 :
26 : use crate::send_wal::WalSenderState;
27 : use crate::GlobalTimelines;
28 :
29 : /// Various filters that influence the resulting JSON output.
30 0 : #[derive(Debug, Serialize, Deserialize)]
31 : pub struct Args {
32 : /// Dump all available safekeeper state. False by default.
33 : pub dump_all: bool,
34 :
35 : /// Dump control_file content. Uses value of `dump_all` by default.
36 : pub dump_control_file: bool,
37 :
38 : /// Dump in-memory state. Uses value of `dump_all` by default.
39 : pub dump_memory: bool,
40 :
41 : /// Dump all disk files in a timeline directory. Uses value of `dump_all` by default.
42 : pub dump_disk_content: bool,
43 :
44 : /// Dump full term history. True by default.
45 : pub dump_term_history: bool,
46 :
47 : /// Filter timelines by tenant_id.
48 : pub tenant_id: Option<TenantId>,
49 :
50 : /// Filter timelines by timeline_id.
51 : pub timeline_id: Option<TimelineId>,
52 : }
53 :
54 : /// Response for debug dump request.
55 11 : #[derive(Debug, Serialize, Deserialize)]
56 : pub struct Response {
57 : pub start_time: DateTime<Utc>,
58 : pub finish_time: DateTime<Utc>,
59 : pub timelines: Vec<Timeline>,
60 : pub timelines_count: usize,
61 : pub config: Config,
62 : }
63 :
64 : /// Safekeeper configuration.
65 15 : #[derive(Debug, Serialize, Deserialize)]
66 : pub struct Config {
67 : pub id: NodeId,
68 : pub workdir: PathBuf,
69 : pub listen_pg_addr: String,
70 : pub listen_http_addr: String,
71 : pub no_sync: bool,
72 : pub max_offloader_lag_bytes: u64,
73 : pub wal_backup_enabled: bool,
74 : }
75 :
76 : #[serde_as]
77 13 : #[derive(Debug, Serialize, Deserialize)]
78 : pub struct Timeline {
79 : #[serde_as(as = "DisplayFromStr")]
80 : pub tenant_id: TenantId,
81 : #[serde_as(as = "DisplayFromStr")]
82 : pub timeline_id: TimelineId,
83 : pub control_file: Option<SafeKeeperState>,
84 : pub memory: Option<Memory>,
85 : pub disk_content: Option<DiskContent>,
86 : }
87 :
88 27 : #[derive(Debug, Serialize, Deserialize)]
89 : pub struct Memory {
90 : pub is_cancelled: bool,
91 : pub peers_info_len: usize,
92 : pub walsenders: Vec<WalSenderState>,
93 : pub wal_backup_active: bool,
94 : pub active: bool,
95 : pub num_computes: u32,
96 : pub last_removed_segno: XLogSegNo,
97 : pub epoch_start_lsn: Lsn,
98 : pub mem_state: SafekeeperMemState,
99 :
100 : // PhysicalStorage state.
101 : pub write_lsn: Lsn,
102 : pub write_record_lsn: Lsn,
103 : pub flush_lsn: Lsn,
104 : pub file_open: bool,
105 : }
106 :
107 5 : #[derive(Debug, Serialize, Deserialize)]
108 : pub struct DiskContent {
109 : pub files: Vec<FileInfo>,
110 : }
111 :
112 39 : #[derive(Debug, Serialize, Deserialize)]
113 : pub struct FileInfo {
114 : pub name: String,
115 : pub size: u64,
116 : pub created: DateTime<Utc>,
117 : pub modified: DateTime<Utc>,
118 : pub start_zeroes: u64,
119 : pub end_zeroes: u64,
120 : // TODO: add sha256 checksum
121 : }
122 :
123 : /// Build debug dump response, using the provided [`Args`] filters.
124 5 : pub async fn build(args: Args) -> Result<Response> {
125 5 : let start_time = Utc::now();
126 5 : let timelines_count = GlobalTimelines::timelines_count();
127 :
128 5 : let ptrs_snapshot = if args.tenant_id.is_some() && args.timeline_id.is_some() {
129 : // If both tenant_id and timeline_id are specified, we can just get the
130 : // timeline directly, without taking a snapshot of the whole list.
131 1 : let ttid = TenantTimelineId::new(args.tenant_id.unwrap(), args.timeline_id.unwrap());
132 1 : if let Ok(tli) = GlobalTimelines::get(ttid) {
133 1 : vec![tli]
134 : } else {
135 0 : vec![]
136 : }
137 : } else {
138 : // Otherwise, take a snapshot of the whole list.
139 4 : GlobalTimelines::get_all()
140 : };
141 :
142 : // TODO: return Stream instead of Vec
143 5 : let mut timelines = Vec::new();
144 10 : for tli in ptrs_snapshot {
145 5 : let ttid = tli.ttid;
146 5 : if let Some(tenant_id) = args.tenant_id {
147 1 : if tenant_id != ttid.tenant_id {
148 0 : continue;
149 1 : }
150 4 : }
151 5 : if let Some(timeline_id) = args.timeline_id {
152 1 : if timeline_id != ttid.timeline_id {
153 0 : continue;
154 1 : }
155 4 : }
156 :
157 5 : let control_file = if args.dump_control_file {
158 5 : let mut state = tli.get_state().await.1;
159 5 : if !args.dump_term_history {
160 0 : state.acceptor_state.term_history = TermHistory(vec![]);
161 5 : }
162 5 : Some(state)
163 : } else {
164 0 : None
165 : };
166 :
167 5 : let memory = if args.dump_memory {
168 5 : Some(tli.memory_dump().await)
169 : } else {
170 0 : None
171 : };
172 :
173 5 : let disk_content = if args.dump_disk_content {
174 : // build_disk_content can fail, but we don't want to fail the whole
175 : // request because of that.
176 5 : build_disk_content(&tli.timeline_dir).ok()
177 : } else {
178 0 : None
179 : };
180 :
181 5 : let timeline = Timeline {
182 5 : tenant_id: ttid.tenant_id,
183 5 : timeline_id: ttid.timeline_id,
184 5 : control_file,
185 5 : memory,
186 5 : disk_content,
187 5 : };
188 5 : timelines.push(timeline);
189 : }
190 :
191 5 : let config = GlobalTimelines::get_global_config();
192 5 :
193 5 : Ok(Response {
194 5 : start_time,
195 5 : finish_time: Utc::now(),
196 5 : timelines,
197 5 : timelines_count,
198 5 : config: build_config(config),
199 5 : })
200 5 : }
201 :
202 : /// Builds DiskContent from a directory path. It can fail if the directory
203 : /// is deleted between the time we get the path and the time we try to open it.
204 5 : fn build_disk_content(path: &std::path::Path) -> Result<DiskContent> {
205 5 : let mut files = Vec::new();
206 11 : for entry in fs::read_dir(path)? {
207 11 : if entry.is_err() {
208 0 : continue;
209 11 : }
210 11 : let file = build_file_info(entry?);
211 11 : if file.is_err() {
212 0 : continue;
213 11 : }
214 11 : files.push(file?);
215 : }
216 :
217 5 : Ok(DiskContent { files })
218 5 : }
219 :
220 : /// Builds FileInfo from DirEntry. Sometimes it can return an error
221 : /// if the file is deleted between the time we get the DirEntry
222 : /// and the time we try to open it.
223 11 : fn build_file_info(entry: DirEntry) -> Result<FileInfo> {
224 11 : let metadata = entry.metadata()?;
225 11 : let path = entry.path();
226 11 : let name = path
227 11 : .file_name()
228 11 : .and_then(|x| x.to_str())
229 11 : .unwrap_or("")
230 11 : .to_owned();
231 11 : let mut file = fs::File::open(path)?;
232 100664508 : let mut reader = BufReader::new(&mut file).bytes().filter_map(|x| x.ok());
233 11 :
234 54272705 : let start_zeroes = reader.by_ref().take_while(|&x| x == 0).count() as u64;
235 11 : let mut end_zeroes = 0;
236 46391814 : for b in reader {
237 46391803 : if b == 0 {
238 38630326 : end_zeroes += 1;
239 38630326 : } else {
240 7761477 : end_zeroes = 0;
241 7761477 : }
242 : }
243 :
244 : Ok(FileInfo {
245 11 : name,
246 11 : size: metadata.len(),
247 11 : created: DateTime::from(metadata.created()?),
248 11 : modified: DateTime::from(metadata.modified()?),
249 11 : start_zeroes,
250 11 : end_zeroes,
251 : })
252 11 : }
253 :
254 : /// Converts SafeKeeperConf to Config, filtering out the fields that are not
255 : /// supposed to be exposed.
256 5 : fn build_config(config: SafeKeeperConf) -> Config {
257 5 : Config {
258 5 : id: config.my_id,
259 5 : workdir: config.workdir,
260 5 : listen_pg_addr: config.listen_pg_addr,
261 5 : listen_http_addr: config.listen_http_addr,
262 5 : no_sync: config.no_sync,
263 5 : max_offloader_lag_bytes: config.max_offloader_lag_bytes,
264 5 : wal_backup_enabled: config.wal_backup_enabled,
265 5 : }
266 5 : }
|