Line data Source code
1 : use hyper::{Body, Request, Response, StatusCode, Uri};
2 : use once_cell::sync::Lazy;
3 : use serde::{Deserialize, Serialize};
4 : use std::collections::{HashMap, HashSet};
5 : use std::fmt;
6 : use std::io::Write as _;
7 : use std::str::FromStr;
8 : use std::sync::Arc;
9 : use storage_broker::proto::SafekeeperTimelineInfo;
10 : use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
11 : use tokio::sync::mpsc;
12 : use tokio::task;
13 : use tokio_stream::wrappers::ReceiverStream;
14 : use tokio_util::sync::CancellationToken;
15 : use tracing::{info_span, Instrument};
16 : use utils::failpoint_support::failpoints_handler;
17 : use utils::http::endpoint::{
18 : profile_cpu_handler, prometheus_metrics_handler, request_span, ChannelWriter,
19 : };
20 : use utils::http::request::parse_query_param;
21 :
22 : use postgres_ffi::WAL_SEGMENT_SIZE;
23 : use safekeeper_api::models::{SkTimelineInfo, TimelineCopyRequest};
24 : use safekeeper_api::models::{TimelineCreateRequest, TimelineTermBumpRequest};
25 : use utils::{
26 : auth::SwappableJwtAuth,
27 : http::{
28 : endpoint::{self, auth_middleware, check_permission_with},
29 : error::ApiError,
30 : json::{json_request, json_response},
31 : request::{ensure_no_body, parse_request_param},
32 : RequestExt, RouterBuilder,
33 : },
34 : id::{NodeId, TenantId, TenantTimelineId, TimelineId},
35 : lsn::Lsn,
36 : };
37 :
38 : use crate::debug_dump::TimelineDigestRequest;
39 : use crate::receive_wal::WalReceiverState;
40 : use crate::safekeeper::Term;
41 : use crate::safekeeper::{ServerInfo, TermLsn};
42 : use crate::send_wal::WalSenderState;
43 : use crate::timeline::PeerInfo;
44 : use crate::timelines_global_map::TimelineDeleteForceResult;
45 : use crate::GlobalTimelines;
46 : use crate::SafeKeeperConf;
47 : use crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline};
48 :
/// JSON body returned by the status (healthcheck) endpoint.
#[derive(Debug, Serialize)]
struct SafekeeperStatus {
    /// Node id of this safekeeper, taken from `SafeKeeperConf::my_id`.
    id: NodeId,
}
53 :
54 : /// Healthcheck handler.
55 0 : async fn status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
56 0 : check_permission(&request, None)?;
57 0 : let conf = get_conf(&request);
58 0 : let status = SafekeeperStatus { id: conf.my_id };
59 0 : json_response(StatusCode::OK, status)
60 0 : }
61 :
62 0 : fn get_conf(request: &Request<Body>) -> &SafeKeeperConf {
63 0 : request
64 0 : .data::<Arc<SafeKeeperConf>>()
65 0 : .expect("unknown state type")
66 0 : .as_ref()
67 0 : }
68 :
/// API-facing counterpart of `TermLsn`: one (term, LSN) entry of the term history.
///
/// In JSON the LSN appears as a Postgres-style string, e.g. `0/16FFDDDD`
/// (pinned by the unit test in this file); presumably this comes from `Lsn`'s
/// own serde implementation. Used only for the API response.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct TermSwitchApiEntry {
    /// Term of this history entry.
    pub term: Term,
    /// LSN paired with `term` in this history entry.
    pub lsn: Lsn,
}
76 :
77 : impl From<TermSwitchApiEntry> for TermLsn {
78 0 : fn from(api_val: TermSwitchApiEntry) -> Self {
79 0 : TermLsn {
80 0 : term: api_val.term,
81 0 : lsn: api_val.lsn,
82 0 : }
83 0 : }
84 : }
85 :
/// Acceptor state as reported by the timeline status endpoint: the persisted
/// acceptor state augmented with the last log term for convenience.
#[derive(Debug, Serialize, Deserialize)]
pub struct AcceptorStateStatus {
    /// Current term of the acceptor.
    pub term: Term,
    /// Last log term (historically called "epoch"); the status handler fills it
    /// from `get_last_log_term(flush_lsn)`.
    pub epoch: Term, // aka last_log_term
    /// Term history entries converted to their API representation.
    pub term_history: Vec<TermSwitchApiEntry>,
}
93 :
/// Info about timeline on safekeeper ready for reporting.
///
/// Built by `timeline_status_handler`. The `commit_lsn`, `backup_lsn`,
/// `peer_horizon_lsn` and `remote_consistent_lsn` fields are taken from the
/// in-memory state, so the reported values can be lost.
#[derive(Debug, Serialize, Deserialize)]
pub struct TimelineStatus {
    /// Tenant owning the timeline.
    pub tenant_id: TenantId,
    /// Timeline this status describes.
    pub timeline_id: TimelineId,
    /// Acceptor term, last log term and term history.
    pub acceptor_state: AcceptorStateStatus,
    /// Server info stored in the timeline state (`state.server`).
    pub pg_info: ServerInfo,
    /// Value returned by the timeline's `get_flush_lsn()`.
    pub flush_lsn: Lsn,
    /// `timeline_start_lsn` from the timeline state.
    pub timeline_start_lsn: Lsn,
    /// `local_start_lsn` from the timeline state.
    pub local_start_lsn: Lsn,
    /// In-memory commit LSN.
    pub commit_lsn: Lsn,
    /// In-memory backup LSN.
    pub backup_lsn: Lsn,
    /// In-memory peer horizon LSN.
    pub peer_horizon_lsn: Lsn,
    /// In-memory remote consistent LSN.
    pub remote_consistent_lsn: Lsn,
    /// Peers as returned by the timeline's `get_peers(conf)`.
    pub peers: Vec<PeerInfo>,
    /// All WAL senders currently registered on the timeline.
    pub walsenders: Vec<WalSenderState>,
    /// All WAL receivers currently registered on the timeline.
    pub walreceivers: Vec<WalReceiverState>,
}
112 :
113 0 : fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Result<(), ApiError> {
114 0 : check_permission_with(request, |claims| {
115 0 : crate::auth::check_permission(claims, tenant_id)
116 0 : })
117 0 : }
118 :
119 : /// Deactivates all timelines for the tenant and removes its data directory.
120 : /// See `timeline_delete_handler`.
121 0 : async fn tenant_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
122 0 : let tenant_id = parse_request_param(&request, "tenant_id")?;
123 0 : let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
124 0 : check_permission(&request, Some(tenant_id))?;
125 0 : ensure_no_body(&mut request).await?;
126 : // FIXME: `delete_force_all_for_tenant` can return an error for multiple different reasons;
127 : // Using an `InternalServerError` should be fixed when the types support it
128 0 : let delete_info = GlobalTimelines::delete_force_all_for_tenant(&tenant_id, only_local)
129 0 : .await
130 0 : .map_err(ApiError::InternalServerError)?;
131 0 : json_response(
132 0 : StatusCode::OK,
133 0 : delete_info
134 0 : .iter()
135 0 : .map(|(ttid, resp)| (format!("{}", ttid.timeline_id), *resp))
136 0 : .collect::<HashMap<String, TimelineDeleteForceResult>>(),
137 0 : )
138 0 : }
139 :
140 0 : async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
141 0 : let request_data: TimelineCreateRequest = json_request(&mut request).await?;
142 :
143 0 : let ttid = TenantTimelineId {
144 0 : tenant_id: request_data.tenant_id,
145 0 : timeline_id: request_data.timeline_id,
146 0 : };
147 0 : check_permission(&request, Some(ttid.tenant_id))?;
148 :
149 0 : let server_info = ServerInfo {
150 0 : pg_version: request_data.pg_version,
151 0 : system_id: request_data.system_id.unwrap_or(0),
152 0 : wal_seg_size: request_data.wal_seg_size.unwrap_or(WAL_SEGMENT_SIZE as u32),
153 0 : };
154 0 : let local_start_lsn = request_data.local_start_lsn.unwrap_or_else(|| {
155 0 : request_data
156 0 : .commit_lsn
157 0 : .segment_lsn(server_info.wal_seg_size as usize)
158 0 : });
159 0 : GlobalTimelines::create(ttid, server_info, request_data.commit_lsn, local_start_lsn)
160 0 : .await
161 0 : .map_err(ApiError::InternalServerError)?;
162 :
163 0 : json_response(StatusCode::OK, ())
164 0 : }
165 :
166 : /// List all (not deleted) timelines.
167 : /// Note: it is possible to do the same with debug_dump.
168 0 : async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
169 0 : check_permission(&request, None)?;
170 0 : let res: Vec<TenantTimelineId> = GlobalTimelines::get_all()
171 0 : .iter()
172 0 : .map(|tli| tli.ttid)
173 0 : .collect();
174 0 : json_response(StatusCode::OK, res)
175 0 : }
176 :
177 : /// Report info about timeline.
178 0 : async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
179 0 : let ttid = TenantTimelineId::new(
180 0 : parse_request_param(&request, "tenant_id")?,
181 0 : parse_request_param(&request, "timeline_id")?,
182 : );
183 0 : check_permission(&request, Some(ttid.tenant_id))?;
184 :
185 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
186 0 : let (inmem, state) = tli.get_state().await;
187 0 : let flush_lsn = tli.get_flush_lsn().await;
188 :
189 0 : let last_log_term = state.acceptor_state.get_last_log_term(flush_lsn);
190 0 : let term_history = state
191 0 : .acceptor_state
192 0 : .term_history
193 0 : .0
194 0 : .into_iter()
195 0 : .map(|ts| TermSwitchApiEntry {
196 0 : term: ts.term,
197 0 : lsn: ts.lsn,
198 0 : })
199 0 : .collect();
200 0 : let acc_state = AcceptorStateStatus {
201 0 : term: state.acceptor_state.term,
202 0 : epoch: last_log_term,
203 0 : term_history,
204 0 : };
205 0 :
206 0 : let conf = get_conf(&request);
207 : // Note: we report in memory values which can be lost.
208 0 : let status = TimelineStatus {
209 0 : tenant_id: ttid.tenant_id,
210 0 : timeline_id: ttid.timeline_id,
211 0 : acceptor_state: acc_state,
212 0 : pg_info: state.server,
213 0 : flush_lsn,
214 0 : timeline_start_lsn: state.timeline_start_lsn,
215 0 : local_start_lsn: state.local_start_lsn,
216 0 : commit_lsn: inmem.commit_lsn,
217 0 : backup_lsn: inmem.backup_lsn,
218 0 : peer_horizon_lsn: inmem.peer_horizon_lsn,
219 0 : remote_consistent_lsn: inmem.remote_consistent_lsn,
220 0 : peers: tli.get_peers(conf).await,
221 0 : walsenders: tli.get_walsenders().get_all(),
222 0 : walreceivers: tli.get_walreceivers().get_all(),
223 0 : };
224 0 : json_response(StatusCode::OK, status)
225 0 : }
226 :
227 : /// Deactivates the timeline and removes its data directory.
228 0 : async fn timeline_delete_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
229 0 : let ttid = TenantTimelineId::new(
230 0 : parse_request_param(&request, "tenant_id")?,
231 0 : parse_request_param(&request, "timeline_id")?,
232 : );
233 0 : let only_local = parse_query_param(&request, "only_local")?.unwrap_or(false);
234 0 : check_permission(&request, Some(ttid.tenant_id))?;
235 0 : ensure_no_body(&mut request).await?;
236 : // FIXME: `delete_force` can fail from both internal errors and bad requests. Add better
237 : // error handling here when we're able to.
238 0 : let resp = GlobalTimelines::delete(&ttid, only_local)
239 0 : .await
240 0 : .map_err(ApiError::InternalServerError)?;
241 0 : json_response(StatusCode::OK, resp)
242 0 : }
243 :
244 : /// Pull timeline from peer safekeeper instances.
245 0 : async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
246 0 : check_permission(&request, None)?;
247 :
248 0 : let data: pull_timeline::Request = json_request(&mut request).await?;
249 0 : let conf = get_conf(&request);
250 :
251 0 : let resp = pull_timeline::handle_request(data, conf.sk_auth_token.clone())
252 0 : .await
253 0 : .map_err(ApiError::InternalServerError)?;
254 0 : json_response(StatusCode::OK, resp)
255 0 : }
256 :
257 : /// Stream tar archive with all timeline data.
258 0 : async fn timeline_snapshot_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
259 0 : let destination = parse_request_param(&request, "destination_id")?;
260 0 : let ttid = TenantTimelineId::new(
261 0 : parse_request_param(&request, "tenant_id")?,
262 0 : parse_request_param(&request, "timeline_id")?,
263 : );
264 0 : check_permission(&request, Some(ttid.tenant_id))?;
265 :
266 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
267 :
268 : // To stream the body use wrap_stream which wants Stream of Result<Bytes>,
269 : // so create the chan and write to it in another task.
270 0 : let (tx, rx) = mpsc::channel(1);
271 0 :
272 0 : let conf = get_conf(&request);
273 0 : task::spawn(pull_timeline::stream_snapshot(
274 0 : tli,
275 0 : conf.my_id,
276 0 : destination,
277 0 : tx,
278 0 : ));
279 0 :
280 0 : let rx_stream = ReceiverStream::new(rx);
281 0 : let body = Body::wrap_stream(rx_stream);
282 0 :
283 0 : let response = Response::builder()
284 0 : .status(200)
285 0 : .header(hyper::header::CONTENT_TYPE, "application/octet-stream")
286 0 : .body(body)
287 0 : .unwrap();
288 0 :
289 0 : Ok(response)
290 0 : }
291 :
292 0 : async fn timeline_copy_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
293 0 : check_permission(&request, None)?;
294 :
295 0 : let request_data: TimelineCopyRequest = json_request(&mut request).await?;
296 0 : let ttid = TenantTimelineId::new(
297 0 : parse_request_param(&request, "tenant_id")?,
298 0 : parse_request_param(&request, "source_timeline_id")?,
299 : );
300 :
301 0 : let source = GlobalTimelines::get(ttid)?;
302 :
303 0 : copy_timeline::handle_request(copy_timeline::Request{
304 0 : source,
305 0 : until_lsn: request_data.until_lsn,
306 0 : destination_ttid: TenantTimelineId::new(ttid.tenant_id, request_data.target_timeline_id),
307 0 : })
308 0 : .instrument(info_span!("copy_timeline", from=%ttid, to=%request_data.target_timeline_id, until_lsn=%request_data.until_lsn))
309 0 : .await
310 0 : .map_err(ApiError::InternalServerError)?;
311 :
312 0 : json_response(StatusCode::OK, ())
313 0 : }
314 :
315 0 : async fn patch_control_file_handler(
316 0 : mut request: Request<Body>,
317 0 : ) -> Result<Response<Body>, ApiError> {
318 0 : check_permission(&request, None)?;
319 :
320 0 : let ttid = TenantTimelineId::new(
321 0 : parse_request_param(&request, "tenant_id")?,
322 0 : parse_request_param(&request, "timeline_id")?,
323 : );
324 :
325 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
326 :
327 0 : let patch_request: patch_control_file::Request = json_request(&mut request).await?;
328 0 : let response = patch_control_file::handle_request(tli, patch_request)
329 0 : .await
330 0 : .map_err(ApiError::InternalServerError)?;
331 :
332 0 : json_response(StatusCode::OK, response)
333 0 : }
334 :
335 : /// Force persist control file.
336 0 : async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
337 0 : check_permission(&request, None)?;
338 :
339 0 : let ttid = TenantTimelineId::new(
340 0 : parse_request_param(&request, "tenant_id")?,
341 0 : parse_request_param(&request, "timeline_id")?,
342 : );
343 :
344 0 : let tli = GlobalTimelines::get(ttid)?;
345 0 : tli.write_shared_state()
346 0 : .await
347 : .sk
348 0 : .state_mut()
349 0 : .flush()
350 0 : .await
351 0 : .map_err(ApiError::InternalServerError)?;
352 0 : json_response(StatusCode::OK, ())
353 0 : }
354 :
355 0 : async fn timeline_digest_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
356 0 : let ttid = TenantTimelineId::new(
357 0 : parse_request_param(&request, "tenant_id")?,
358 0 : parse_request_param(&request, "timeline_id")?,
359 : );
360 0 : check_permission(&request, Some(ttid.tenant_id))?;
361 :
362 0 : let from_lsn: Option<Lsn> = parse_query_param(&request, "from_lsn")?;
363 0 : let until_lsn: Option<Lsn> = parse_query_param(&request, "until_lsn")?;
364 :
365 0 : let request = TimelineDigestRequest {
366 0 : from_lsn: from_lsn.ok_or(ApiError::BadRequest(anyhow::anyhow!(
367 0 : "from_lsn is required"
368 0 : )))?,
369 0 : until_lsn: until_lsn.ok_or(ApiError::BadRequest(anyhow::anyhow!(
370 0 : "until_lsn is required"
371 0 : )))?,
372 : };
373 :
374 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
375 0 : let tli = tli
376 0 : .wal_residence_guard()
377 0 : .await
378 0 : .map_err(ApiError::InternalServerError)?;
379 :
380 0 : let response = debug_dump::calculate_digest(&tli, request)
381 0 : .await
382 0 : .map_err(ApiError::InternalServerError)?;
383 0 : json_response(StatusCode::OK, response)
384 0 : }
385 :
386 : /// Unevict timeline and remove uploaded partial segment(s) from the remote storage.
387 : /// Successfull response returns list of segments existed before the deletion.
388 : /// Aimed for one-off usage not normally needed.
389 0 : async fn timeline_backup_partial_reset(request: Request<Body>) -> Result<Response<Body>, ApiError> {
390 0 : let ttid = TenantTimelineId::new(
391 0 : parse_request_param(&request, "tenant_id")?,
392 0 : parse_request_param(&request, "timeline_id")?,
393 : );
394 0 : check_permission(&request, Some(ttid.tenant_id))?;
395 :
396 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
397 :
398 0 : let response = tli
399 0 : .backup_partial_reset()
400 0 : .await
401 0 : .map_err(ApiError::InternalServerError)?;
402 0 : json_response(StatusCode::OK, response)
403 0 : }
404 :
405 : /// Make term at least as high as one in request. If one in request is None,
406 : /// increment current one.
407 0 : async fn timeline_term_bump_handler(
408 0 : mut request: Request<Body>,
409 0 : ) -> Result<Response<Body>, ApiError> {
410 0 : let ttid = TenantTimelineId::new(
411 0 : parse_request_param(&request, "tenant_id")?,
412 0 : parse_request_param(&request, "timeline_id")?,
413 : );
414 0 : check_permission(&request, Some(ttid.tenant_id))?;
415 :
416 0 : let request_data: TimelineTermBumpRequest = json_request(&mut request).await?;
417 :
418 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
419 0 : let response = tli
420 0 : .term_bump(request_data.term)
421 0 : .await
422 0 : .map_err(ApiError::InternalServerError)?;
423 :
424 0 : json_response(StatusCode::OK, response)
425 0 : }
426 :
427 : /// Used only in tests to hand craft required data.
428 0 : async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
429 0 : let ttid = TenantTimelineId::new(
430 0 : parse_request_param(&request, "tenant_id")?,
431 0 : parse_request_param(&request, "timeline_id")?,
432 : );
433 0 : check_permission(&request, Some(ttid.tenant_id))?;
434 0 : let sk_info: SkTimelineInfo = json_request(&mut request).await?;
435 0 : let proto_sk_info = SafekeeperTimelineInfo {
436 0 : safekeeper_id: 0,
437 0 : tenant_timeline_id: Some(ProtoTenantTimelineId {
438 0 : tenant_id: ttid.tenant_id.as_ref().to_owned(),
439 0 : timeline_id: ttid.timeline_id.as_ref().to_owned(),
440 0 : }),
441 0 : term: sk_info.term.unwrap_or(0),
442 0 : last_log_term: sk_info.last_log_term.unwrap_or(0),
443 0 : flush_lsn: sk_info.flush_lsn.0,
444 0 : commit_lsn: sk_info.commit_lsn.0,
445 0 : remote_consistent_lsn: sk_info.remote_consistent_lsn.0,
446 0 : peer_horizon_lsn: sk_info.peer_horizon_lsn.0,
447 0 : safekeeper_connstr: sk_info.safekeeper_connstr.unwrap_or_else(|| "".to_owned()),
448 0 : http_connstr: sk_info.http_connstr.unwrap_or_else(|| "".to_owned()),
449 0 : backup_lsn: sk_info.backup_lsn.0,
450 0 : local_start_lsn: sk_info.local_start_lsn.0,
451 0 : availability_zone: None,
452 0 : standby_horizon: sk_info.standby_horizon.0,
453 0 : };
454 :
455 0 : let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
456 0 : tli.record_safekeeper_info(proto_sk_info)
457 0 : .await
458 0 : .map_err(ApiError::InternalServerError)?;
459 :
460 0 : json_response(StatusCode::OK, ())
461 0 : }
462 :
463 0 : fn parse_kv_str<E: fmt::Display, T: FromStr<Err = E>>(k: &str, v: &str) -> Result<T, ApiError> {
464 0 : v.parse()
465 0 : .map_err(|e| ApiError::BadRequest(anyhow::anyhow!("cannot parse {k}: {e}")))
466 0 : }
467 :
468 : /// Dump debug info about all available safekeeper state.
469 0 : async fn dump_debug_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
470 0 : check_permission(&request, None)?;
471 0 : ensure_no_body(&mut request).await?;
472 :
473 0 : let mut dump_all: Option<bool> = None;
474 0 : let mut dump_control_file: Option<bool> = None;
475 0 : let mut dump_memory: Option<bool> = None;
476 0 : let mut dump_disk_content: Option<bool> = None;
477 0 : let mut dump_term_history: Option<bool> = None;
478 0 : let mut dump_wal_last_modified: Option<bool> = None;
479 0 : let mut tenant_id: Option<TenantId> = None;
480 0 : let mut timeline_id: Option<TimelineId> = None;
481 0 :
482 0 : let query = request.uri().query().unwrap_or("");
483 0 : let mut values = url::form_urlencoded::parse(query.as_bytes());
484 :
485 0 : for (k, v) in &mut values {
486 0 : match k.as_ref() {
487 0 : "dump_all" => dump_all = Some(parse_kv_str(&k, &v)?),
488 0 : "dump_control_file" => dump_control_file = Some(parse_kv_str(&k, &v)?),
489 0 : "dump_memory" => dump_memory = Some(parse_kv_str(&k, &v)?),
490 0 : "dump_disk_content" => dump_disk_content = Some(parse_kv_str(&k, &v)?),
491 0 : "dump_term_history" => dump_term_history = Some(parse_kv_str(&k, &v)?),
492 0 : "dump_wal_last_modified" => dump_wal_last_modified = Some(parse_kv_str(&k, &v)?),
493 0 : "tenant_id" => tenant_id = Some(parse_kv_str(&k, &v)?),
494 0 : "timeline_id" => timeline_id = Some(parse_kv_str(&k, &v)?),
495 0 : _ => Err(ApiError::BadRequest(anyhow::anyhow!(
496 0 : "Unknown query parameter: {}",
497 0 : k
498 0 : )))?,
499 : }
500 : }
501 :
502 0 : let dump_all = dump_all.unwrap_or(false);
503 0 : let dump_control_file = dump_control_file.unwrap_or(dump_all);
504 0 : let dump_memory = dump_memory.unwrap_or(dump_all);
505 0 : let dump_disk_content = dump_disk_content.unwrap_or(dump_all);
506 0 : let dump_term_history = dump_term_history.unwrap_or(true);
507 0 : let dump_wal_last_modified = dump_wal_last_modified.unwrap_or(dump_all);
508 0 :
509 0 : let args = debug_dump::Args {
510 0 : dump_all,
511 0 : dump_control_file,
512 0 : dump_memory,
513 0 : dump_disk_content,
514 0 : dump_term_history,
515 0 : dump_wal_last_modified,
516 0 : tenant_id,
517 0 : timeline_id,
518 0 : };
519 :
520 0 : let resp = debug_dump::build(args)
521 0 : .await
522 0 : .map_err(ApiError::InternalServerError)?;
523 :
524 0 : let started_at = std::time::Instant::now();
525 0 :
526 0 : let (tx, rx) = mpsc::channel(1);
527 0 :
528 0 : let body = Body::wrap_stream(ReceiverStream::new(rx));
529 0 :
530 0 : let mut writer = ChannelWriter::new(128 * 1024, tx);
531 0 :
532 0 : let response = Response::builder()
533 0 : .status(200)
534 0 : .header(hyper::header::CONTENT_TYPE, "application/octet-stream")
535 0 : .body(body)
536 0 : .unwrap();
537 :
538 0 : let span = info_span!("blocking");
539 0 : tokio::task::spawn_blocking(move || {
540 0 : let _span = span.entered();
541 0 :
542 0 : let res = serde_json::to_writer(&mut writer, &resp)
543 0 : .map_err(std::io::Error::from)
544 0 : .and_then(|_| writer.flush());
545 0 :
546 0 : match res {
547 : Ok(()) => {
548 0 : tracing::info!(
549 0 : bytes = writer.flushed_bytes(),
550 0 : elapsed_ms = started_at.elapsed().as_millis(),
551 0 : "responded /v1/debug_dump"
552 : );
553 : }
554 0 : Err(e) => {
555 0 : tracing::warn!("failed to write out /v1/debug_dump response: {e:#}");
556 : // semantics of this error are quite... unclear. we want to error the stream out to
557 : // abort the response to somehow notify the client that we failed.
558 : //
559 : // though, most likely the reason for failure is that the receiver is already gone.
560 0 : drop(
561 0 : writer
562 0 : .tx
563 0 : .blocking_send(Err(std::io::ErrorKind::BrokenPipe.into())),
564 0 : );
565 : }
566 : }
567 0 : });
568 0 :
569 0 : Ok(response)
570 0 : }
571 :
572 : /// Safekeeper http router.
573 0 : pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
574 0 : let mut router = endpoint::make_router();
575 0 : if conf.http_auth.is_some() {
576 0 : router = router.middleware(auth_middleware(|request| {
577 : #[allow(clippy::mutable_key_type)]
578 0 : static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> = Lazy::new(|| {
579 0 : ["/v1/status", "/metrics", "/pprof/profile"]
580 0 : .iter()
581 0 : .map(|v| v.parse().unwrap())
582 0 : .collect()
583 0 : });
584 0 : if ALLOWLIST_ROUTES.contains(request.uri()) {
585 0 : None
586 : } else {
587 : // Option<Arc<SwappableJwtAuth>> is always provided as data below, hence unwrap().
588 0 : request
589 0 : .data::<Option<Arc<SwappableJwtAuth>>>()
590 0 : .unwrap()
591 0 : .as_deref()
592 : }
593 0 : }))
594 0 : }
595 :
596 : // NB: on any changes do not forget to update the OpenAPI spec
597 : // located nearby (/safekeeper/src/http/openapi_spec.yaml).
598 0 : let auth = conf.http_auth.clone();
599 0 : router
600 0 : .data(Arc::new(conf))
601 0 : .data(auth)
602 0 : .get("/metrics", |r| request_span(r, prometheus_metrics_handler))
603 0 : .get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
604 0 : .get("/v1/status", |r| request_span(r, status_handler))
605 0 : .put("/v1/failpoints", |r| {
606 0 : request_span(r, move |r| async {
607 0 : check_permission(&r, None)?;
608 0 : let cancel = CancellationToken::new();
609 0 : failpoints_handler(r, cancel).await
610 0 : })
611 0 : })
612 0 : .delete("/v1/tenant/:tenant_id", |r| {
613 0 : request_span(r, tenant_delete_handler)
614 0 : })
615 0 : // Will be used in the future instead of implicit timeline creation
616 0 : .post("/v1/tenant/timeline", |r| {
617 0 : request_span(r, timeline_create_handler)
618 0 : })
619 0 : .get("/v1/tenant/timeline", |r| {
620 0 : request_span(r, timeline_list_handler)
621 0 : })
622 0 : .get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
623 0 : request_span(r, timeline_status_handler)
624 0 : })
625 0 : .delete("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
626 0 : request_span(r, timeline_delete_handler)
627 0 : })
628 0 : .post("/v1/pull_timeline", |r| {
629 0 : request_span(r, timeline_pull_handler)
630 0 : })
631 0 : .get(
632 0 : "/v1/tenant/:tenant_id/timeline/:timeline_id/snapshot/:destination_id",
633 0 : |r| request_span(r, timeline_snapshot_handler),
634 0 : )
635 0 : .post(
636 0 : "/v1/tenant/:tenant_id/timeline/:source_timeline_id/copy",
637 0 : |r| request_span(r, timeline_copy_handler),
638 0 : )
639 0 : .patch(
640 0 : "/v1/tenant/:tenant_id/timeline/:timeline_id/control_file",
641 0 : |r| request_span(r, patch_control_file_handler),
642 0 : )
643 0 : .post(
644 0 : "/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
645 0 : |r| request_span(r, timeline_checkpoint_handler),
646 0 : )
647 0 : .get("/v1/tenant/:tenant_id/timeline/:timeline_id/digest", |r| {
648 0 : request_span(r, timeline_digest_handler)
649 0 : })
650 0 : .post(
651 0 : "/v1/tenant/:tenant_id/timeline/:timeline_id/backup_partial_reset",
652 0 : |r| request_span(r, timeline_backup_partial_reset),
653 0 : )
654 0 : .post(
655 0 : "/v1/tenant/:tenant_id/timeline/:timeline_id/term_bump",
656 0 : |r| request_span(r, timeline_term_bump_handler),
657 0 : )
658 0 : .post("/v1/record_safekeeper_info/:tenant_id/:timeline_id", |r| {
659 0 : request_span(r, record_safekeeper_info)
660 0 : })
661 0 : .get("/v1/debug_dump", |r| request_span(r, dump_debug_handler))
662 0 : }
663 :
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins the JSON shape of `AcceptorStateStatus`: in particular, the LSN of a
    /// term history entry must be rendered as a Postgres-style string.
    #[test]
    fn test_term_switch_entry_api_serialize() {
        let entry = TermSwitchApiEntry {
            term: 1,
            lsn: Lsn(0x16FFDDDD),
        };
        let state = AcceptorStateStatus {
            term: 1,
            epoch: 1,
            term_history: vec![entry],
        };

        let expected = r#"{"term":1,"epoch":1,"term_history":[{"term":1,"lsn":"0/16FFDDDD"}]}"#;
        assert_eq!(serde_json::to_string(&state).unwrap(), expected);
    }
}
|