Line data Source code
1 : use crate::{background_process, local_env::LocalEnv};
2 : use camino::{Utf8Path, Utf8PathBuf};
3 : use hyper::Method;
4 : use pageserver_api::{
5 : models::{
6 : ShardParameters, TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse,
7 : TimelineCreateRequest, TimelineInfo,
8 : },
9 : shard::TenantShardId,
10 : };
11 : use pageserver_client::mgmt_api::ResponseErrorMessageExt;
12 : use postgres_backend::AuthType;
13 : use serde::{de::DeserializeOwned, Deserialize, Serialize};
14 : use std::str::FromStr;
15 : use tokio::process::Command;
16 : use tracing::instrument;
17 : use url::Url;
18 : use utils::{
19 : auth::{Claims, Scope},
20 : id::{NodeId, TenantId},
21 : };
22 :
23 : pub struct AttachmentService {
24 : env: LocalEnv,
25 : listen: String,
26 : path: Utf8PathBuf,
27 : jwt_token: Option<String>,
28 : public_key: Option<String>,
29 : postgres_port: u16,
30 : client: reqwest::Client,
31 : }
32 :
/// Name handed to `background_process::start_process` / `stop_process` for the
/// attachment service process.
const COMMAND: &str = "attachment_service";

/// Postgres version tried first when looking for binaries in `get_pg_bin_dir`.
const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16;
36 :
37 0 : #[derive(Serialize, Deserialize)]
38 : pub struct AttachHookRequest {
39 : pub tenant_shard_id: TenantShardId,
40 : pub node_id: Option<NodeId>,
41 : }
42 :
43 0 : #[derive(Serialize, Deserialize)]
44 : pub struct AttachHookResponse {
45 : pub gen: Option<u32>,
46 : }
47 :
48 0 : #[derive(Serialize, Deserialize)]
49 : pub struct InspectRequest {
50 : pub tenant_shard_id: TenantShardId,
51 : }
52 :
53 0 : #[derive(Serialize, Deserialize)]
54 : pub struct InspectResponse {
55 : pub attachment: Option<(u32, NodeId)>,
56 : }
57 :
58 0 : #[derive(Serialize, Deserialize)]
59 : pub struct TenantCreateResponseShard {
60 : pub shard_id: TenantShardId,
61 : pub node_id: NodeId,
62 : pub generation: u32,
63 : }
64 :
65 0 : #[derive(Serialize, Deserialize)]
66 : pub struct TenantCreateResponse {
67 : pub shards: Vec<TenantCreateResponseShard>,
68 : }
69 :
70 0 : #[derive(Serialize, Deserialize)]
71 : pub struct NodeRegisterRequest {
72 : pub node_id: NodeId,
73 :
74 : pub listen_pg_addr: String,
75 : pub listen_pg_port: u16,
76 :
77 : pub listen_http_addr: String,
78 : pub listen_http_port: u16,
79 : }
80 :
81 0 : #[derive(Serialize, Deserialize)]
82 : pub struct NodeConfigureRequest {
83 : pub node_id: NodeId,
84 :
85 : pub availability: Option<NodeAvailability>,
86 : pub scheduling: Option<NodeSchedulingPolicy>,
87 : }
88 :
89 0 : #[derive(Serialize, Deserialize, Debug)]
90 : pub struct TenantLocateResponseShard {
91 : pub shard_id: TenantShardId,
92 : pub node_id: NodeId,
93 :
94 : pub listen_pg_addr: String,
95 : pub listen_pg_port: u16,
96 :
97 : pub listen_http_addr: String,
98 : pub listen_http_port: u16,
99 : }
100 :
101 0 : #[derive(Serialize, Deserialize)]
102 : pub struct TenantLocateResponse {
103 : pub shards: Vec<TenantLocateResponseShard>,
104 : pub shard_params: ShardParameters,
105 : }
106 :
107 : /// Explicitly migrating a particular shard is a low level operation
108 : /// TODO: higher level "Reschedule tenant" operation where the request
109 : /// specifies some constraints, e.g. asking it to get off particular node(s)
110 0 : #[derive(Serialize, Deserialize, Debug)]
111 : pub struct TenantShardMigrateRequest {
112 : pub tenant_shard_id: TenantShardId,
113 : pub node_id: NodeId,
114 : }
115 :
116 0 : #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
117 : pub enum NodeAvailability {
118 : // Normal, happy state
119 : Active,
120 : // Offline: Tenants shouldn't try to attach here, but they may assume that their
121 : // secondary locations on this node still exist. Newly added nodes are in this
122 : // state until we successfully contact them.
123 : Offline,
124 : }
125 :
126 : impl FromStr for NodeAvailability {
127 : type Err = anyhow::Error;
128 :
129 0 : fn from_str(s: &str) -> Result<Self, Self::Err> {
130 0 : match s {
131 0 : "active" => Ok(Self::Active),
132 0 : "offline" => Ok(Self::Offline),
133 0 : _ => Err(anyhow::anyhow!("Unknown availability state '{s}'")),
134 : }
135 0 : }
136 : }
137 :
138 : /// FIXME: this is a duplicate of the type in the attachment_service crate, because the
139 : /// type needs to be defined with diesel traits in there.
140 0 : #[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq)]
141 : pub enum NodeSchedulingPolicy {
142 : Active,
143 : Filling,
144 : Pause,
145 : Draining,
146 : }
147 :
148 : impl FromStr for NodeSchedulingPolicy {
149 : type Err = anyhow::Error;
150 :
151 0 : fn from_str(s: &str) -> Result<Self, Self::Err> {
152 0 : match s {
153 0 : "active" => Ok(Self::Active),
154 0 : "filling" => Ok(Self::Filling),
155 0 : "pause" => Ok(Self::Pause),
156 0 : "draining" => Ok(Self::Draining),
157 0 : _ => Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
158 : }
159 0 : }
160 : }
161 :
162 : impl From<NodeSchedulingPolicy> for String {
163 0 : fn from(value: NodeSchedulingPolicy) -> String {
164 0 : use NodeSchedulingPolicy::*;
165 0 : match value {
166 0 : Active => "active",
167 0 : Filling => "filling",
168 0 : Pause => "pause",
169 0 : Draining => "draining",
170 : }
171 0 : .to_string()
172 0 : }
173 : }
174 :
175 0 : #[derive(Serialize, Deserialize, Debug)]
176 : pub struct TenantShardMigrateResponse {}
177 :
178 : impl AttachmentService {
179 0 : pub fn from_env(env: &LocalEnv) -> Self {
180 0 : let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
181 0 : .unwrap()
182 0 : .join("attachments.json");
183 0 :
184 0 : // Makes no sense to construct this if pageservers aren't going to use it: assume
185 0 : // pageservers have control plane API set
186 0 : let listen_url = env.control_plane_api.clone().unwrap();
187 0 :
188 0 : let listen = format!(
189 0 : "{}:{}",
190 0 : listen_url.host_str().unwrap(),
191 0 : listen_url.port().unwrap()
192 0 : );
193 0 :
194 0 : // Convention: NeonEnv in python tests reserves the next port after the control_plane_api
195 0 : // port, for use by our captive postgres.
196 0 : let postgres_port = listen_url
197 0 : .port()
198 0 : .expect("Control plane API setting should always have a port")
199 0 : + 1;
200 0 :
201 0 : // Assume all pageservers have symmetric auth configuration: this service
202 0 : // expects to use one JWT token to talk to all of them.
203 0 : let ps_conf = env
204 0 : .pageservers
205 0 : .first()
206 0 : .expect("Config is validated to contain at least one pageserver");
207 0 : let (jwt_token, public_key) = match ps_conf.http_auth_type {
208 0 : AuthType::Trust => (None, None),
209 : AuthType::NeonJWT => {
210 0 : let jwt_token = env
211 0 : .generate_auth_token(&Claims::new(None, Scope::PageServerApi))
212 0 : .unwrap();
213 0 :
214 0 : // If pageserver auth is enabled, this implicitly enables auth for this service,
215 0 : // using the same credentials.
216 0 : let public_key_path =
217 0 : camino::Utf8PathBuf::try_from(env.base_data_dir.join("auth_public_key.pem"))
218 0 : .unwrap();
219 :
220 : // This service takes keys as a string rather than as a path to a file/dir: read the key into memory.
221 0 : let public_key = if std::fs::metadata(&public_key_path)
222 0 : .expect("Can't stat public key")
223 0 : .is_dir()
224 : {
225 : // Our config may specify a directory: this is for the pageserver's ability to handle multiple
226 : // keys. We only use one key at a time, so, arbitrarily load the first one in the directory.
227 0 : let mut dir =
228 0 : std::fs::read_dir(&public_key_path).expect("Can't readdir public key path");
229 0 : let dent = dir
230 0 : .next()
231 0 : .expect("Empty key dir")
232 0 : .expect("Error reading key dir");
233 0 :
234 0 : std::fs::read_to_string(dent.path()).expect("Can't read public key")
235 : } else {
236 0 : std::fs::read_to_string(&public_key_path).expect("Can't read public key")
237 : };
238 0 : (Some(jwt_token), Some(public_key))
239 : }
240 : };
241 :
242 0 : Self {
243 0 : env: env.clone(),
244 0 : path,
245 0 : listen,
246 0 : jwt_token,
247 0 : public_key,
248 0 : postgres_port,
249 0 : client: reqwest::ClientBuilder::new()
250 0 : .build()
251 0 : .expect("Failed to construct http client"),
252 0 : }
253 0 : }
254 :
255 0 : fn pid_file(&self) -> Utf8PathBuf {
256 0 : Utf8PathBuf::from_path_buf(self.env.base_data_dir.join("attachment_service.pid"))
257 0 : .expect("non-Unicode path")
258 0 : }
259 :
260 : /// PIDFile for the postgres instance used to store attachment service state
261 0 : fn postgres_pid_file(&self) -> Utf8PathBuf {
262 0 : Utf8PathBuf::from_path_buf(
263 0 : self.env
264 0 : .base_data_dir
265 0 : .join("attachment_service_postgres.pid"),
266 0 : )
267 0 : .expect("non-Unicode path")
268 0 : }
269 :
270 : /// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
271 : ///
272 : /// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back
273 : /// to other versions if that one isn't found. Some automated tests create circumstances
274 : /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
275 0 : pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
276 0 : let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14];
277 :
278 0 : for v in prefer_versions {
279 0 : let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
280 0 : if tokio::fs::try_exists(&path).await? {
281 0 : return Ok(path);
282 0 : }
283 : }
284 :
285 : // Fall through
286 0 : anyhow::bail!(
287 0 : "Postgres binaries not found in {}",
288 0 : self.env.pg_distrib_dir.display()
289 0 : );
290 0 : }
291 :
292 : /// Readiness check for our postgres process
293 0 : async fn pg_isready(&self, pg_bin_dir: &Utf8Path) -> anyhow::Result<bool> {
294 0 : let bin_path = pg_bin_dir.join("pg_isready");
295 0 : let args = ["-h", "localhost", "-p", &format!("{}", self.postgres_port)];
296 0 : let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
297 :
298 0 : Ok(exitcode.success())
299 0 : }
300 :
301 : /// Create our database if it doesn't exist, and run migrations.
302 : ///
303 : /// This function is equivalent to the `diesel setup` command in the diesel CLI. We implement
304 : /// the same steps by hand to avoid imposing a dependency on installing diesel-cli for developers
305 : /// who just want to run `cargo neon_local` without knowing about diesel.
306 : ///
307 : /// Returns the database url
308 0 : pub async fn setup_database(&self) -> anyhow::Result<String> {
309 0 : const DB_NAME: &str = "attachment_service";
310 0 : let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
311 :
312 0 : let pg_bin_dir = self.get_pg_bin_dir().await?;
313 0 : let createdb_path = pg_bin_dir.join("createdb");
314 0 : let output = Command::new(&createdb_path)
315 0 : .args([
316 0 : "-h",
317 0 : "localhost",
318 0 : "-p",
319 0 : &format!("{}", self.postgres_port),
320 0 : &DB_NAME,
321 0 : ])
322 0 : .output()
323 0 : .await
324 0 : .expect("Failed to spawn createdb");
325 0 :
326 0 : if !output.status.success() {
327 0 : let stderr = String::from_utf8(output.stderr).expect("Non-UTF8 output from createdb");
328 0 : if stderr.contains("already exists") {
329 0 : tracing::info!("Database {DB_NAME} already exists");
330 : } else {
331 0 : anyhow::bail!("createdb failed with status {}: {stderr}", output.status);
332 : }
333 0 : }
334 :
335 0 : Ok(database_url)
336 0 : }
337 :
338 0 : pub async fn start(&self) -> anyhow::Result<()> {
339 0 : // Start a vanilla Postgres process used by the attachment service for persistence.
340 0 : let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
341 0 : .unwrap()
342 0 : .join("attachment_service_db");
343 0 : let pg_bin_dir = self.get_pg_bin_dir().await?;
344 0 : let pg_log_path = pg_data_path.join("postgres.log");
345 0 :
346 0 : if !tokio::fs::try_exists(&pg_data_path).await? {
347 : // Initialize empty database
348 0 : let initdb_path = pg_bin_dir.join("initdb");
349 0 : let mut child = Command::new(&initdb_path)
350 0 : .args(["-D", pg_data_path.as_ref()])
351 0 : .spawn()
352 0 : .expect("Failed to spawn initdb");
353 0 : let status = child.wait().await?;
354 0 : if !status.success() {
355 0 : anyhow::bail!("initdb failed with status {status}");
356 0 : }
357 0 :
358 0 : tokio::fs::write(
359 0 : &pg_data_path.join("postgresql.conf"),
360 0 : format!("port = {}", self.postgres_port),
361 0 : )
362 0 : .await?;
363 0 : };
364 :
365 0 : println!("Starting attachment service database...");
366 0 : let db_start_args = [
367 0 : "-w",
368 0 : "-D",
369 0 : pg_data_path.as_ref(),
370 0 : "-l",
371 0 : pg_log_path.as_ref(),
372 0 : "start",
373 0 : ];
374 0 :
375 0 : background_process::start_process(
376 0 : "attachment_service_db",
377 0 : &self.env.base_data_dir,
378 0 : pg_bin_dir.join("pg_ctl").as_std_path(),
379 0 : db_start_args,
380 0 : [],
381 0 : background_process::InitialPidFile::Create(self.postgres_pid_file()),
382 0 : || self.pg_isready(&pg_bin_dir),
383 0 : )
384 0 : .await?;
385 :
386 : // Run migrations on every startup, in case something changed.
387 0 : let database_url = self.setup_database().await?;
388 :
389 0 : let mut args = vec![
390 0 : "-l",
391 0 : &self.listen,
392 0 : "-p",
393 0 : self.path.as_ref(),
394 0 : "--database-url",
395 0 : &database_url,
396 0 : ]
397 0 : .into_iter()
398 0 : .map(|s| s.to_string())
399 0 : .collect::<Vec<_>>();
400 0 : if let Some(jwt_token) = &self.jwt_token {
401 0 : args.push(format!("--jwt-token={jwt_token}"));
402 0 : }
403 :
404 0 : if let Some(public_key) = &self.public_key {
405 0 : args.push(format!("--public-key=\"{public_key}\""));
406 0 : }
407 :
408 0 : if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api {
409 0 : args.push(format!(
410 0 : "--compute-hook-url={control_plane_compute_hook_api}"
411 0 : ));
412 0 : }
413 :
414 0 : background_process::start_process(
415 0 : COMMAND,
416 0 : &self.env.base_data_dir,
417 0 : &self.env.attachment_service_bin(),
418 0 : args,
419 0 : [(
420 0 : "NEON_REPO_DIR".to_string(),
421 0 : self.env.base_data_dir.to_string_lossy().to_string(),
422 0 : )],
423 0 : background_process::InitialPidFile::Create(self.pid_file()),
424 0 : || async {
425 0 : match self.status().await {
426 0 : Ok(_) => Ok(true),
427 0 : Err(_) => Ok(false),
428 : }
429 0 : },
430 0 : )
431 0 : .await?;
432 :
433 0 : Ok(())
434 0 : }
435 :
436 0 : pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
437 0 : background_process::stop_process(immediate, COMMAND, &self.pid_file())?;
438 :
439 0 : let pg_data_path = self.env.base_data_dir.join("attachment_service_db");
440 0 : let pg_bin_dir = self.get_pg_bin_dir().await?;
441 :
442 0 : println!("Stopping attachment service database...");
443 0 : let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
444 0 : let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
445 0 : .args(pg_stop_args)
446 0 : .spawn()?
447 0 : .wait()
448 0 : .await?;
449 0 : if !stop_status.success() {
450 0 : let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
451 0 : let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
452 0 : .args(pg_status_args)
453 0 : .spawn()?
454 0 : .wait()
455 0 : .await?;
456 :
457 : // pg_ctl status returns this exit code if postgres is not running: in this case it is
458 : // fine that stop failed. Otherwise it is an error that stop failed.
459 : const PG_STATUS_NOT_RUNNING: i32 = 3;
460 0 : if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
461 0 : println!("Attachment service data base is already stopped");
462 0 : return Ok(());
463 : } else {
464 0 : anyhow::bail!("Failed to stop attachment service database: {stop_status}")
465 : }
466 0 : }
467 0 :
468 0 : Ok(())
469 0 : }
470 :
471 : /// Simple HTTP request wrapper for calling into attachment service
472 0 : async fn dispatch<RQ, RS>(
473 0 : &self,
474 0 : method: hyper::Method,
475 0 : path: String,
476 0 : body: Option<RQ>,
477 0 : ) -> anyhow::Result<RS>
478 0 : where
479 0 : RQ: Serialize + Sized,
480 0 : RS: DeserializeOwned + Sized,
481 0 : {
482 0 : // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
483 0 : // for general purpose API access.
484 0 : let listen_url = self.env.control_plane_api.clone().unwrap();
485 0 : let url = Url::from_str(&format!(
486 0 : "http://{}:{}/{path}",
487 0 : listen_url.host_str().unwrap(),
488 0 : listen_url.port().unwrap()
489 0 : ))
490 0 : .unwrap();
491 0 :
492 0 : let mut builder = self.client.request(method, url);
493 0 : if let Some(body) = body {
494 0 : builder = builder.json(&body)
495 0 : }
496 0 : if let Some(jwt_token) = &self.jwt_token {
497 0 : builder = builder.header(
498 0 : reqwest::header::AUTHORIZATION,
499 0 : format!("Bearer {jwt_token}"),
500 0 : );
501 0 : }
502 :
503 0 : let response = builder.send().await?;
504 0 : let response = response.error_from_body().await?;
505 :
506 0 : Ok(response
507 0 : .json()
508 0 : .await
509 0 : .map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
510 0 : }
511 :
512 : /// Call into the attach_hook API, for use before handing out attachments to pageservers
513 0 : #[instrument(skip(self))]
514 : pub async fn attach_hook(
515 : &self,
516 : tenant_shard_id: TenantShardId,
517 : pageserver_id: NodeId,
518 : ) -> anyhow::Result<Option<u32>> {
519 : let request = AttachHookRequest {
520 : tenant_shard_id,
521 : node_id: Some(pageserver_id),
522 : };
523 :
524 : let response = self
525 : .dispatch::<_, AttachHookResponse>(
526 : Method::POST,
527 : "debug/v1/attach-hook".to_string(),
528 : Some(request),
529 : )
530 : .await?;
531 :
532 : Ok(response.gen)
533 : }
534 :
535 0 : #[instrument(skip(self))]
536 : pub async fn inspect(
537 : &self,
538 : tenant_shard_id: TenantShardId,
539 : ) -> anyhow::Result<Option<(u32, NodeId)>> {
540 : let request = InspectRequest { tenant_shard_id };
541 :
542 : let response = self
543 : .dispatch::<_, InspectResponse>(
544 : Method::POST,
545 : "debug/v1/inspect".to_string(),
546 : Some(request),
547 : )
548 : .await?;
549 :
550 : Ok(response.attachment)
551 : }
552 :
553 0 : #[instrument(skip(self))]
554 : pub async fn tenant_create(
555 : &self,
556 : req: TenantCreateRequest,
557 : ) -> anyhow::Result<TenantCreateResponse> {
558 : self.dispatch(Method::POST, "v1/tenant".to_string(), Some(req))
559 : .await
560 : }
561 :
562 0 : #[instrument(skip(self))]
563 : pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {
564 : self.dispatch::<(), _>(
565 : Method::GET,
566 : format!("control/v1/tenant/{tenant_id}/locate"),
567 : None,
568 : )
569 : .await
570 : }
571 :
572 0 : #[instrument(skip(self))]
573 : pub async fn tenant_migrate(
574 : &self,
575 : tenant_shard_id: TenantShardId,
576 : node_id: NodeId,
577 : ) -> anyhow::Result<TenantShardMigrateResponse> {
578 : self.dispatch(
579 : Method::PUT,
580 : format!("control/v1/tenant/{tenant_shard_id}/migrate"),
581 : Some(TenantShardMigrateRequest {
582 : tenant_shard_id,
583 : node_id,
584 : }),
585 : )
586 : .await
587 : }
588 :
589 0 : #[instrument(skip(self), fields(%tenant_id, %new_shard_count))]
590 : pub async fn tenant_split(
591 : &self,
592 : tenant_id: TenantId,
593 : new_shard_count: u8,
594 : ) -> anyhow::Result<TenantShardSplitResponse> {
595 : self.dispatch(
596 : Method::PUT,
597 : format!("control/v1/tenant/{tenant_id}/shard_split"),
598 : Some(TenantShardSplitRequest { new_shard_count }),
599 : )
600 : .await
601 : }
602 :
603 0 : #[instrument(skip_all, fields(node_id=%req.node_id))]
604 : pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> {
605 : self.dispatch::<_, ()>(Method::POST, "control/v1/node".to_string(), Some(req))
606 : .await
607 : }
608 :
609 0 : #[instrument(skip_all, fields(node_id=%req.node_id))]
610 : pub async fn node_configure(&self, req: NodeConfigureRequest) -> anyhow::Result<()> {
611 : self.dispatch::<_, ()>(
612 : Method::PUT,
613 : format!("control/v1/node/{}/config", req.node_id),
614 : Some(req),
615 : )
616 : .await
617 : }
618 :
619 0 : #[instrument(skip(self))]
620 : pub async fn status(&self) -> anyhow::Result<()> {
621 : self.dispatch::<(), ()>(Method::GET, "status".to_string(), None)
622 : .await
623 : }
624 :
625 0 : #[instrument(skip_all, fields(%tenant_id, timeline_id=%req.new_timeline_id))]
626 : pub async fn tenant_timeline_create(
627 : &self,
628 : tenant_id: TenantId,
629 : req: TimelineCreateRequest,
630 : ) -> anyhow::Result<TimelineInfo> {
631 : self.dispatch(
632 : Method::POST,
633 : format!("v1/tenant/{tenant_id}/timeline"),
634 : Some(req),
635 : )
636 : .await
637 : }
638 : }
|