LCOV - code coverage report
Current view: top level - safekeeper/src - handler.rs (source / functions) Coverage Total Hit
Test: 8b13a09a5c233d98abd4a0d3e59157e7db16d6fd.info Lines: 0.0 % 309 0
Test Date: 2024-11-21 10:53:51 Functions: 0.0 % 39 0

            Line data    Source code
       1              : //! Part of Safekeeper pretending to be Postgres, i.e. handling Postgres
       2              : //! protocol commands.
       3              : 
       4              : use anyhow::Context;
       5              : use pageserver_api::models::ShardParameters;
       6              : use pageserver_api::shard::{ShardIdentity, ShardStripeSize};
       7              : use std::future::Future;
       8              : use std::str::{self, FromStr};
       9              : use std::sync::Arc;
      10              : use tokio::io::{AsyncRead, AsyncWrite};
      11              : use tracing::{debug, info, info_span, Instrument};
      12              : use utils::postgres_client::PostgresClientProtocol;
      13              : use utils::shard::{ShardCount, ShardNumber};
      14              : 
      15              : use crate::auth::check_permission;
      16              : use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
      17              : 
      18              : use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE};
      19              : use crate::safekeeper::Term;
      20              : use crate::timeline::TimelineError;
      21              : use crate::wal_service::ConnectionId;
      22              : use crate::{GlobalTimelines, SafeKeeperConf};
      23              : use postgres_backend::PostgresBackend;
      24              : use postgres_backend::QueryError;
      25              : use postgres_ffi::PG_TLI;
      26              : use pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID, TEXT_OID};
      27              : use regex::Regex;
      28              : use utils::auth::{Claims, JwtAuth, Scope};
      29              : use utils::{
      30              :     id::{TenantId, TenantTimelineId, TimelineId},
      31              :     lsn::Lsn,
      32              : };
      33              : 
      34              : /// Safekeeper handler of Postgres protocol commands, together with the state it
                        /// accumulates for a connection.
                        ///
                        /// `new` leaves the optional fields unset; `startup` fills them from the startup
                        /// packet, `check_auth_jwt` stores the token claims and `process_query` sets `ttid`.
      35              : pub struct SafekeeperPostgresHandler {
      36              :     pub conf: SafeKeeperConf,
      37              :     /// Application name taken from the `application_name` startup parameter, if sent.
      38              :     pub appname: Option<String>,
                            /// Tenant id parsed from the `tenant_id` (or legacy `ztenantid`) option.
      39              :     pub tenant_id: Option<TenantId>,
                            /// Timeline id parsed from the `timeline_id` (or legacy `ztimelineid`) option.
      40              :     pub timeline_id: Option<TimelineId>,
                            /// Combined tenant/timeline id; empty after `new`, assigned by `process_query`.
      41              :     pub ttid: TenantTimelineId,
                            /// Shard identity built by `startup` from the `shard_*` options; it is only
                            /// set when the interpreted protocol is selected.
      42              :     pub shard: Option<ShardIdentity>,
                            /// Protocol parsed from the `protocol` option; `protocol()` treats `None` as
                            /// `Vanilla`.
      43              :     pub protocol: Option<PostgresClientProtocol>,
      44              :     /// Unique connection id is logged in spans for observability.
      45              :     pub conn_id: ConnectionId,
      46              :     /// Auth scope allowed on the connections and public key used to check auth tokens. None if auth is not configured.
      47              :     auth: Option<(Scope, Arc<JwtAuth>)>,
                            /// Claims of the token accepted by `check_auth_jwt`; `None` until then.
      48              :     claims: Option<Claims>,
                            /// Optional traffic metrics; `startup` passes the client availability zone and
                            /// application name to them.
      49              :     io_metrics: Option<TrafficMetrics>,
      50              : }
      51              : 
      52              : /// Parsed Postgres command, produced by `parse_cmd` and dispatched in `process_query`.
      53              : enum SafekeeperPostgresCommand {
                            /// `START_WAL_PUSH`; dispatched to `handle_start_wal_push`.
      54              :     StartWalPush,
                            /// `START_REPLICATION` with its start LSN and the optional `term` option;
                            /// dispatched to `handle_start_replication`.
      55              :     StartReplication { start_lsn: Lsn, term: Option<Term> },
                            /// `IDENTIFY_SYSTEM`; dispatched to `handle_identify_system`.
      56              :     IdentifySystem,
                            /// `TIMELINE_STATUS`; dispatched to `handle_timeline_status`.
      57              :     TimelineStatus,
                            /// `JSON_CTRL` followed by a JSON payload; dispatched to `handle_json_ctrl`.
      58              :     JSONCtrl { cmd: AppendLogicalMessage },
      59              : }
      60              : 
                        /// Parses a query string into a [`SafekeeperPostgresCommand`].
                        ///
                        /// The command is selected by a case-sensitive prefix check: `START_WAL_PUSH`,
                        /// `START_REPLICATION`, `IDENTIFY_SYSTEM`, `TIMELINE_STATUS` or `JSON_CTRL`.
                        ///
                        /// # Errors
                        /// Returns an error when the prefix is none of the above, when a
                        /// `START_REPLICATION` command does not match the expected syntax or carries an
                        /// LSN/term that fails to parse, or when the text after `JSON_CTRL` does not
                        /// deserialize into an `AppendLogicalMessage`.
      61            0 : fn parse_cmd(cmd: &str) -> anyhow::Result<SafekeeperPostgresCommand> {
      62            0 :     if cmd.starts_with("START_WAL_PUSH") {
      63            0 :         Ok(SafekeeperPostgresCommand::StartWalPush)
      64            0 :     } else if cmd.starts_with("START_REPLICATION") {
                                // Accepted form: START_REPLICATION [SLOT name] [PHYSICAL] X/Y [(term='N')].
                                // Capture group 1 is the start LSN, group 2 the optional term.
                                // NOTE(review): the regex is compiled on every call; caching it in a
                                // static would avoid the repeated compilation.
      65            0 :         let re = Regex::new(
      66            0 :             // We follow postgres START_REPLICATION LOGICAL options to pass term.
      67            0 :             r"START_REPLICATION(?: SLOT [^ ]+)?(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)(?: \(term='(\d+)'\))?",
      68            0 :         )
      69            0 :         .unwrap();
                                // NOTE(review): `context(format!(..))` builds the message even when the
                                // match succeeds; `with_context` would defer the formatting.
      70            0 :         let caps = re
      71            0 :             .captures(cmd)
      72            0 :             .context(format!("failed to parse START_REPLICATION command {}", cmd))?;
      73            0 :         let start_lsn =
      74            0 :             Lsn::from_str(&caps[1]).context("parse start LSN from START_REPLICATION command")?;
                                // The term is optional; when present it must fit into a u64.
      75            0 :         let term = if let Some(m) = caps.get(2) {
      76            0 :             Some(m.as_str().parse::<u64>().context("invalid term")?)
      77              :         } else {
      78            0 :             None
      79              :         };
      80            0 :         Ok(SafekeeperPostgresCommand::StartReplication { start_lsn, term })
      81            0 :     } else if cmd.starts_with("IDENTIFY_SYSTEM") {
      82            0 :         Ok(SafekeeperPostgresCommand::IdentifySystem)
      83            0 :     } else if cmd.starts_with("TIMELINE_STATUS") {
      84            0 :         Ok(SafekeeperPostgresCommand::TimelineStatus)
      85            0 :     } else if cmd.starts_with("JSON_CTRL") {
                                // The prefix was checked just above, so `strip_prefix` is not expected to
                                // fail; the remainder of the string is the JSON payload.
      86            0 :         let cmd = cmd.strip_prefix("JSON_CTRL").context("invalid prefix")?;
      87              :         Ok(SafekeeperPostgresCommand::JSONCtrl {
      88            0 :             cmd: serde_json::from_str(cmd)?,
      89              :         })
      90              :     } else {
      91            0 :         anyhow::bail!("unsupported command {cmd}");
      92              :     }
      93            0 : }
      94              : 
                        /// Returns the fixed command name for `cmd`, ignoring any arguments it carries.
                        ///
                        /// `process_query` uses the returned string as the label value of
                        /// `PG_QUERIES_GAUGE`.
      95            0 : fn cmd_to_string(cmd: &SafekeeperPostgresCommand) -> &str {
      96            0 :     match cmd {
      97            0 :         SafekeeperPostgresCommand::StartWalPush => "START_WAL_PUSH",
      98            0 :         SafekeeperPostgresCommand::StartReplication { .. } => "START_REPLICATION",
      99            0 :         SafekeeperPostgresCommand::TimelineStatus => "TIMELINE_STATUS",
     100            0 :         SafekeeperPostgresCommand::IdentifySystem => "IDENTIFY_SYSTEM",
     101            0 :         SafekeeperPostgresCommand::JSONCtrl { .. } => "JSON_CTRL",
     102              :     }
     103            0 : }
     104              : 
     105              : impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
     106              :     for SafekeeperPostgresHandler
     107              : {
     108              :     /// Processes the startup packet. tenant_id and timeline_id are passed in connection string params.
                            ///
                            /// Recognized entries of the raw `options` parameter are `protocol`, `tenant_id`,
                            /// `timeline_id` (plus the legacy `ztenantid` / `ztimelineid`),
                            /// `availability_zone`, `shard_count`, `shard_number` and `shard_stripe_size`.
                            /// Other entries are skipped. The `application_name` parameter is stored as well.
                            ///
                            /// # Errors
                            /// Fails if a recognized option cannot be parsed, if shard parameters are given
                            /// for the vanilla protocol, if any shard parameter is missing for the
                            /// interpreted protocol, or if the packet is not a `StartupMessage`.
     109            0 :     fn startup(
     110            0 :         &mut self,
     111            0 :         _pgb: &mut PostgresBackend<IO>,
     112            0 :         sm: &FeStartupPacket,
     113            0 :     ) -> Result<(), QueryError> {
     114            0 :         if let FeStartupPacket::StartupMessage { params, .. } = sm {
     115            0 :             if let Some(options) = params.options_raw() {
                                        // Shard parameters are collected first and validated after the loop,
                                        // once the protocol is known.
     116            0 :                 let mut shard_count: Option<u8> = None;
     117            0 :                 let mut shard_number: Option<u8> = None;
     118            0 :                 let mut shard_stripe_size: Option<u32> = None;
     119              : 
     120            0 :                 for opt in options {
     121              :                     // FIXME `ztenantid` and `ztimelineid` left for compatibility during deploy,
     122              :                     // remove these after the PR gets deployed:
     123              :                     // https://github.com/neondatabase/neon/pull/2433#discussion_r970005064
     124            0 :                     match opt.split_once('=') {
     125            0 :                         Some(("protocol", value)) => {
     126            0 :                             self.protocol =
     127            0 :                                 Some(serde_json::from_str(value).with_context(|| {
     128            0 :                                     format!("Failed to parse {value} as protocol")
     129            0 :                                 })?);
     130              :                         }
     131            0 :                         Some(("ztenantid", value)) | Some(("tenant_id", value)) => {
     132            0 :                             self.tenant_id = Some(value.parse().with_context(|| {
     133            0 :                                 format!("Failed to parse {value} as tenant id")
     134            0 :                             })?);
     135              :                         }
     136            0 :                         Some(("ztimelineid", value)) | Some(("timeline_id", value)) => {
     137            0 :                             self.timeline_id = Some(value.parse().with_context(|| {
     138            0 :                                 format!("Failed to parse {value} as timeline id")
     139            0 :                             })?);
     140              :                         }
     141            0 :                         Some(("availability_zone", client_az)) => {
     142            0 :                             if let Some(metrics) = self.io_metrics.as_ref() {
     143            0 :                                 metrics.set_client_az(client_az)
     144            0 :                             }
     145              :                         }
     146            0 :                         Some(("shard_count", value)) => {
     147            0 :                             shard_count = Some(value.parse::<u8>().with_context(|| {
     148            0 :                                 format!("Failed to parse {value} as shard count")
     149            0 :                             })?);
     150              :                         }
     151            0 :                         Some(("shard_number", value)) => {
     152            0 :                             shard_number = Some(value.parse::<u8>().with_context(|| {
     153            0 :                                 format!("Failed to parse {value} as shard number")
     154            0 :                             })?);
     155              :                         }
     156            0 :                         Some(("shard_stripe_size", value)) => {
     157            0 :                             shard_stripe_size = Some(value.parse::<u32>().with_context(|| {
     158            0 :                                 format!("Failed to parse {value} as shard stripe size")
     159            0 :                             })?);
     160              :                         }
                                                // Unknown keys and entries without `=` are ignored.
     161            0 :                         _ => continue,
     162              :                     }
     163              :                 }
     164              : 
                                        // Shard parameters are rejected for the vanilla protocol and all three
                                        // are required for the interpreted one. This check only runs when the
                                        // `options` parameter is present.
     165            0 :                 match self.protocol() {
     166              :                     PostgresClientProtocol::Vanilla => {
     167            0 :                         if shard_count.is_some()
     168            0 :                             || shard_number.is_some()
     169            0 :                             || shard_stripe_size.is_some()
     170              :                         {
     171            0 :                             return Err(QueryError::Other(anyhow::anyhow!(
     172            0 :                                 "Shard params specified for vanilla protocol"
     173            0 :                             )));
     174            0 :                         }
     175              :                     }
     176              :                     PostgresClientProtocol::Interpreted { .. } => {
     177            0 :                         match (shard_count, shard_number, shard_stripe_size) {
     178            0 :                             (Some(count), Some(number), Some(stripe_size)) => {
     179            0 :                                 let params = ShardParameters {
     180            0 :                                     count: ShardCount(count),
     181            0 :                                     stripe_size: ShardStripeSize(stripe_size),
     182            0 :                                 };
     183            0 :                                 self.shard =
     184            0 :                                     Some(ShardIdentity::from_params(ShardNumber(number), &params));
     185            0 :                             }
     186              :                             _ => {
     187            0 :                                 return Err(QueryError::Other(anyhow::anyhow!(
     188            0 :                                     "Shard params were not specified"
     189            0 :                                 )));
     190              :                             }
     191              :                         }
     192              :                     }
     193              :                 }
     194            0 :             }
     195              : 
     196            0 :             if let Some(app_name) = params.get("application_name") {
     197            0 :                 self.appname = Some(app_name.to_owned());
     198            0 :                 if let Some(metrics) = self.io_metrics.as_ref() {
     199            0 :                     metrics.set_app_name(app_name)
     200            0 :                 }
     201            0 :             }
     202              : 
                                    // This local ttid is only recorded on the tracing span; missing ids fall
                                    // back to all-zero values. `self.ttid` itself is assigned in `process_query`.
     203            0 :             let ttid = TenantTimelineId::new(
     204            0 :                 self.tenant_id.unwrap_or(TenantId::from([0u8; 16])),
     205            0 :                 self.timeline_id.unwrap_or(TimelineId::from([0u8; 16])),
     206            0 :             );
     207            0 :             tracing::Span::current()
     208            0 :                 .record("ttid", tracing::field::display(ttid))
     209            0 :                 .record(
     210            0 :                     "application_name",
     211            0 :                     tracing::field::debug(self.appname.clone()),
     212            0 :                 );
     213              : 
     214            0 :             if let Some(shard) = self.shard.as_ref() {
     215            0 :                 tracing::Span::current()
     216            0 :                     .record("shard", tracing::field::display(shard.shard_slug()));
     217            0 :             }
     218              : 
     219            0 :             Ok(())
     220              :         } else {
                                    // Any other kind of startup packet is rejected.
     221            0 :             Err(QueryError::Other(anyhow::anyhow!(
     222            0 :                 "Safekeeper received unexpected initial message: {sm:?}"
     223            0 :             )))
     224              :         }
     225            0 :     }
     226              : 
                            /// Validates the JWT sent by the client and stores its claims in `self.claims`.
                            ///
                            /// # Errors
                            /// Returns `QueryError::Unauthorized` if the token cannot be decoded, if the
                            /// handler only allows tenant-scope tokens but the token has another scope, or
                            /// if a tenant-scope token carries no tenant id. A response that is not valid
                            /// UTF-8 is reported as an error as well.
                            ///
                            /// # Panics
                            /// Panics if `self.auth` is `None`.
     227            0 :     fn check_auth_jwt(
     228            0 :         &mut self,
     229            0 :         _pgb: &mut PostgresBackend<IO>,
     230            0 :         jwt_response: &[u8],
     231            0 :     ) -> Result<(), QueryError> {
     232            0 :         // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT
     233            0 :         // which requires auth to be present
     234            0 :         let (allowed_auth_scope, auth) = self
     235            0 :             .auth
     236            0 :             .as_ref()
     237            0 :             .expect("auth_type is configured but .auth of handler is missing");
     238            0 :         let data = auth
     239            0 :             .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)
     240            0 :             .map_err(|e| QueryError::Unauthorized(e.0))?;
     241              : 
     242              :         // The handler might be configured to allow only tenant scope tokens.
     243            0 :         if matches!(allowed_auth_scope, Scope::Tenant)
     244            0 :             && !matches!(data.claims.scope, Scope::Tenant)
     245              :         {
     246            0 :             return Err(QueryError::Unauthorized(
     247            0 :                 "passed JWT token is for full access, but only tenant scope is allowed".into(),
     248            0 :             ));
     249            0 :         }
     250              : 
                                // A tenant-scope token must name the tenant it is valid for.
     251            0 :         if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {
     252            0 :             return Err(QueryError::Unauthorized(
     253            0 :                 "jwt token scope is Tenant, but tenant id is missing".into(),
     254            0 :             ));
     255            0 :         }
     256            0 : 
     257            0 :         debug!(
     258            0 :             "jwt scope check succeeded for scope: {:#?} by tenant id: {:?}",
     259              :             data.claims.scope, data.claims.tenant_id,
     260              :         );
     261              : 
                                // Keep the claims so that `check_permission` can consult them per query.
     262            0 :         self.claims = Some(data.claims);
     263            0 :         Ok(())
     264            0 :     }
     265              : 
                            /// Parses one query string and runs the matching command handler.
                            ///
                            /// A query starting with `set datestyle to ` (compared case-insensitively) is
                            /// acknowledged with a fixed `SELECT 1` completion and nothing else is done.
                            /// Every other query is parsed with `parse_cmd`, requires both tenant and
                            /// timeline ids to have been supplied, passes `check_permission` for the
                            /// tenant, updates `self.ttid` and is then dispatched.
                            ///
                            /// # Errors
                            /// Fails if the command cannot be parsed, if the tenant or timeline id is
                            /// missing, if the permission check fails, or if the selected handler fails.
     266            0 :     fn process_query(
     267            0 :         &mut self,
     268            0 :         pgb: &mut PostgresBackend<IO>,
     269            0 :         query_string: &str,
     270            0 :     ) -> impl Future<Output = Result<(), QueryError>> {
     271            0 :         Box::pin(async move {
     272            0 :             if query_string
     273            0 :                 .to_ascii_lowercase()
     274            0 :                 .starts_with("set datestyle to ")
     275              :             {
     276              :                 // important for debug because psycopg2 executes "SET datestyle TO 'ISO'" on connect
     277            0 :                 pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
     278            0 :                 return Ok(());
     279            0 :             }
     280              : 
     281            0 :             let cmd = parse_cmd(query_string)?;
     282            0 :             let cmd_str = cmd_to_string(&cmd);
     283            0 : 
                                    // The guard stays alive until the end of this async block, i.e. for the
                                    // whole duration of the command (presumably counting in-flight queries
                                    // per command name; confirm against the metrics module).
     284            0 :             let _guard = PG_QUERIES_GAUGE.with_label_values(&[cmd_str]).guard();
     285            0 : 
     286            0 :             info!("got query {:?}", query_string);
     287              : 
     288            0 :             let tenant_id = self.tenant_id.context("tenantid is required")?;
     289            0 :             let timeline_id = self.timeline_id.context("timelineid is required")?;
     290            0 :             self.check_permission(Some(tenant_id))?;
     291            0 :             self.ttid = TenantTimelineId::new(tenant_id, timeline_id);
     292            0 : 
     293            0 :             match cmd {
     294              :                 SafekeeperPostgresCommand::StartWalPush => {
     295            0 :                     self.handle_start_wal_push(pgb)
     296            0 :                         .instrument(info_span!("WAL receiver"))
     297            0 :                         .await
     298              :                 }
     299            0 :                 SafekeeperPostgresCommand::StartReplication { start_lsn, term } => {
     300            0 :                     self.handle_start_replication(pgb, start_lsn, term)
     301            0 :                         .instrument(info_span!("WAL sender"))
     302            0 :                         .await
     303              :                 }
     304            0 :                 SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb).await,
     305            0 :                 SafekeeperPostgresCommand::TimelineStatus => self.handle_timeline_status(pgb).await,
     306            0 :                 SafekeeperPostgresCommand::JSONCtrl { ref cmd } => {
     307            0 :                     handle_json_ctrl(self, pgb, cmd).await
     308              :                 }
     309              :             }
     310            0 :         })
     311            0 :     }
     312              : }
     313              : 
     314              : impl SafekeeperPostgresHandler {
     315            0 :     pub fn new(
     316            0 :         conf: SafeKeeperConf,
     317            0 :         conn_id: u32,
     318            0 :         io_metrics: Option<TrafficMetrics>,
     319            0 :         auth: Option<(Scope, Arc<JwtAuth>)>,
     320            0 :     ) -> Self {
                                // Creates a handler from the given configuration, connection id, optional
                                // traffic metrics and optional auth settings (`None` means auth is not
                                // configured). Everything else starts unset: `startup` fills in the
                                // connection parameters, `check_auth_jwt` the claims and `process_query`
                                // the ttid.
     321            0 :         SafekeeperPostgresHandler {
     322            0 :             conf,
     323            0 :             appname: None,
     324            0 :             tenant_id: None,
     325            0 :             timeline_id: None,
     326            0 :             ttid: TenantTimelineId::empty(),
     327            0 :             shard: None,
     328            0 :             protocol: None,
     329            0 :             conn_id,
     330            0 :             claims: None,
     331            0 :             auth,
     332            0 :             io_metrics,
     333            0 :         }
     334            0 :     }
     335              : 
                            /// Returns the protocol selected for this connection, falling back to
                            /// `PostgresClientProtocol::Vanilla` when no `protocol` option was stored.
     336            0 :     pub fn protocol(&self) -> PostgresClientProtocol {
     337            0 :         self.protocol.unwrap_or(PostgresClientProtocol::Vanilla)
     338            0 :     }
     339              : 
     340              :     /// Checks the stored JWT claims; when accessing management api supply None as an argument,
     341              :     /// when using to authorize tenant pass corresponding tenant id.
                            ///
                            /// Returns `Ok(())` without any check when auth is not configured.
                            ///
                            /// # Errors
                            /// Returns `QueryError::Unauthorized` when the claims do not permit the access.
                            ///
                            /// # Panics
                            /// Panics if auth is configured but no claims have been stored.
     342            0 :     fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<(), QueryError> {
     343            0 :         if self.auth.is_none() {
     344              :             // auth is set to Trust, nothing to check so just return ok
     345            0 :             return Ok(());
     346            0 :         }
     347            0 :         // auth is some, just checked above, when auth is some
     348            0 :         // then claims are always present because of checks during connection init
     349            0 :         // so this expect won't trigger
     350            0 :         let claims = self
     351            0 :             .claims
     352            0 :             .as_ref()
     353            0 :             .expect("claims presence already checked");
     354            0 :         check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0))
     355            0 :     }
     356              : 
                            /// Handles the `TIMELINE_STATUS` command.
                            ///
                            /// Replies with a row description of two text columns, `flush_lsn` and
                            /// `commit_lsn`, followed by one data row if the timeline `self.ttid` exists.
                            /// A timeline that is not found is not an error: the reply then has no rows.
                            ///
                            /// # Errors
                            /// Fails if the timeline lookup fails for a reason other than "not found", or
                            /// if writing a message fails.
     357            0 :     async fn handle_timeline_status<IO: AsyncRead + AsyncWrite + Unpin>(
     358            0 :         &mut self,
     359            0 :         pgb: &mut PostgresBackend<IO>,
     360            0 :     ) -> Result<(), QueryError> {
     361              :         // Get timeline, handling "not found" error
     362            0 :         let tli = match GlobalTimelines::get(self.ttid) {
     363            0 :             Ok(tli) => Ok(Some(tli)),
     364            0 :             Err(TimelineError::NotFound(_)) => Ok(None),
     365            0 :             Err(e) => Err(QueryError::Other(e.into())),
     366            0 :         }?;
     367              : 
     368              :         // Write row description
     369            0 :         pgb.write_message_noflush(&BeMessage::RowDescription(&[
     370            0 :             RowDescriptor::text_col(b"flush_lsn"),
     371            0 :             RowDescriptor::text_col(b"commit_lsn"),
     372            0 :         ]))?;
     373              : 
     374              :         // Write row if timeline exists
     375            0 :         if let Some(tli) = tli {
                                    // commit_lsn comes from the first element of the state pair, flush_lsn
                                    // from a separate call.
     376            0 :             let (inmem, _state) = tli.get_state().await;
     377            0 :             let flush_lsn = tli.get_flush_lsn().await;
     378            0 :             let commit_lsn = inmem.commit_lsn;
     379            0 :             pgb.write_message_noflush(&BeMessage::DataRow(&[
     380            0 :                 Some(flush_lsn.to_string().as_bytes()),
     381            0 :                 Some(commit_lsn.to_string().as_bytes()),
     382            0 :             ]))?;
     383            0 :         }
     384              : 
     385            0 :         pgb.write_message_noflush(&BeMessage::CommandComplete(b"TIMELINE_STATUS"))?;
     386            0 :         Ok(())
     387            0 :     }
     388              : 
     389              :     ///
     390              :     /// Handle IDENTIFY_SYSTEM replication command
                            ///
                            /// Replies with a single row of four columns: `systemid`, `timeline`, `xlogpos`
                            /// and `dbname` (always NULL). `xlogpos` is the flush LSN when
                            /// `is_walproposer_recovery()` is true and the commit LSN otherwise.
                            ///
                            /// # Errors
                            /// Fails if the timeline `self.ttid` cannot be obtained or if writing a message
                            /// fails.
     391              :     ///
     392            0 :     async fn handle_identify_system<IO: AsyncRead + AsyncWrite + Unpin>(
     393            0 :         &mut self,
     394            0 :         pgb: &mut PostgresBackend<IO>,
     395            0 :     ) -> Result<(), QueryError> {
     396            0 :         let tli = GlobalTimelines::get(self.ttid).map_err(|e| QueryError::Other(e.into()))?;
     397              : 
     398            0 :         let lsn = if self.is_walproposer_recovery() {
     399              :             // walproposer should get all local WAL until flush_lsn
     400            0 :             tli.get_flush_lsn().await
     401              :         } else {
     402              :             // other clients shouldn't get any uncommitted WAL
     403            0 :             tli.get_state().await.0.commit_lsn
     404              :         }
     405            0 :         .to_string();
     406              : 
                                // NOTE(review): in the non-recovery branch the state is fetched a second
                                // time here; the two reads are separate awaits.
     407            0 :         let sysid = tli.get_state().await.1.server.system_id.to_string();
     408            0 :         let lsn_bytes = lsn.as_bytes();
                                // From here on `tli` is shadowed by the textual form of `PG_TLI`; the
                                // timeline handle is no longer accessible.
     409            0 :         let tli = PG_TLI.to_string();
     410            0 :         let tli_bytes = tli.as_bytes();
     411            0 :         let sysid_bytes = sysid.as_bytes();
     412            0 : 
     413            0 :         pgb.write_message_noflush(&BeMessage::RowDescription(&[
     414            0 :             RowDescriptor {
     415            0 :                 name: b"systemid",
     416            0 :                 typoid: TEXT_OID,
     417            0 :                 typlen: -1,
     418            0 :                 ..Default::default()
     419            0 :             },
     420            0 :             RowDescriptor {
     421            0 :                 name: b"timeline",
     422            0 :                 typoid: INT4_OID,
     423            0 :                 typlen: 4,
     424            0 :                 ..Default::default()
     425            0 :             },
     426            0 :             RowDescriptor {
     427            0 :                 name: b"xlogpos",
     428            0 :                 typoid: TEXT_OID,
     429            0 :                 typlen: -1,
     430            0 :                 ..Default::default()
     431            0 :             },
     432            0 :             RowDescriptor {
     433            0 :                 name: b"dbname",
     434            0 :                 typoid: TEXT_OID,
     435            0 :                 typlen: -1,
     436            0 :                 ..Default::default()
     437            0 :             },
     438            0 :         ]))?
                                // Values are listed in the same order as the columns above; `dbname` is
                                // sent as NULL.
     439            0 :         .write_message_noflush(&BeMessage::DataRow(&[
     440            0 :             Some(sysid_bytes),
     441            0 :             Some(tli_bytes),
     442            0 :             Some(lsn_bytes),
     443            0 :             None,
     444            0 :         ]))?
     445            0 :         .write_message_noflush(&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM"))?;
     446            0 :         Ok(())
     447            0 :     }
     448              : 
     449              :     /// Returns true if current connection is a replication connection, originating
     450              :     /// from a walproposer recovery function. This connection gets a special handling:
     451              :     /// safekeeper must stream all local WAL till the flush_lsn, whether committed or not.
                            ///
                            /// The decision is based solely on the application name: it must be exactly
                            /// `wal_proposer_recovery` or start with `safekeeper`. A connection without an
                            /// application name is never treated as recovery.
     452            0 :     pub fn is_walproposer_recovery(&self) -> bool {
     453            0 :         match &self.appname {
     454            0 :             None => false,
     455            0 :             Some(appname) => {
     456            0 :                 appname == "wal_proposer_recovery" ||
     457              :                 // set by safekeeper peer recovery
     458            0 :                 appname.starts_with("safekeeper")
     459              :             }
     460              :         }
     461            0 :     }
     462              : }
        

Generated by: LCOV version 2.1-beta