|             Line data    Source code 
       1              : use std::cell::{RefCell, RefMut, UnsafeCell};
       2              : use std::ffi::CStr;
       3              : use std::sync::Arc;
       4              : 
       5              : use bytes::Bytes;
       6              : use desim::executor::{self, PollSome};
       7              : use desim::network::TCP;
       8              : use desim::node_os::NodeOs;
       9              : use desim::proto::{AnyMessage, NetEvent, NodeEvent};
      10              : use desim::world::NodeId;
      11              : use tracing::debug;
      12              : use utils::lsn::Lsn;
      13              : use walproposer::api_bindings::Level;
      14              : use walproposer::bindings::{
      15              :     NeonWALReadResult, SafekeeperStateDesiredEvents, WL_SOCKET_READABLE, WL_SOCKET_WRITEABLE,
      16              : };
      17              : use walproposer::walproposer::{ApiImpl, Config};
      18              : 
      19              : use super::walproposer_disk::DiskWalProposer;
      20              : 
/// Special state for each wp->sk connection.
///
/// One value is built for every entry of the configured safekeepers list and
/// is later looked up by comparing `port` with the port string stored in the
/// walproposer `Safekeeper`.
struct SafekeeperConn {
    // host part of the safekeeper address (text before the ':')
    host: String,
    // port part of the safekeeper address, kept as text for lookups
    port: String,
    // simulated node to connect to; it is the numeric value of `port`
    node_id: NodeId,
    // socket is Some(..) equals to connection is established
    socket: Option<TCP>,
    // connection is in progress
    is_connecting: bool,
    // START_WAL_PUSH is in progress
    is_start_wal_push: bool,
    // pointer to Safekeeper in walproposer for callbacks; null until the
    // first connection attempt stores the real pointer
    raw_ptr: *mut walproposer::bindings::Safekeeper,
}
      35              : 
      36              : impl SafekeeperConn {
      37        26400 :     pub fn new(host: String, port: String) -> Self {
      38        26400 :         // port number is the same as NodeId
      39        26400 :         let port_num = port.parse::<u32>().unwrap();
      40        26400 :         Self {
      41        26400 :             host,
      42        26400 :             port,
      43        26400 :             node_id: port_num,
      44        26400 :             socket: None,
      45        26400 :             is_connecting: false,
      46        26400 :             is_start_wal_push: false,
      47        26400 :             raw_ptr: std::ptr::null_mut(),
      48        26400 :         }
      49        26400 :     }
      50              : }
      51              : 
/// Simulation version of a postgres WaitEventSet. At pos 0 there is always
/// a special NodeEvents channel, which is used as a latch.
///
/// `chans`, `sk_ptrs` and `masks` are parallel vectors: the entries at the
/// same index describe the same channel and are always pushed, removed and
/// swapped together.
struct EventSet {
    // handle used to obtain the node events channel
    os: NodeOs,
    // all pollable channels, 0 is always NodeEvent channel
    chans: Vec<Box<dyn PollSome>>,
    // 0 is always nullptr
    sk_ptrs: Vec<*mut walproposer::bindings::Safekeeper>,
    // event mask for each channel
    masks: Vec<u32>,
}
      63              : 
      64              : impl EventSet {
      65         8800 :     pub fn new(os: NodeOs) -> Self {
      66         8800 :         let node_events = os.node_events();
      67         8800 :         Self {
      68         8800 :             os,
      69         8800 :             chans: vec![Box::new(node_events)],
      70         8800 :             sk_ptrs: vec![std::ptr::null_mut()],
      71         8800 :             masks: vec![WL_SOCKET_READABLE],
      72         8800 :         }
      73         8800 :     }
      74              : 
      75              :     /// Leaves all readable channels at the beginning of the array.
      76        27281 :     fn sort_readable(&mut self) -> usize {
      77        27281 :         let mut cnt = 1;
      78        64319 :         for i in 1..self.chans.len() {
      79        64319 :             if self.masks[i] & WL_SOCKET_READABLE != 0 {
      80        64319 :                 self.chans.swap(i, cnt);
      81        64319 :                 self.sk_ptrs.swap(i, cnt);
      82        64319 :                 self.masks.swap(i, cnt);
      83        64319 :                 cnt += 1;
      84        64319 :             }
      85              :         }
      86        27281 :         cnt
      87        27281 :     }
      88              : 
      89        63863 :     fn update_event_set(&mut self, conn: &SafekeeperConn, event_mask: u32) {
      90        63863 :         let index = self
      91        63863 :             .sk_ptrs
      92        63863 :             .iter()
      93       241581 :             .position(|&ptr| ptr == conn.raw_ptr)
      94        63863 :             .expect("safekeeper should exist in event set");
      95        63863 :         self.masks[index] = event_mask;
      96        63863 :     }
      97              : 
      98        58602 :     fn add_safekeeper(&mut self, sk: &SafekeeperConn, event_mask: u32) {
      99       141603 :         for ptr in self.sk_ptrs.iter() {
     100       141603 :             assert!(*ptr != sk.raw_ptr);
     101              :         }
     102              : 
     103        58602 :         self.chans.push(Box::new(
     104        58602 :             sk.socket
     105        58602 :                 .as_ref()
     106        58602 :                 .expect("socket should not be closed")
     107        58602 :                 .recv_chan(),
     108        58602 :         ));
     109        58602 :         self.sk_ptrs.push(sk.raw_ptr);
     110        58602 :         self.masks.push(event_mask);
     111        58602 :     }
     112              : 
     113        36910 :     fn remove_safekeeper(&mut self, sk: &SafekeeperConn) {
     114        78389 :         let index = self.sk_ptrs.iter().position(|&ptr| ptr == sk.raw_ptr);
     115        36910 :         if index.is_none() {
     116            3 :             debug!("remove_safekeeper: sk={:?} not found", sk.raw_ptr);
     117            3 :             return;
     118        36907 :         }
     119        36907 :         let index = index.unwrap();
     120        36907 : 
     121        36907 :         self.chans.remove(index);
     122        36907 :         self.sk_ptrs.remove(index);
     123        36907 :         self.masks.remove(index);
     124        36907 : 
     125        36907 :         // to simulate the actual behaviour
     126        36907 :         self.refresh_event_set();
     127        36910 :     }
     128              : 
     129              :     /// Updates all masks to match the result of a SafekeeperStateDesiredEvents.
     130        42842 :     fn refresh_event_set(&mut self) {
     131       121036 :         for (i, mask) in self.masks.iter_mut().enumerate() {
     132       121036 :             if i == 0 {
     133        42842 :                 continue;
     134        78194 :             }
     135        78194 : 
     136        78194 :             let mut mask_sk: u32 = 0;
     137        78194 :             let mut mask_nwr: u32 = 0;
     138        78194 :             unsafe { SafekeeperStateDesiredEvents(self.sk_ptrs[i], &mut mask_sk, &mut mask_nwr) };
     139        78194 : 
     140        78194 :             if mask_sk != *mask {
     141            0 :                 debug!(
     142            0 :                     "refresh_event_set: sk={:?}, old_mask={:#b}, new_mask={:#b}",
     143            0 :                     self.sk_ptrs[i], *mask, mask_sk
     144              :                 );
     145            0 :                 *mask = mask_sk;
     146        78194 :             }
     147              :         }
     148        42842 :     }
     149              : 
     150              :     /// Wait for events on all channels.
     151        27281 :     fn wait(&mut self, timeout_millis: i64) -> walproposer::walproposer::WaitResult {
     152              :         // all channels are always writeable
     153        91600 :         for (i, mask) in self.masks.iter().enumerate() {
     154        91600 :             if *mask & WL_SOCKET_WRITEABLE != 0 {
     155            0 :                 return walproposer::walproposer::WaitResult::Network(
     156            0 :                     self.sk_ptrs[i],
     157            0 :                     WL_SOCKET_WRITEABLE,
     158            0 :                 );
     159        91600 :             }
     160              :         }
     161              : 
     162        27281 :         let cnt = self.sort_readable();
     163        27281 : 
     164        27281 :         let slice = &self.chans[0..cnt];
     165        27281 :         match executor::epoll_chans(slice, timeout_millis) {
     166        11032 :             None => walproposer::walproposer::WaitResult::Timeout,
     167              :             Some(0) => {
     168          496 :                 let msg = self.os.node_events().must_recv();
     169          496 :                 match msg {
     170          496 :                     NodeEvent::Internal(AnyMessage::Just32(0)) => {
     171          496 :                         // got a notification about new WAL available
     172          496 :                     }
     173            0 :                     NodeEvent::Internal(_) => unreachable!(),
     174            0 :                     NodeEvent::Accept(_) => unreachable!(),
     175              :                 }
     176          496 :                 walproposer::walproposer::WaitResult::Latch
     177              :             }
     178        15753 :             Some(index) => walproposer::walproposer::WaitResult::Network(
     179        15753 :                 self.sk_ptrs[index],
     180        15753 :                 WL_SOCKET_READABLE,
     181        15753 :             ),
     182              :         }
     183        27281 :     }
     184              : }
     185              : 
/// This struct handles all calls from walproposer into walproposer_api.
///
/// The mutable parts are wrapped in `RefCell`/`UnsafeCell` so that they can
/// be changed from callbacks which only receive `&self`.
pub struct SimulationApi {
    // handle to the simulated node: clock, randomness, tcp and event log
    os: NodeOs,
    // connection state, one entry per configured safekeeper
    safekeepers: RefCell<Vec<SafekeeperConn>>,
    // simulated WAL storage which is read and streamed from
    disk: Arc<DiskWalProposer>,
    // value reported by get_redo_start_lsn; None makes that call panic
    redo_start_lsn: Option<Lsn>,
    // starts at 0; NOTE(review): presumably tracks the last commit LSN that
    // was logged -- the code using it is outside of this view
    last_logged_commit_lsn: u64,
    // shared memory state handed out to walproposer as a raw pointer
    shmem: UnsafeCell<walproposer::bindings::WalproposerShmemState>,
    // walproposer configuration, the source of the safekeepers list
    config: Config,
    // None until init_event_set is called
    event_set: RefCell<Option<EventSet>>,
}
     197              : 
/// Everything needed to construct a [`SimulationApi`]; every field is moved
/// into the field of the same name.
pub struct Args {
    // handle to the simulated node
    pub os: NodeOs,
    // walproposer configuration, including the safekeepers list
    pub config: Config,
    // simulated WAL storage
    pub disk: Arc<DiskWalProposer>,
    // optional redo start LSN reported back to walproposer
    pub redo_start_lsn: Option<Lsn>,
}
     204              : 
     205              : impl SimulationApi {
     206         8800 :     pub fn new(args: Args) -> Self {
     207         8800 :         // initialize connection state for each safekeeper
     208         8800 :         let sk_conns = args
     209         8800 :             .config
     210         8800 :             .safekeepers_list
     211         8800 :             .iter()
     212        26400 :             .map(|s| {
     213        26400 :                 SafekeeperConn::new(
     214        26400 :                     s.split(':').next().unwrap().to_string(),
     215        26400 :                     s.split(':').nth(1).unwrap().to_string(),
     216        26400 :                 )
     217        26400 :             })
     218         8800 :             .collect::<Vec<_>>();
     219         8800 : 
     220         8800 :         Self {
     221         8800 :             os: args.os,
     222         8800 :             safekeepers: RefCell::new(sk_conns),
     223         8800 :             disk: args.disk,
     224         8800 :             redo_start_lsn: args.redo_start_lsn,
     225         8800 :             last_logged_commit_lsn: 0,
     226         8800 :             shmem: UnsafeCell::new(walproposer::api_bindings::empty_shmem()),
     227         8800 :             config: args.config,
     228         8800 :             event_set: RefCell::new(None),
     229         8800 :         }
     230         8800 :     }
     231              : 
     232              :     /// Get SafekeeperConn for the given Safekeeper.
     233       287532 :     fn get_conn(&self, sk: &mut walproposer::bindings::Safekeeper) -> RefMut<'_, SafekeeperConn> {
     234       287532 :         let sk_port = unsafe { CStr::from_ptr(sk.port).to_str().unwrap() };
     235       287532 :         let state = self.safekeepers.borrow_mut();
     236       287532 :         RefMut::map(state, |v| {
     237       287532 :             v.iter_mut()
     238       573941 :                 .find(|conn| conn.port == sk_port)
     239       287532 :                 .expect("safekeeper conn not found by port")
     240       287532 :         })
     241       287532 :     }
     242              : }
     243              : 
     244              : impl ApiImpl for SimulationApi {
     245       308241 :     fn get_current_timestamp(&self) -> i64 {
     246       308241 :         debug!("get_current_timestamp");
     247              :         // PG TimestampTZ is microseconds, but simulation unit is assumed to be
     248              :         // milliseconds, so add 10^3
     249       308241 :         self.os.now() as i64 * 1000
     250       308241 :     }
     251              : 
     252          887 :     fn update_donor(&self, donor: &mut walproposer::bindings::Safekeeper, donor_lsn: u64) {
     253          887 :         let mut shmem = unsafe { *self.get_shmem_state() };
     254          887 :         shmem.propEpochStartLsn.value = donor_lsn;
     255          887 :         shmem.donor_conninfo = donor.conninfo;
     256          887 :     }
     257              : 
     258        32579 :     fn conn_status(
     259        32579 :         &self,
     260        32579 :         _: &mut walproposer::bindings::Safekeeper,
     261        32579 :     ) -> walproposer::bindings::WalProposerConnStatusType {
     262        32579 :         debug!("conn_status");
     263              :         // break the connection with a 10% chance
     264        32579 :         if self.os.random(100) < 10 {
     265         3278 :             walproposer::bindings::WalProposerConnStatusType_WP_CONNECTION_BAD
     266              :         } else {
     267        29301 :             walproposer::bindings::WalProposerConnStatusType_WP_CONNECTION_OK
     268              :         }
     269        32579 :     }
     270              : 
     271        32579 :     fn conn_connect_start(&self, sk: &mut walproposer::bindings::Safekeeper) {
     272        32579 :         debug!("conn_connect_start");
     273        32579 :         let mut conn = self.get_conn(sk);
     274        32579 : 
     275        32579 :         assert!(conn.socket.is_none());
     276        32579 :         let socket = self.os.open_tcp(conn.node_id);
     277        32579 :         conn.socket = Some(socket);
     278        32579 :         conn.raw_ptr = sk;
     279        32579 :         conn.is_connecting = true;
     280        32579 :     }
     281              : 
     282        29301 :     fn conn_connect_poll(
     283        29301 :         &self,
     284        29301 :         _: &mut walproposer::bindings::Safekeeper,
     285        29301 :     ) -> walproposer::bindings::WalProposerConnectPollStatusType {
     286        29301 :         debug!("conn_connect_poll");
     287              :         // TODO: break the connection here
     288        29301 :         walproposer::bindings::WalProposerConnectPollStatusType_WP_CONN_POLLING_OK
     289        29301 :     }
     290              : 
     291        29301 :     fn conn_send_query(&self, sk: &mut walproposer::bindings::Safekeeper, query: &str) -> bool {
     292        29301 :         debug!("conn_send_query: {}", query);
     293        29301 :         self.get_conn(sk).is_start_wal_push = true;
     294        29301 :         true
     295        29301 :     }
     296              : 
     297        29301 :     fn conn_get_query_result(
     298        29301 :         &self,
     299        29301 :         _: &mut walproposer::bindings::Safekeeper,
     300        29301 :     ) -> walproposer::bindings::WalProposerExecStatusType {
     301        29301 :         debug!("conn_get_query_result");
     302              :         // TODO: break the connection here
     303        29301 :         walproposer::bindings::WalProposerExecStatusType_WP_EXEC_SUCCESS_COPYBOTH
     304        29301 :     }
     305              : 
     306        17700 :     fn conn_async_read(
     307        17700 :         &self,
     308        17700 :         sk: &mut walproposer::bindings::Safekeeper,
     309        17700 :         vec: &mut Vec<u8>,
     310        17700 :     ) -> walproposer::bindings::PGAsyncReadResult {
     311        17700 :         debug!("conn_async_read");
     312        17700 :         let mut conn = self.get_conn(sk);
     313              : 
     314        17700 :         let socket = if let Some(socket) = conn.socket.as_mut() {
     315        17700 :             socket
     316              :         } else {
     317              :             // socket is already closed
     318            0 :             return walproposer::bindings::PGAsyncReadResult_PG_ASYNC_READ_FAIL;
     319              :         };
     320              : 
     321        17700 :         let msg = socket.recv_chan().try_recv();
     322              : 
     323        15585 :         match msg {
     324              :             None => {
     325              :                 // no message is ready
     326         2115 :                 walproposer::bindings::PGAsyncReadResult_PG_ASYNC_READ_TRY_AGAIN
     327              :             }
     328              :             Some(NetEvent::Closed) => {
     329              :                 // connection is closed
     330         7328 :                 debug!("conn_async_read: connection is closed");
     331         7328 :                 conn.socket = None;
     332         7328 :                 walproposer::bindings::PGAsyncReadResult_PG_ASYNC_READ_FAIL
     333              :             }
     334         8257 :             Some(NetEvent::Message(msg)) => {
     335              :                 // got a message
     336         8257 :                 let b = match msg {
     337         8257 :                     desim::proto::AnyMessage::Bytes(b) => b,
     338            0 :                     _ => unreachable!(),
     339              :                 };
     340         8257 :                 vec.extend_from_slice(&b);
     341         8257 :                 walproposer::bindings::PGAsyncReadResult_PG_ASYNC_READ_SUCCESS
     342              :             }
     343              :         }
     344        17700 :     }
     345              : 
     346        32738 :     fn conn_blocking_write(&self, sk: &mut walproposer::bindings::Safekeeper, buf: &[u8]) -> bool {
     347        32738 :         let mut conn = self.get_conn(sk);
     348        32738 :         debug!("conn_blocking_write to {}: {:?}", conn.node_id, buf);
     349        32738 :         let socket = conn.socket.as_mut().unwrap();
     350        32738 :         socket.send(desim::proto::AnyMessage::Bytes(Bytes::copy_from_slice(buf)));
     351        32738 :         true
     352        32738 :     }
     353              : 
     354         4718 :     fn conn_async_write(
     355         4718 :         &self,
     356         4718 :         sk: &mut walproposer::bindings::Safekeeper,
     357         4718 :         buf: &[u8],
     358         4718 :     ) -> walproposer::bindings::PGAsyncWriteResult {
     359         4718 :         let mut conn = self.get_conn(sk);
     360         4718 :         debug!("conn_async_write to {}: {:?}", conn.node_id, buf);
     361         4718 :         if let Some(socket) = conn.socket.as_mut() {
     362         4718 :             socket.send(desim::proto::AnyMessage::Bytes(Bytes::copy_from_slice(buf)));
     363         4718 :         } else {
     364              :             // connection is already closed
     365            0 :             debug!("conn_async_write: writing to a closed socket!");
     366              :             // TODO: maybe we should return error here?
     367              :         }
     368         4718 :         walproposer::bindings::PGAsyncWriteResult_PG_ASYNC_WRITE_SUCCESS
     369         4718 :     }
     370              : 
     371          898 :     fn wal_reader_allocate(&self, _: &mut walproposer::bindings::Safekeeper) -> NeonWALReadResult {
     372          898 :         debug!("wal_reader_allocate");
     373          898 :         walproposer::bindings::NeonWALReadResult_NEON_WALREAD_SUCCESS
     374          898 :     }
     375              : 
     376          875 :     fn wal_read(
     377          875 :         &self,
     378          875 :         _sk: &mut walproposer::bindings::Safekeeper,
     379          875 :         buf: &mut [u8],
     380          875 :         startpos: u64,
     381          875 :     ) -> NeonWALReadResult {
     382          875 :         self.disk.lock().read(startpos, buf);
     383          875 :         walproposer::bindings::NeonWALReadResult_NEON_WALREAD_SUCCESS
     384          875 :     }
     385              : 
     386         8800 :     fn init_event_set(&self, _: &mut walproposer::bindings::WalProposer) {
     387         8800 :         debug!("init_event_set");
     388         8800 :         let new_event_set = EventSet::new(self.os.clone());
     389         8800 :         let old_event_set = self.event_set.replace(Some(new_event_set));
     390         8800 :         assert!(old_event_set.is_none());
     391         8800 :     }
     392              : 
     393        63863 :     fn update_event_set(&self, sk: &mut walproposer::bindings::Safekeeper, event_mask: u32) {
     394        63863 :         debug!(
     395            0 :             "update_event_set, sk={:?}, events_mask={:#b}",
     396            0 :             sk as *mut walproposer::bindings::Safekeeper, event_mask
     397              :         );
     398        63863 :         let conn = self.get_conn(sk);
     399        63863 : 
     400        63863 :         self.event_set
     401        63863 :             .borrow_mut()
     402        63863 :             .as_mut()
     403        63863 :             .unwrap()
     404        63863 :             .update_event_set(&conn, event_mask);
     405        63863 :     }
     406              : 
     407        58602 :     fn add_safekeeper_event_set(
     408        58602 :         &self,
     409        58602 :         sk: &mut walproposer::bindings::Safekeeper,
     410        58602 :         event_mask: u32,
     411        58602 :     ) {
     412        58602 :         debug!(
     413            0 :             "add_safekeeper_event_set, sk={:?}, events_mask={:#b}",
     414            0 :             sk as *mut walproposer::bindings::Safekeeper, event_mask
     415              :         );
     416              : 
     417        58602 :         self.event_set
     418        58602 :             .borrow_mut()
     419        58602 :             .as_mut()
     420        58602 :             .unwrap()
     421        58602 :             .add_safekeeper(&self.get_conn(sk), event_mask);
     422        58602 :     }
     423              : 
     424        36910 :     fn rm_safekeeper_event_set(&self, sk: &mut walproposer::bindings::Safekeeper) {
     425        36910 :         debug!(
     426            0 :             "rm_safekeeper_event_set, sk={:?}",
     427            0 :             sk as *mut walproposer::bindings::Safekeeper,
     428              :         );
     429              : 
     430        36910 :         self.event_set
     431        36910 :             .borrow_mut()
     432        36910 :             .as_mut()
     433        36910 :             .unwrap()
     434        36910 :             .remove_safekeeper(&self.get_conn(sk));
     435        36910 :     }
     436              : 
     437         5935 :     fn active_state_update_event_set(&self, sk: &mut walproposer::bindings::Safekeeper) {
     438         5935 :         debug!("active_state_update_event_set");
     439              : 
     440         5935 :         assert!(sk.state == walproposer::bindings::SafekeeperState_SS_ACTIVE);
     441         5935 :         self.event_set
     442         5935 :             .borrow_mut()
     443         5935 :             .as_mut()
     444         5935 :             .unwrap()
     445         5935 :             .refresh_event_set();
     446         5935 :     }
     447              : 
     448        16476 :     fn wal_reader_events(&self, _sk: &mut walproposer::bindings::Safekeeper) -> u32 {
     449        16476 :         0
     450        16476 :     }
     451              : 
     452        85883 :     fn wait_event_set(
     453        85883 :         &self,
     454        85883 :         _: &mut walproposer::bindings::WalProposer,
     455        85883 :         timeout_millis: i64,
     456        85883 :     ) -> walproposer::walproposer::WaitResult {
     457        85883 :         // TODO: handle multiple stages as part of the simulation (e.g. connect, start_wal_push, etc)
     458        85883 :         let mut conns = self.safekeepers.borrow_mut();
     459       199035 :         for conn in conns.iter_mut() {
     460       199035 :             if conn.socket.is_some() && conn.is_connecting {
     461        29301 :                 conn.is_connecting = false;
     462        29301 :                 debug!("wait_event_set, connecting to {}:{}", conn.host, conn.port);
     463        29301 :                 return walproposer::walproposer::WaitResult::Network(
     464        29301 :                     conn.raw_ptr,
     465        29301 :                     WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE,
     466        29301 :                 );
     467       169734 :             }
     468       169734 :             if conn.socket.is_some() && conn.is_start_wal_push {
     469        29301 :                 conn.is_start_wal_push = false;
     470        29301 :                 debug!(
     471            0 :                     "wait_event_set, start wal push to {}:{}",
     472              :                     conn.host, conn.port
     473              :                 );
     474        29301 :                 return walproposer::walproposer::WaitResult::Network(
     475        29301 :                     conn.raw_ptr,
     476        29301 :                     WL_SOCKET_READABLE,
     477        29301 :                 );
     478       140433 :             }
     479              :         }
     480        27281 :         drop(conns);
     481        27281 : 
     482        27281 :         let res = self
     483        27281 :             .event_set
     484        27281 :             .borrow_mut()
     485        27281 :             .as_mut()
     486        27281 :             .unwrap()
     487        27281 :             .wait(timeout_millis);
     488        27281 : 
     489        27281 :         debug!(
     490            0 :             "wait_event_set, timeout_millis={}, res={:?}",
     491              :             timeout_millis, res,
     492              :         );
     493        27281 :         res
     494        85883 :     }
     495              : 
     496            0 :     fn strong_random(&self, buf: &mut [u8]) -> bool {
     497            0 :         debug!("strong_random");
     498            0 :         buf.fill(0);
     499            0 :         true
     500            0 :     }
     501              : 
     502          368 :     fn finish_sync_safekeepers(&self, lsn: u64) {
     503          368 :         debug!("finish_sync_safekeepers, lsn={}", lsn);
     504          368 :         executor::exit(0, Lsn(lsn).to_string());
     505          368 :     }
     506              : 
     507       145940 :     fn log_internal(&self, _wp: &mut walproposer::bindings::WalProposer, level: Level, msg: &str) {
     508       145940 :         debug!("wp_log[{}] {}", level, msg);
     509       145940 :         if level == Level::Fatal || level == Level::Panic {
     510           58 :             if msg.contains("rejects our connection request with term") {
     511           45 :                 // collected quorum with lower term, then got rejected by next connected safekeeper
     512           45 :                 executor::exit(1, msg.to_owned());
     513           45 :             }
     514           58 :             if msg.contains("collected propTermStartLsn") && msg.contains(", but basebackup LSN ") {
     515            4 :                 // sync-safekeepers collected wrong quorum, walproposer collected another quorum
     516            4 :                 executor::exit(1, msg.to_owned());
     517           54 :             }
     518           58 :             if msg.contains("failed to download WAL for logical replicaiton") {
     519            9 :                 // Recovery connection broken and recovery was failed
     520            9 :                 executor::exit(1, msg.to_owned());
     521           49 :             }
     522           58 :             if msg.contains("missing majority of votes, collected") {
     523            0 :                 // Voting bug when safekeeper disconnects after voting
     524            0 :                 executor::exit(1, msg.to_owned());
     525           58 :             }
     526           58 :             panic!("unknown FATAL error from walproposer: {}", msg);
     527       145882 :         }
     528       145882 :     }
     529              : 
     530          499 :     fn after_election(&self, wp: &mut walproposer::bindings::WalProposer) {
     531          499 :         let prop_lsn = wp.propTermStartLsn;
     532          499 :         let prop_term = wp.propTerm;
     533          499 : 
     534          499 :         let mut prev_lsn: u64 = 0;
     535          499 :         let mut prev_term: u64 = 0;
     536          499 : 
     537          499 :         unsafe {
     538          499 :             let history = wp.propTermHistory.entries;
     539          499 :             let len = wp.propTermHistory.n_entries as usize;
     540          499 :             if len > 1 {
     541          450 :                 let entry = *history.wrapping_add(len - 2);
     542          450 :                 prev_lsn = entry.lsn;
     543          450 :                 prev_term = entry.term;
     544          450 :             }
     545              :         }
     546              : 
     547          499 :         let msg = format!(
     548          499 :             "prop_elected;{};{};{};{}",
     549          499 :             prop_lsn, prop_term, prev_lsn, prev_term
     550          499 :         );
     551          499 : 
     552          499 :         debug!(msg);
     553          499 :         self.os.log_event(msg);
     554          499 :     }
     555              : 
     556          226 :     fn get_redo_start_lsn(&self) -> u64 {
     557          226 :         debug!("get_redo_start_lsn -> {:?}", self.redo_start_lsn);
     558          226 :         self.redo_start_lsn.expect("redo_start_lsn is not set").0
     559          226 :     }
     560              : 
     561         1893 :     fn get_shmem_state(&self) -> *mut walproposer::bindings::WalproposerShmemState {
     562         1893 :         self.shmem.get()
     563         1893 :     }
     564              : 
     565          163 :     fn start_streaming(
     566          163 :         &self,
     567          163 :         startpos: u64,
     568          163 :         callback: &walproposer::walproposer::StreamingCallback,
     569          163 :     ) {
     570          163 :         let disk = &self.disk;
     571          163 :         let disk_lsn = disk.lock().flush_rec_ptr().0;
     572          163 :         debug!("start_streaming at {} (disk_lsn={})", startpos, disk_lsn);
     573          163 :         if startpos < disk_lsn {
     574           32 :             debug!(
     575            0 :                 "startpos < disk_lsn, it means we wrote some transaction even before streaming started"
     576              :             );
     577          131 :         }
     578          163 :         assert!(startpos <= disk_lsn);
     579          163 :         let mut broadcasted = Lsn(startpos);
     580              : 
     581              :         loop {
     582          498 :             let available = disk.lock().flush_rec_ptr();
     583          498 :             assert!(available >= broadcasted);
     584          335 :             callback.broadcast(broadcasted, available);
     585          335 :             broadcasted = available;
     586          335 :             callback.poll();
     587              :         }
     588              :     }
     589              : 
     590         2290 :     fn process_safekeeper_feedback(
     591         2290 :         &mut self,
     592         2290 :         wp: &mut walproposer::bindings::WalProposer,
     593         2290 :         _sk: &mut walproposer::bindings::Safekeeper,
     594         2290 :     ) {
     595         2290 :         debug!("process_safekeeper_feedback, commit_lsn={}", wp.commitLsn);
     596         2290 :         if wp.commitLsn > self.last_logged_commit_lsn {
     597          518 :             self.os.log_event(format!("commit_lsn;{}", wp.commitLsn));
     598          518 :             self.last_logged_commit_lsn = wp.commitLsn;
     599         1772 :         }
     600         2290 :     }
     601              : 
     602           83 :     fn get_flush_rec_ptr(&self) -> u64 {
     603           83 :         let lsn = self.disk.lock().flush_rec_ptr();
     604           83 :         debug!("get_flush_rec_ptr: {}", lsn);
     605           83 :         lsn.0
     606           83 :     }
     607              : 
     608          499 :     fn recovery_download(
     609          499 :         &self,
     610          499 :         wp: &mut walproposer::bindings::WalProposer,
     611          499 :         sk: &mut walproposer::bindings::Safekeeper,
     612          499 :     ) -> bool {
     613          499 :         let mut startpos = wp.truncateLsn;
     614          499 :         let endpos = wp.propTermStartLsn;
     615          499 : 
     616          499 :         if startpos == endpos {
     617          265 :             debug!("recovery_download: nothing to download");
     618          265 :             return true;
     619          234 :         }
     620          234 : 
     621          234 :         debug!("recovery_download from {} to {}", startpos, endpos,);
     622              : 
     623          234 :         let replication_prompt = format!(
     624          234 :             "START_REPLICATION {} {} {} {}",
     625          234 :             self.config.ttid.tenant_id, self.config.ttid.timeline_id, startpos, endpos,
     626          234 :         );
     627          234 :         let async_conn = self.get_conn(sk);
     628          234 : 
     629          234 :         let conn = self.os.open_tcp(async_conn.node_id);
     630          234 :         conn.send(desim::proto::AnyMessage::Bytes(replication_prompt.into()));
     631          234 : 
     632          234 :         let chan = conn.recv_chan();
     633          408 :         while startpos < endpos {
     634          234 :             let event = chan.recv();
     635          225 :             match event {
     636              :                 NetEvent::Closed => {
     637            9 :                     debug!("connection closed in recovery");
     638            9 :                     break;
     639              :                 }
     640          174 :                 NetEvent::Message(AnyMessage::Bytes(b)) => {
     641          174 :                     debug!("got recovery bytes from safekeeper");
     642          174 :                     self.disk.lock().write(startpos, &b);
     643          174 :                     startpos += b.len() as u64;
     644              :                 }
     645           51 :                 NetEvent::Message(_) => unreachable!(),
     646              :             }
     647              :         }
     648              : 
     649          183 :         debug!("recovery finished at {}", startpos);
     650              : 
     651          183 :         startpos == endpos
     652          448 :     }
     653              : 
     654        10887 :     fn conn_finish(&self, sk: &mut walproposer::bindings::Safekeeper) {
     655        10887 :         let mut conn = self.get_conn(sk);
     656        10887 :         debug!("conn_finish to {}", conn.node_id);
     657        10887 :         if let Some(socket) = conn.socket.as_mut() {
     658         3556 :             socket.close();
     659         7331 :         } else {
     660         7331 :             // connection is already closed
     661         7331 :         }
     662        10887 :         conn.socket = None;
     663        10887 :     }
     664              : 
     665        10606 :     fn conn_error_message(&self, _sk: &mut walproposer::bindings::Safekeeper) -> String {
     666        10606 :         "connection is closed, probably".into()
     667        10606 :     }
     668              : }
         |