LCOV - bb522999b2ee0ee028df22bb188d3a84170ba700.info - safekeeper/src/timeline

LCOV - code coverage report

Current view:	top level - safekeeper/src - timeline_manager.rs (source / functions)		Coverage	Total	Hit
Test:	bb522999b2ee0ee028df22bb188d3a84170ba700.info	Lines:	0.0 %	295	0
Test Date:	2024-07-21 16:16:09	Functions:	0.0 %	51	0

            Line data    Source code

       1              : //! The timeline manager task is responsible for managing the timeline's background tasks.
       2              : //! It is spawned alongside each timeline and exits when the timeline is deleted.
       3              : //! It watches for changes in the timeline state and decides when to spawn or kill background tasks.
       4              : //! It can also manage some reactive state, such as whether the timeline should be active for broker pushes.
       5              : //!
       6              : //! Be aware that you need to be extra careful with manager code, because it is not respawned on panic.
       7              : //! Also, if it gets stuck in some branch, it will prevent any further progress in the timeline.
       8              : 
       9              : use std::{
      10              :     sync::{atomic::AtomicUsize, Arc},
      11              :     time::Duration,
      12              : };
      13              : 
      14              : use postgres_ffi::XLogSegNo;
      15              : use serde::{Deserialize, Serialize};
      16              : use tokio::{
      17              :     task::{JoinError, JoinHandle},
      18              :     time::Instant,
      19              : };
      20              : use tracing::{debug, info, info_span, instrument, warn, Instrument};
      21              : use utils::lsn::Lsn;
      22              : 
      23              : use crate::{
      24              :     control_file::{FileStorage, Storage},
      25              :     metrics::{MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS},
      26              :     recovery::recovery_main,
      27              :     remove_wal::calc_horizon_lsn,
      28              :     safekeeper::Term,
      29              :     send_wal::WalSenders,
      30              :     state::TimelineState,
      31              :     timeline::{ManagerTimeline, PeerInfo, ReadGuardSharedState, StateSK, WalResidentTimeline},
      32              :     timeline_guard::{AccessService, GuardId, ResidenceGuard},
      33              :     timelines_set::{TimelineSetGuard, TimelinesSet},
      34              :     wal_backup::{self, WalBackupTaskHandle},
      35              :     wal_backup_partial::{self, PartialRemoteSegment, RateLimiter},
      36              :     SafeKeeperConf,
      37              : };
      38              : 
      39              : pub(crate) struct StateSnapshot {
      40              :     // inmem values
      41              :     pub(crate) commit_lsn: Lsn,
      42              :     pub(crate) backup_lsn: Lsn,
      43              :     pub(crate) remote_consistent_lsn: Lsn,
      44              : 
      45              :     // persistent control file values
      46              :     pub(crate) cfile_peer_horizon_lsn: Lsn,
      47              :     pub(crate) cfile_remote_consistent_lsn: Lsn,
      48              :     pub(crate) cfile_backup_lsn: Lsn,
      49              : 
      50              :     // latest state
      51              :     pub(crate) flush_lsn: Lsn,
      52              :     pub(crate) last_log_term: Term,
      53              : 
      54              :     // misc
      55              :     pub(crate) cfile_last_persist_at: std::time::Instant,
      56              :     pub(crate) inmem_flush_pending: bool,
      57              :     pub(crate) wal_removal_on_hold: bool,
      58              :     pub(crate) peers: Vec<PeerInfo>,
      59              : }
      60              : 
      61              : impl StateSnapshot {
      62              :     /// Create a new snapshot of the timeline state.
      63            0 :     fn new(read_guard: ReadGuardSharedState, heartbeat_timeout: Duration) -> Self {
      64            0 :         let state = read_guard.sk.state();
      65            0 :         Self {
      66            0 :             commit_lsn: state.inmem.commit_lsn,
      67            0 :             backup_lsn: state.inmem.backup_lsn,
      68            0 :             remote_consistent_lsn: state.inmem.remote_consistent_lsn,
      69            0 :             cfile_peer_horizon_lsn: state.peer_horizon_lsn,
      70            0 :             cfile_remote_consistent_lsn: state.remote_consistent_lsn,
      71            0 :             cfile_backup_lsn: state.backup_lsn,
      72            0 :             flush_lsn: read_guard.sk.flush_lsn(),
      73            0 :             last_log_term: read_guard.sk.last_log_term(),
      74            0 :             cfile_last_persist_at: state.pers.last_persist_at(),
      75            0 :             inmem_flush_pending: Self::has_unflushed_inmem_state(state),
      76            0 :             wal_removal_on_hold: read_guard.wal_removal_on_hold,
      77            0 :             peers: read_guard.get_peers(heartbeat_timeout),
      78            0 :         }
      79            0 :     }
      80              : 
      81            0 :     fn has_unflushed_inmem_state(state: &TimelineState<FileStorage>) -> bool {
      82            0 :         state.inmem.commit_lsn > state.commit_lsn
      83            0 :             || state.inmem.backup_lsn > state.backup_lsn
      84            0 :             || state.inmem.peer_horizon_lsn > state.peer_horizon_lsn
      85            0 :             || state.inmem.remote_consistent_lsn > state.remote_consistent_lsn
      86            0 :     }
      87              : }
      88              : 
      89              : /// Control how often the manager task should wake up to check updates.
      90              : /// There is no need to check for updates more often than this.
      91              : const REFRESH_INTERVAL: Duration = Duration::from_millis(300);
      92              : 
      93              : pub enum ManagerCtlMessage {
      94              :     /// Request to get a guard for WalResidentTimeline, with WAL files available locally.
      95              :     GuardRequest(tokio::sync::oneshot::Sender<anyhow::Result<ResidenceGuard>>),
      96              :     /// Request to drop the guard.
      97              :     GuardDrop(GuardId),
      98              : }
      99              : 
     100              : impl std::fmt::Debug for ManagerCtlMessage {
     101            0 :     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     102            0 :         match self {
     103            0 :             ManagerCtlMessage::GuardRequest(_) => write!(f, "GuardRequest"),
     104            0 :             ManagerCtlMessage::GuardDrop(id) => write!(f, "GuardDrop({:?})", id),
     105              :         }
     106            0 :     }
     107              : }
     108              : 
     109              : pub struct ManagerCtl {
     110              :     manager_tx: tokio::sync::mpsc::UnboundedSender<ManagerCtlMessage>,
     111              : 
     112              :     // this is used to initialize the manager; it is moved out in bootstrap_manager().
     113              :     init_manager_rx:
     114              :         std::sync::Mutex<Option<tokio::sync::mpsc::UnboundedReceiver<ManagerCtlMessage>>>,
     115              : }
     116              : 
     117              : impl Default for ManagerCtl {
     118            0 :     fn default() -> Self {
     119            0 :         Self::new()
     120            0 :     }
     121              : }
     122              : 
     123              : impl ManagerCtl {
     124            0 :     pub fn new() -> Self {
     125            0 :         let (tx, rx) = tokio::sync::mpsc::unbounded_channel();
     126            0 :         Self {
     127            0 :             manager_tx: tx,
     128            0 :             init_manager_rx: std::sync::Mutex::new(Some(rx)),
     129            0 :         }
     130            0 :     }
     131              : 
     132              :     /// Issue a new guard and wait for manager to prepare the timeline.
     133              :     /// Sends a message to the manager and waits for the response.
     134              :     /// Can be blocked indefinitely if the manager is stuck.
     135            0 :     pub async fn wal_residence_guard(&self) -> anyhow::Result<ResidenceGuard> {
     136            0 :         let (tx, rx) = tokio::sync::oneshot::channel();
     137            0 :         self.manager_tx.send(ManagerCtlMessage::GuardRequest(tx))?;
     138              : 
     139              :         // wait for the manager to respond with the guard
     140            0 :         rx.await
     141            0 :             .map_err(|e| anyhow::anyhow!("response read fail: {:?}", e))
     142            0 :             .and_then(std::convert::identity)
     143            0 :     }
     144              : 
     145              :     /// Must be called exactly once to bootstrap the manager.
     146            0 :     pub fn bootstrap_manager(
     147            0 :         &self,
     148            0 :     ) -> (
     149            0 :         tokio::sync::mpsc::UnboundedSender<ManagerCtlMessage>,
     150            0 :         tokio::sync::mpsc::UnboundedReceiver<ManagerCtlMessage>,
     151            0 :     ) {
     152            0 :         let rx = self
     153            0 :             .init_manager_rx
     154            0 :             .lock()
     155            0 :             .expect("mutex init_manager_rx poisoned")
     156            0 :             .take()
     157            0 :             .expect("manager already bootstrapped");
     158            0 : 
     159            0 :         (self.manager_tx.clone(), rx)
     160            0 :     }
     161              : }
     162              : 
     163              : pub(crate) struct Manager {
     164              :     // configuration & dependencies
     165              :     pub(crate) tli: ManagerTimeline,
     166              :     pub(crate) conf: SafeKeeperConf,
     167              :     pub(crate) wal_seg_size: usize,
     168              :     pub(crate) walsenders: Arc<WalSenders>,
     169              : 
     170              :     // current state
     171              :     pub(crate) state_version_rx: tokio::sync::watch::Receiver<usize>,
     172              :     pub(crate) num_computes_rx: tokio::sync::watch::Receiver<usize>,
     173              :     pub(crate) tli_broker_active: TimelineSetGuard,
     174              :     pub(crate) last_removed_segno: XLogSegNo,
     175              :     pub(crate) is_offloaded: bool,
     176              : 
     177              :     // background tasks
     178              :     pub(crate) backup_task: Option<WalBackupTaskHandle>,
     179              :     pub(crate) recovery_task: Option<JoinHandle<()>>,
     180              :     pub(crate) wal_removal_task: Option<JoinHandle<anyhow::Result<u64>>>,
     181              : 
     182              :     // partial backup
     183              :     pub(crate) partial_backup_task: Option<JoinHandle<Option<PartialRemoteSegment>>>,
     184              :     pub(crate) partial_backup_uploaded: Option<PartialRemoteSegment>,
     185              : 
     186              :     // misc
     187              :     pub(crate) access_service: AccessService,
     188              :     pub(crate) partial_backup_rate_limiter: RateLimiter,
     189              : 
     190              :     // Anti-flapping state: we evict timelines eagerly if they are inactive, but should not
     191              :     // evict them if they go inactive very soon after being restored.
     192              :     pub(crate) resident_since: std::time::Instant,
     193              : }
     194              : 
     195              : /// This task gets spawned alongside each timeline and is responsible for managing the timeline's
     196              : /// background tasks.
     197              : /// Be careful, this task is not respawned on panic, so it should not panic.
     198            0 : #[instrument(name = "manager", skip_all, fields(ttid = %tli.ttid))]
     199              : pub async fn main_task(
     200              :     tli: ManagerTimeline,
     201              :     conf: SafeKeeperConf,
     202              :     broker_active_set: Arc<TimelinesSet>,
     203              :     manager_tx: tokio::sync::mpsc::UnboundedSender<ManagerCtlMessage>,
     204              :     mut manager_rx: tokio::sync::mpsc::UnboundedReceiver<ManagerCtlMessage>,
     205              :     partial_backup_rate_limiter: RateLimiter,
     206              : ) {
     207              :     tli.set_status(Status::Started);
     208              : 
     209              :     let defer_tli = tli.tli.clone();
     210              :     scopeguard::defer! {
     211              :         if defer_tli.is_cancelled() {
     212              :             info!("manager task finished");
     213              :         } else {
     214              :             warn!("manager task finished prematurely");
     215              :         }
     216              :     };
     217              : 
     218              :     let mut mgr = Manager::new(
     219              :         tli,
     220              :         conf,
     221              :         broker_active_set,
     222              :         manager_tx,
     223              :         partial_backup_rate_limiter,
     224              :     )
     225              :     .await;
     226              : 
     227              :     // Start recovery task which always runs on the timeline.
     228              :     if !mgr.is_offloaded && mgr.conf.peer_recovery_enabled {
     229              :         let tli = mgr.wal_resident_timeline();
     230              :         mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone())));
     231              :     }
     232              : 
     233              :     let last_state = 'outer: loop {
     234              :         MANAGER_ITERATIONS_TOTAL.inc();
     235              : 
     236              :         mgr.set_status(Status::StateSnapshot);
     237              :         let state_snapshot = mgr.state_snapshot().await;
     238              : 
     239              :         let mut next_event: Option<Instant> = None;
     240              :         if !mgr.is_offloaded {
     241              :             let num_computes = *mgr.num_computes_rx.borrow();
     242              : 
     243              :             mgr.set_status(Status::UpdateBackup);
     244              :             let is_wal_backup_required = mgr.update_backup(num_computes, &state_snapshot).await;
     245              :             mgr.update_is_active(is_wal_backup_required, num_computes, &state_snapshot);
     246              : 
     247              :             mgr.set_status(Status::UpdateControlFile);
     248              :             mgr.update_control_file_save(&state_snapshot, &mut next_event)
     249              :                 .await;
     250              : 
     251              :             mgr.set_status(Status::UpdateWalRemoval);
     252              :             mgr.update_wal_removal(&state_snapshot).await;
     253              : 
     254              :             mgr.set_status(Status::UpdatePartialBackup);
     255              :             mgr.update_partial_backup(&state_snapshot).await;
     256              : 
     257              :             if mgr.conf.enable_offload && mgr.ready_for_eviction(&next_event, &state_snapshot) {
     258              :                 mgr.set_status(Status::EvictTimeline);
     259              :                 mgr.evict_timeline().await;
     260              :             }
     261              :         }
     262              : 
     263              :         mgr.set_status(Status::Wait);
     264              :         // wait until something changes. tx channels are stored under Arc, so they will not be
     265              :         // dropped until the manager task is finished.
     266              :         tokio::select! {
     267              :             _ = mgr.tli.cancel.cancelled() => {
     268              :                 // timeline was deleted
     269              :                 break 'outer state_snapshot;
     270              :             }
     271            0 :             _ = async {
     272            0 :                 // don't wake up on every state change, but at most every REFRESH_INTERVAL
     273            0 :                 tokio::time::sleep(REFRESH_INTERVAL).await;
     274            0 :                 let _ = mgr.state_version_rx.changed().await;
     275            0 :             } => {
     276              :                 // state was updated
     277              :             }
     278              :             _ = mgr.num_computes_rx.changed() => {
     279              :                 // number of connected computes was updated
     280              :             }
     281              :             _ = sleep_until(&next_event) => {
     282              :                 // we were waiting for some event (e.g. cfile save)
     283              :             }
     284              :             res = await_task_finish(&mut mgr.wal_removal_task) => {
     285              :                 // WAL removal task finished
     286              :                 mgr.wal_removal_task = None;
     287              :                 mgr.update_wal_removal_end(res);
     288              :             }
     289              :             res = await_task_finish(&mut mgr.partial_backup_task) => {
     290              :                 // partial backup task finished
     291              :                 mgr.partial_backup_task = None;
     292              :                 mgr.update_partial_backup_end(res);
     293              :             }
     294              : 
     295              :             msg = manager_rx.recv() => {
     296              :                 mgr.set_status(Status::HandleMessage);
     297              :                 mgr.handle_message(msg).await;
     298              :             }
     299              :         }
     300              :     };
     301              :     mgr.set_status(Status::Exiting);
     302              : 
     303              :     // remove timeline from the broker active set sooner, before waiting for background tasks
     304              :     mgr.tli_broker_active.set(false);
     305              : 
     306              :     // shutdown background tasks
     307              :     if mgr.conf.is_wal_backup_enabled() {
     308              :         wal_backup::update_task(&mut mgr, false, &last_state).await;
     309              :     }
     310              : 
     311              :     if let Some(recovery_task) = &mut mgr.recovery_task {
     312              :         if let Err(e) = recovery_task.await {
     313              :             warn!("recovery task failed: {:?}", e);
     314              :         }
     315              :     }
     316              : 
     317              :     if let Some(partial_backup_task) = &mut mgr.partial_backup_task {
     318              :         if let Err(e) = partial_backup_task.await {
     319              :             warn!("partial backup task failed: {:?}", e);
     320              :         }
     321              :     }
     322              : 
     323              :     if let Some(wal_removal_task) = &mut mgr.wal_removal_task {
     324              :         let res = wal_removal_task.await;
     325              :         mgr.update_wal_removal_end(res);
     326              :     }
     327              : 
     328              :     mgr.set_status(Status::Finished);
     329              : }
     330              : 
     331              : impl Manager {
     332            0 :     async fn new(
     333            0 :         tli: ManagerTimeline,
     334            0 :         conf: SafeKeeperConf,
     335            0 :         broker_active_set: Arc<TimelinesSet>,
     336            0 :         manager_tx: tokio::sync::mpsc::UnboundedSender<ManagerCtlMessage>,
     337            0 :         partial_backup_rate_limiter: RateLimiter,
     338            0 :     ) -> Manager {
     339            0 :         let (is_offloaded, partial_backup_uploaded) = tli.bootstrap_mgr().await;
     340              :         Manager {
     341            0 :             conf,
     342            0 :             wal_seg_size: tli.get_wal_seg_size().await,
     343            0 :             walsenders: tli.get_walsenders().clone(),
     344            0 :             state_version_rx: tli.get_state_version_rx(),
     345            0 :             num_computes_rx: tli.get_walreceivers().get_num_rx(),
     346            0 :             tli_broker_active: broker_active_set.guard(tli.clone()),
     347            0 :             last_removed_segno: 0,
     348            0 :             is_offloaded,
     349            0 :             backup_task: None,
     350            0 :             recovery_task: None,
     351            0 :             wal_removal_task: None,
     352            0 :             partial_backup_task: None,
     353            0 :             partial_backup_uploaded,
     354            0 :             access_service: AccessService::new(manager_tx),
     355            0 :             tli,
     356            0 :             partial_backup_rate_limiter,
     357            0 :             resident_since: std::time::Instant::now(),
     358            0 :         }
     359            0 :     }
     360              : 
     361            0 :     fn set_status(&self, status: Status) {
     362            0 :         self.tli.set_status(status);
     363            0 :     }
     364              : 
     365              :     /// Get a WalResidentTimeline.
     366              :     /// Manager code must use this function instead of one from `Timeline`
     367              :     /// directly, because it will deadlock.
     368            0 :     pub(crate) fn wal_resident_timeline(&mut self) -> WalResidentTimeline {
     369            0 :         assert!(!self.is_offloaded);
     370            0 :         let guard = self.access_service.create_guard();
     371            0 :         WalResidentTimeline::new(self.tli.clone(), guard)
     372            0 :     }
     373              : 
     374              :     /// Get a snapshot of the timeline state.
     375            0 :     async fn state_snapshot(&self) -> StateSnapshot {
     376            0 :         let _timer = MISC_OPERATION_SECONDS
     377            0 :             .with_label_values(&["state_snapshot"])
     378            0 :             .start_timer();
     379            0 : 
     380            0 :         StateSnapshot::new(
     381            0 :             self.tli.read_shared_state().await,
     382            0 :             self.conf.heartbeat_timeout,
     383            0 :         )
     384            0 :     }
     385              : 
     386              :     /// Spawns/kills backup task and returns true if backup is required.
     387            0 :     async fn update_backup(&mut self, num_computes: usize, state: &StateSnapshot) -> bool {
     388            0 :         let is_wal_backup_required =
     389            0 :             wal_backup::is_wal_backup_required(self.wal_seg_size, num_computes, state);
     390            0 : 
     391            0 :         if self.conf.is_wal_backup_enabled() {
     392            0 :             wal_backup::update_task(self, is_wal_backup_required, state).await;
     393            0 :         }
     394              : 
     395              :         // update the state in Arc<Timeline>
     396            0 :         self.tli.wal_backup_active.store(
     397            0 :             self.backup_task.is_some(),
     398            0 :             std::sync::atomic::Ordering::Relaxed,
     399            0 :         );
     400            0 :         is_wal_backup_required
     401            0 :     }
     402              : 
     403              :     /// Updates the is_active flag in the broker timeline set and in `tli.broker_active`. Returns nothing.
     404            0 :     fn update_is_active(
     405            0 :         &mut self,
     406            0 :         is_wal_backup_required: bool,
     407            0 :         num_computes: usize,
     408            0 :         state: &StateSnapshot,
     409            0 :     ) {
     410            0 :         let is_active = is_wal_backup_required
     411            0 :             || num_computes > 0
     412            0 :             || state.remote_consistent_lsn < state.commit_lsn;
     413              : 
     414              :         // update the broker timeline set
     415            0 :         if self.tli_broker_active.set(is_active) {
     416              :             // write log if state has changed
     417            0 :             info!(
     418            0 :                 "timeline active={} now, remote_consistent_lsn={}, commit_lsn={}",
     419              :                 is_active, state.remote_consistent_lsn, state.commit_lsn,
     420              :             );
     421              : 
     422            0 :             MANAGER_ACTIVE_CHANGES.inc();
     423            0 :         }
     424              : 
     425              :         // update the state in Arc<Timeline>
     426            0 :         self.tli
     427            0 :             .broker_active
     428            0 :             .store(is_active, std::sync::atomic::Ordering::Relaxed);
     429            0 :     }
     430              : 
     431              :     /// Save control file if needed. If a save is pending but the save interval has not elapsed yet, passes the next save time to `next_event` instead.
     432            0 :     async fn update_control_file_save(
     433            0 :         &self,
     434            0 :         state: &StateSnapshot,
     435            0 :         next_event: &mut Option<Instant>,
     436            0 :     ) {
     437            0 :         if !state.inmem_flush_pending {
     438            0 :             return;
     439            0 :         }
     440            0 : 
     441            0 :         if state.cfile_last_persist_at.elapsed() > self.conf.control_file_save_interval {
     442            0 :             let mut write_guard = self.tli.write_shared_state().await;
     443              :             // it should be done in the background because it blocks manager task, but flush() should
     444              :             // be fast enough not to be a problem now
     445            0 :             if let Err(e) = write_guard.sk.state_mut().flush().await {
     446            0 :                 warn!("failed to save control file: {:?}", e);
     447            0 :             }
     448            0 :         } else {
     449            0 :             // we should wait for some time to pass before the next save
     450            0 :             update_next_event(
     451            0 :                 next_event,
     452            0 :                 (state.cfile_last_persist_at + self.conf.control_file_save_interval).into(),
     453            0 :             );
     454            0 :         }
     455            0 :     }
     456              : 
     457              :     /// Spawns WAL removal task if needed.
     458            0 :     async fn update_wal_removal(&mut self, state: &StateSnapshot) {
     459            0 :         if self.wal_removal_task.is_some() || state.wal_removal_on_hold {
     460              :             // WAL removal is already in progress or is on hold
     461            0 :             return;
     462            0 :         }
     463              : 
     464              :         // If enabled, we use LSN of the most lagging walsender as a WAL removal horizon.
     465              :             // This gives better read speed for pageservers that are lagging behind,
     466              :         // at the cost of keeping more WAL on disk.
     467            0 :         let replication_horizon_lsn = if self.conf.walsenders_keep_horizon {
     468            0 :             self.walsenders.laggard_lsn()
     469              :         } else {
     470            0 :             None
     471              :         };
     472              : 
     473            0 :         let removal_horizon_lsn = calc_horizon_lsn(state, replication_horizon_lsn);
     474            0 :         let removal_horizon_segno = removal_horizon_lsn
     475            0 :             .segment_number(self.wal_seg_size)
     476            0 :             .saturating_sub(1);
     477            0 : 
     478            0 :         if removal_horizon_segno > self.last_removed_segno {
     479              :             // we need to remove WAL
     480            0 :             let remover = match self.tli.read_shared_state().await.sk {
     481            0 :                 StateSK::Loaded(ref sk) => {
     482            0 :                     crate::wal_storage::Storage::remove_up_to(&sk.wal_store, removal_horizon_segno)
     483              :                 }
     484              :                 StateSK::Offloaded(_) => {
     485              :                     // we can't remove WAL if it's not loaded
     486            0 :                     warn!("unexpectedly trying to run WAL removal on offloaded timeline");
     487            0 :                     return;
     488              :                 }
     489            0 :                 StateSK::Empty => unreachable!(),
     490              :             };
     491              : 
     492            0 :             self.wal_removal_task = Some(tokio::spawn(
     493            0 :                 async move {
     494            0 :                     remover.await?;
     495            0 :                     Ok(removal_horizon_segno)
     496            0 :                 }
     497            0 :                 .instrument(info_span!("WAL removal", ttid=%self.tli.ttid)),
     498              :             ));
     499            0 :         }
     500            0 :     }
     501              : 
     502              :     /// Update the state after WAL removal task finished.
     503            0 :     fn update_wal_removal_end(&mut self, res: Result<anyhow::Result<u64>, JoinError>) {
     504            0 :         let new_last_removed_segno = match res {
     505            0 :             Ok(Ok(segno)) => segno,
     506            0 :             Err(e) => {
     507            0 :                 warn!("WAL removal task failed: {:?}", e);
     508            0 :                 return;
     509              :             }
     510            0 :             Ok(Err(e)) => {
     511            0 :                 warn!("WAL removal task failed: {:?}", e);
     512            0 :                 return;
     513              :             }
     514              :         };
     515              : 
     516            0 :         self.last_removed_segno = new_last_removed_segno;
     517            0 :         // update the state in Arc<Timeline>
     518            0 :         self.tli
     519            0 :             .last_removed_segno
     520            0 :             .store(new_last_removed_segno, std::sync::atomic::Ordering::Relaxed);
     521            0 :     }
     522              : 
     523              :     /// Spawns partial WAL backup task if needed.
     524            0 :     async fn update_partial_backup(&mut self, state: &StateSnapshot) {
     525            0 :         // check if partial backup is enabled and should be started
     526            0 :         if !self.conf.is_wal_backup_enabled() || !self.conf.partial_backup_enabled {
     527            0 :             return;
     528            0 :         }
     529            0 : 
     530            0 :         if self.partial_backup_task.is_some() {
     531              :             // partial backup is already running
     532            0 :             return;
     533            0 :         }
     534            0 : 
     535            0 :         if !wal_backup_partial::needs_uploading(state, &self.partial_backup_uploaded) {
     536              :             // nothing to upload
     537            0 :             return;
     538            0 :         }
     539            0 : 
     540            0 :         // Get WalResidentTimeline and start partial backup task.
     541            0 :         self.partial_backup_task = Some(tokio::spawn(wal_backup_partial::main_task(
     542            0 :             self.wal_resident_timeline(),
     543            0 :             self.conf.clone(),
     544            0 :             self.partial_backup_rate_limiter.clone(),
     545            0 :         )));
     546            0 :     }
     547              : 
     548              :     /// Update the state after partial WAL backup task finished.
     549            0 :     fn update_partial_backup_end(&mut self, res: Result<Option<PartialRemoteSegment>, JoinError>) {
     550            0 :         match res {
     551            0 :             Ok(new_upload_state) => {
     552            0 :                 self.partial_backup_uploaded = new_upload_state;
     553            0 :             }
     554            0 :             Err(e) => {
     555            0 :                 warn!("partial backup task panicked: {:?}", e);
     556              :             }
     557              :         }
     558            0 :     }
     559              : 
     560              :     /// Handle message arrived from ManagerCtl.
     561            0 :     async fn handle_message(&mut self, msg: Option<ManagerCtlMessage>) {
     562            0 :         debug!("received manager message: {:?}", msg);
     563            0 :         match msg {
     564            0 :             Some(ManagerCtlMessage::GuardRequest(tx)) => {
     565            0 :                 if self.is_offloaded {
     566              :                     // trying to unevict timeline, but without gurarantee that it will be successful
     567            0 :                     self.unevict_timeline().await;
     568            0 :                 }
     569              : 
     570            0 :                 let guard = if self.is_offloaded {
     571            0 :                     Err(anyhow::anyhow!("timeline is offloaded, can't get a guard"))
     572              :                 } else {
     573            0 :                     Ok(self.access_service.create_guard())
     574              :                 };
     575              : 
     576            0 :                 if tx.send(guard).is_err() {
     577            0 :                     warn!("failed to reply with a guard, receiver dropped");
     578            0 :                 }
     579              :             }
     580            0 :             Some(ManagerCtlMessage::GuardDrop(guard_id)) => {
     581            0 :                 self.access_service.drop_guard(guard_id);
     582            0 :             }
     583              :             None => {
     584              :                 // can't happen, we're holding the sender
     585            0 :                 unreachable!();
     586              :             }
     587              :         }
     588            0 :     }
     589              : }
     590              : 
     591              : // utility functions
     592            0 : async fn sleep_until(option: &Option<tokio::time::Instant>) {
     593            0 :     if let Some(timeout) = option {
     594            0 :         tokio::time::sleep_until(*timeout).await;
     595              :     } else {
     596            0 :         futures::future::pending::<()>().await;
     597              :     }
     598            0 : }
     599              : 
     600            0 : async fn await_task_finish<T>(option: &mut Option<JoinHandle<T>>) -> Result<T, JoinError> {
     601            0 :     if let Some(task) = option {
     602            0 :         task.await
     603              :     } else {
     604            0 :         futures::future::pending().await
     605              :     }
     606            0 : }
     607              : 
     608              : /// Update next_event if candidate is earlier.
     609            0 : fn update_next_event(next_event: &mut Option<Instant>, candidate: Instant) {
     610            0 :     if let Some(next) = next_event {
     611            0 :         if candidate < *next {
     612            0 :             *next = candidate;
     613            0 :         }
     614            0 :     } else {
     615            0 :         *next_event = Some(candidate);
     616            0 :     }
     617            0 : }
     618              : 
     619              : #[repr(usize)]
     620            0 : #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
     621              : pub enum Status {
     622              :     NotStarted,
     623              :     Started,
     624              :     StateSnapshot,
     625              :     UpdateBackup,
     626              :     UpdateControlFile,
     627              :     UpdateWalRemoval,
     628              :     UpdatePartialBackup,
     629              :     EvictTimeline,
     630              :     Wait,
     631              :     HandleMessage,
     632              :     Exiting,
     633              :     Finished,
     634              : }
     635              : 
     636              : /// AtomicStatus is a wrapper around AtomicUsize adapted for the Status enum.
     637              : pub struct AtomicStatus {
     638              :     inner: AtomicUsize,
     639              : }
     640              : 
     641              : impl Default for AtomicStatus {
     642            0 :     fn default() -> Self {
     643            0 :         Self::new()
     644            0 :     }
     645              : }
     646              : 
     647              : impl AtomicStatus {
     648            0 :     pub fn new() -> Self {
     649            0 :         AtomicStatus {
     650            0 :             inner: AtomicUsize::new(Status::NotStarted as usize),
     651            0 :         }
     652            0 :     }
     653              : 
     654            0 :     pub fn load(&self, order: std::sync::atomic::Ordering) -> Status {
     655            0 :         // Safety: This line of code uses `std::mem::transmute` to reinterpret the loaded value as `Status`.
     656            0 :         // It is safe to use `transmute` in this context because `Status` is a repr(usize) enum,
     657            0 :         // which means it has the same memory layout as usize.
     658            0 :         // However, it is important to ensure that the loaded value is a valid variant of `Status`,
     659            0 :         // otherwise, the behavior will be undefined.
     660            0 :         unsafe { std::mem::transmute(self.inner.load(order)) }
     661            0 :     }
     662              : 
     663            0 :     pub fn get(&self) -> Status {
     664            0 :         self.load(std::sync::atomic::Ordering::Relaxed)
     665            0 :     }
     666              : 
     667            0 :     pub fn store(&self, val: Status, order: std::sync::atomic::Ordering) {
     668            0 :         self.inner.store(val as usize, order);
     669            0 :     }
     670              : }

Generated by: LCOV version 2.1-beta