LCOV - code coverage report
Current view: top level - storage_scrubber/src - metadata_stream.rs (source / functions) Coverage Total Hit
Test: ccf45ed1c149555259baec52d6229a81013dcd6a.info Lines: 0.0 % 92 0
Test Date: 2024-08-21 17:32:46 Functions: 0.0 % 15 0

            Line data    Source code
       1              : use std::str::FromStr;
       2              : 
       3              : use anyhow::{anyhow, Context};
       4              : use async_stream::{stream, try_stream};
       5              : use futures::StreamExt;
       6              : use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePath};
       7              : use tokio_stream::Stream;
       8              : 
       9              : use crate::{
      10              :     list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target,
      11              :     TenantShardTimelineId,
      12              : };
      13              : use pageserver_api::shard::TenantShardId;
      14              : use utils::id::{TenantId, TimelineId};
      15              : 
      16              : /// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes
      17            0 : pub fn stream_tenants<'a>(
      18            0 :     remote_client: &'a GenericRemoteStorage,
      19            0 :     target: &'a RootTarget,
      20            0 : ) -> impl Stream<Item = anyhow::Result<TenantShardId>> + 'a {
      21              :     try_stream! {
      22              :         let tenants_target = target.tenants_root();
      23              :         let mut tenants_stream =
      24              :             std::pin::pin!(stream_objects_with_retries(remote_client, ListingMode::WithDelimiter, &tenants_target));
      25              :         while let Some(chunk) = tenants_stream.next().await {
      26              :             let chunk = chunk?;
      27              :             let entry_ids = chunk.prefixes.iter()
      28            0 :                 .map(|prefix| prefix.get_path().file_name().ok_or_else(|| anyhow!("no final component in path '{prefix}'")));
      29              :             for dir_name_res in entry_ids {
      30              :                 let dir_name = dir_name_res?;
      31              :                 let id = TenantShardId::from_str(dir_name)?;
      32              :                 yield id;
      33              :             }
      34              :         }
      35              :     }
      36            0 : }
      37              : 
      38            0 : pub async fn stream_tenant_shards<'a>(
      39            0 :     remote_client: &'a GenericRemoteStorage,
      40            0 :     target: &'a RootTarget,
      41            0 :     tenant_id: TenantId,
      42            0 : ) -> anyhow::Result<impl Stream<Item = Result<TenantShardId, anyhow::Error>> + 'a> {
      43            0 :     let shards_target = target.tenant_shards_prefix(&tenant_id);
      44            0 : 
      45            0 :     let strip_prefix = target.tenants_root().prefix_in_bucket;
      46            0 :     let prefix_str = &strip_prefix.strip_prefix("/").unwrap_or(&strip_prefix);
      47            0 : 
      48            0 :     tracing::info!("Listing shards in {}", shards_target.prefix_in_bucket);
      49            0 :     let listing =
      50            0 :         list_objects_with_retries(remote_client, ListingMode::WithDelimiter, &shards_target)
      51            0 :             .await?;
      52              : 
      53            0 :     let tenant_shard_ids = listing
      54            0 :         .prefixes
      55            0 :         .iter()
      56            0 :         .map(|prefix| prefix.get_path().as_str())
      57            0 :         .filter_map(|prefix| -> Option<&str> { prefix.strip_prefix(prefix_str) })
      58            0 :         .map(|entry_id_str| {
      59            0 :             let first_part = entry_id_str.split('/').next().unwrap();
      60            0 : 
      61            0 :             first_part
      62            0 :                 .parse::<TenantShardId>()
      63            0 :                 .with_context(|| format!("Incorrect entry id str: {first_part}"))
      64            0 :         })
      65            0 :         .collect::<Vec<_>>();
      66            0 : 
      67            0 :     tracing::debug!("Yielding {} shards for {tenant_id}", tenant_shard_ids.len());
      68            0 :     Ok(stream! {
      69              :         for i in tenant_shard_ids {
      70              :             let id = i?;
      71              :             yield Ok(id);
      72              :         }
      73            0 :     })
      74            0 : }
      75              : 
      76              : /// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered
      77              : /// using a listing. The listing is done before the stream is built, so that this
      78              : /// function can be used to generate concurrency on a stream using buffer_unordered.
      79            0 : pub async fn stream_tenant_timelines<'a>(
      80            0 :     remote_client: &'a GenericRemoteStorage,
      81            0 :     target: &'a RootTarget,
      82            0 :     tenant: TenantShardId,
      83            0 : ) -> anyhow::Result<impl Stream<Item = Result<TenantShardTimelineId, anyhow::Error>> + 'a> {
      84            0 :     let mut timeline_ids: Vec<Result<TimelineId, anyhow::Error>> = Vec::new();
      85            0 :     let timelines_target = target.timelines_root(&tenant);
      86            0 : 
      87            0 :     let prefix_str = &timelines_target
      88            0 :         .prefix_in_bucket
      89            0 :         .strip_prefix("/")
      90            0 :         .unwrap_or(&timelines_target.prefix_in_bucket);
      91            0 : 
      92            0 :     let mut objects_stream = std::pin::pin!(stream_objects_with_retries(
      93            0 :         remote_client,
      94            0 :         ListingMode::WithDelimiter,
      95            0 :         &timelines_target
      96            0 :     ));
      97            0 :     loop {
      98            0 :         tracing::debug!("Listing in {tenant}");
      99            0 :         let fetch_response = match objects_stream.next().await {
     100            0 :             None => break,
     101            0 :             Some(Err(e)) => {
     102            0 :                 timeline_ids.push(Err(e));
     103            0 :                 break;
     104              :             }
     105            0 :             Some(Ok(r)) => r,
     106            0 :         };
     107            0 : 
     108            0 :         let new_entry_ids = fetch_response
     109            0 :             .prefixes
     110            0 :             .iter()
     111            0 :             .filter_map(|prefix| -> Option<&str> {
     112            0 :                 prefix.get_path().as_str().strip_prefix(prefix_str)
     113            0 :             })
     114            0 :             .map(|entry_id_str| {
     115            0 :                 entry_id_str
     116            0 :                     .parse::<TimelineId>()
     117            0 :                     .with_context(|| format!("Incorrect entry id str: {entry_id_str}"))
     118            0 :             });
     119              : 
     120            0 :         for i in new_entry_ids {
     121            0 :             timeline_ids.push(i);
     122            0 :         }
     123              :     }
     124              : 
     125            0 :     tracing::debug!("Yielding {} timelines for {}", timeline_ids.len(), tenant);
     126            0 :     Ok(stream! {
     127              :         for i in timeline_ids {
     128              :             let id = i?;
     129              :             yield Ok(TenantShardTimelineId::new(tenant, id));
     130              :         }
     131            0 :     })
     132            0 : }
     133              : 
     134            0 : pub(crate) fn stream_listing<'a>(
     135            0 :     remote_client: &'a GenericRemoteStorage,
     136            0 :     target: &'a S3Target,
     137            0 : ) -> impl Stream<Item = anyhow::Result<(RemotePath, Option<ListingObject>)>> + 'a {
     138            0 :     let listing_mode = if target.delimiter.is_empty() {
     139            0 :         ListingMode::NoDelimiter
     140              :     } else {
     141            0 :         ListingMode::WithDelimiter
     142              :     };
     143              :     try_stream! {
     144              :         let mut objects_stream = std::pin::pin!(stream_objects_with_retries(
     145              :             remote_client,
     146              :             listing_mode,
     147              :             target,
     148              :         ));
     149              :         while let Some(list) = objects_stream.next().await {
     150              :             let list = list?;
     151              :             if target.delimiter.is_empty() {
     152              :                 for key in list.keys {
     153              :                     yield (key.key.clone(), Some(key));
     154              :                 }
     155              :             } else {
     156              :                 for key in list.prefixes {
     157              :                     yield (key, None);
     158              :                 }
     159              :             }
     160              :         }
     161              :     }
     162            0 : }
        

Generated by: LCOV version 2.1-beta