LCOV - code coverage report
Current view: top level - storage_scrubber/src - metadata_stream.rs (source / functions) Coverage Total Hit
Test: b9d67f908f91f00e353a27440ba89f642a869959.info Lines: 0.0 % 133 0
Test Date: 2024-11-19 21:44:13 Functions: 0.0 % 15 0

            Line data    Source code
       1              : use std::str::FromStr;
       2              : 
       3              : use anyhow::{anyhow, Context};
       4              : use async_stream::{stream, try_stream};
       5              : use futures::StreamExt;
       6              : use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePath};
       7              : use tokio_stream::Stream;
       8              : 
       9              : use crate::{
      10              :     list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target,
      11              :     TenantShardTimelineId,
      12              : };
      13              : use pageserver_api::shard::TenantShardId;
      14              : use utils::id::{TenantId, TimelineId};
      15              : 
      16              : /// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes
      17            0 : pub fn stream_tenants<'a>(
      18            0 :     remote_client: &'a GenericRemoteStorage,
      19            0 :     target: &'a RootTarget,
      20            0 : ) -> impl Stream<Item = anyhow::Result<TenantShardId>> + 'a {
      21            0 :     try_stream! {
      22            0 :         let tenants_target = target.tenants_root();
      23            0 :         let mut tenants_stream =
      24            0 :             std::pin::pin!(stream_objects_with_retries(remote_client, ListingMode::WithDelimiter, &tenants_target));
      25            0 :         while let Some(chunk) = tenants_stream.next().await {
      26            0 :             let chunk = chunk?;
      27            0 :             let entry_ids = chunk.prefixes.iter()
      28            0 :                 .map(|prefix| prefix.get_path().file_name().ok_or_else(|| anyhow!("no final component in path '{prefix}'")));
      29            0 :             for dir_name_res in entry_ids {
      30            0 :                 let dir_name = dir_name_res?;
      31            0 :                 let id = TenantShardId::from_str(dir_name)?;
      32            0 :                 yield id;
      33            0 :             }
      34            0 :         }
      35            0 :     }
      36            0 : }
      37              : 
      38            0 : pub async fn stream_tenant_shards<'a>(
      39            0 :     remote_client: &'a GenericRemoteStorage,
      40            0 :     target: &'a RootTarget,
      41            0 :     tenant_id: TenantId,
      42            0 : ) -> anyhow::Result<impl Stream<Item = Result<TenantShardId, anyhow::Error>> + 'a> {
      43            0 :     let shards_target = target.tenant_shards_prefix(&tenant_id);
      44            0 : 
      45            0 :     let strip_prefix = target.tenants_root().prefix_in_bucket;
      46            0 :     let prefix_str = &strip_prefix.strip_prefix("/").unwrap_or(&strip_prefix);
      47            0 : 
      48            0 :     tracing::info!("Listing shards in {}", shards_target.prefix_in_bucket);
      49            0 :     let listing =
      50            0 :         list_objects_with_retries(remote_client, ListingMode::WithDelimiter, &shards_target)
      51            0 :             .await?;
      52              : 
      53            0 :     let tenant_shard_ids = listing
      54            0 :         .prefixes
      55            0 :         .iter()
      56            0 :         .map(|prefix| prefix.get_path().as_str())
      57            0 :         .filter_map(|prefix| -> Option<&str> { prefix.strip_prefix(prefix_str) })
      58            0 :         .map(|entry_id_str| {
      59            0 :             let first_part = entry_id_str.split('/').next().unwrap();
      60            0 : 
      61            0 :             first_part
      62            0 :                 .parse::<TenantShardId>()
      63            0 :                 .with_context(|| format!("Incorrect tenant entry id str: {first_part}"))
      64            0 :         })
      65            0 :         .collect::<Vec<_>>();
      66            0 : 
      67            0 :     tracing::debug!("Yielding {} shards for {tenant_id}", tenant_shard_ids.len());
      68            0 :     Ok(stream! {
      69            0 :         for i in tenant_shard_ids {
      70            0 :             let id = i?;
      71            0 :             yield Ok(id);
      72            0 :         }
      73            0 :     })
      74            0 : }
      75              : 
      76              : /// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered
      77              : /// using a listing.
      78              : ///
      79              : /// The listing is done before the stream is built, so that this
      80              : /// function can be used to generate concurrency on a stream using buffer_unordered.
      81            0 : pub async fn stream_tenant_timelines<'a>(
      82            0 :     remote_client: &'a GenericRemoteStorage,
      83            0 :     target: &'a RootTarget,
      84            0 :     tenant: TenantShardId,
      85            0 : ) -> anyhow::Result<impl Stream<Item = Result<TenantShardTimelineId, anyhow::Error>> + 'a> {
      86            0 :     let mut timeline_ids: Vec<Result<TimelineId, anyhow::Error>> = Vec::new();
      87            0 :     let timelines_target = target.timelines_root(&tenant);
      88            0 : 
      89            0 :     let prefix_str = &timelines_target
      90            0 :         .prefix_in_bucket
      91            0 :         .strip_prefix("/")
      92            0 :         .unwrap_or(&timelines_target.prefix_in_bucket);
      93            0 : 
      94            0 :     let mut objects_stream = std::pin::pin!(stream_objects_with_retries(
      95            0 :         remote_client,
      96            0 :         ListingMode::WithDelimiter,
      97            0 :         &timelines_target
      98            0 :     ));
      99              :     loop {
     100            0 :         tracing::debug!("Listing in {tenant}");
     101            0 :         let fetch_response = match objects_stream.next().await {
     102            0 :             None => break,
     103            0 :             Some(Err(e)) => {
     104            0 :                 timeline_ids.push(Err(e));
     105            0 :                 break;
     106              :             }
     107            0 :             Some(Ok(r)) => r,
     108            0 :         };
     109            0 : 
     110            0 :         let new_entry_ids = fetch_response
     111            0 :             .prefixes
     112            0 :             .iter()
     113            0 :             .filter_map(|prefix| -> Option<&str> {
     114            0 :                 prefix.get_path().as_str().strip_prefix(prefix_str)
     115            0 :             })
     116            0 :             .map(|entry_id_str| {
     117            0 :                 let first_part = entry_id_str.split('/').next().unwrap();
     118            0 :                 first_part
     119            0 :                     .parse::<TimelineId>()
     120            0 :                     .with_context(|| format!("Incorrect timeline entry id str: {entry_id_str}"))
     121            0 :             });
     122              : 
     123            0 :         for i in new_entry_ids {
     124            0 :             timeline_ids.push(i);
     125            0 :         }
     126              :     }
     127              : 
     128            0 :     tracing::debug!("Yielding {} timelines for {}", timeline_ids.len(), tenant);
     129            0 :     Ok(stream! {
     130            0 :         for i in timeline_ids {
     131            0 :             let id = i?;
     132            0 :             yield Ok(TenantShardTimelineId::new(tenant, id));
     133            0 :         }
     134            0 :     })
     135            0 : }
     136              : 
     137            0 : pub(crate) fn stream_listing<'a>(
     138            0 :     remote_client: &'a GenericRemoteStorage,
     139            0 :     target: &'a S3Target,
     140            0 : ) -> impl Stream<Item = anyhow::Result<(RemotePath, Option<ListingObject>)>> + 'a {
     141            0 :     let listing_mode = if target.delimiter.is_empty() {
     142            0 :         ListingMode::NoDelimiter
     143              :     } else {
     144            0 :         ListingMode::WithDelimiter
     145              :     };
     146            0 :     try_stream! {
     147            0 :         let mut objects_stream = std::pin::pin!(stream_objects_with_retries(
     148            0 :             remote_client,
     149            0 :             listing_mode,
     150            0 :             target,
     151            0 :         ));
     152            0 :         while let Some(list) = objects_stream.next().await {
     153            0 :             let list = list?;
     154            0 :             if target.delimiter.is_empty() {
     155            0 :                 for key in list.keys {
     156            0 :                     yield (key.key.clone(), Some(key));
     157            0 :                 }
     158            0 :             } else {
     159            0 :                 for key in list.prefixes {
     160            0 :                     yield (key, None);
     161            0 :                 }
     162            0 :             }
     163            0 :         }
     164            0 :     }
     165            0 : }
        

Generated by: LCOV version 2.1-beta