       1              : //! Implementation of append-only file data structure
       2              : //! used to keep in-memory layers spilled on disk.
       3              : 
       4              : use std::io;
       5              : use std::sync::Arc;
       6              : use std::sync::atomic::AtomicU64;
       7              : 
       8              : use camino::Utf8PathBuf;
       9              : use num_traits::Num;
      10              : use pageserver_api::shard::TenantShardId;
      11              : use tokio_epoll_uring::{BoundedBuf, Slice};
      12              : use tracing::{error, info_span};
      13              : use utils::id::TimelineId;
      14              : 
      15              : use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64};
      16              : use crate::config::PageServerConf;
      17              : use crate::context::RequestContext;
      18              : use crate::page_cache;
      19              : use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
      20              : use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
      21              : use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
      22              : use crate::virtual_file::owned_buffers_io::write::Buffer;
      23              : use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io};
      24              : 
/// Append-only file backing an in-memory layer that has been spilled to disk.
///
/// Writes go through a [`owned_buffers_io::write::BufferedWriter`], so at any point in
/// time part of the logical content may still live in the writer's in-memory buffers
/// rather than in the underlying [`VirtualFile`]. The file is unlinked when this value
/// is dropped.
pub struct EphemeralFile {
    // Identity of the owning timeline; stored but not read by the code in this file.
    _tenant_shard_id: TenantShardId,
    _timeline_id: TimelineId,
    /// Id obtained from `page_cache::next_file_id()` at creation time and exposed via
    /// `page_cache_file_id()`.
    page_cache_file_id: page_cache::FileId,
    /// Logical length of the file: the number of bytes accepted by the write path so far,
    /// including bytes that are still buffered and not yet on disk.
    bytes_written: u64,
    /// Buffered writer that owns the underlying [`VirtualFile`].
    buffered_writer: owned_buffers_io::write::BufferedWriter<IoBufferMut, VirtualFile>,
    /// Gate guard is held on as long as we need to do operations in the path (delete on drop)
    _gate_guard: utils::sync::gate::GateGuard,
}
      34              : 
/// Capacity, in bytes, of each buffer handed to the buffered writer (64 KiB).
///
/// The read path also uses this value as the size of the "maybe flushed" region that
/// immediately precedes the writer's submitted offset.
const TAIL_SZ: usize = 64 * 1024;
      36              : 
      37              : impl EphemeralFile {
      38         2632 :     pub async fn create(
      39         2632 :         conf: &PageServerConf,
      40         2632 :         tenant_shard_id: TenantShardId,
      41         2632 :         timeline_id: TimelineId,
      42         2632 :         gate: &utils::sync::gate::Gate,
      43         2632 :         ctx: &RequestContext,
      44         2632 :     ) -> anyhow::Result<EphemeralFile> {
      45              :         static NEXT_FILENAME: AtomicU64 = AtomicU64::new(1);
      46         2632 :         let filename_disambiguator =
      47         2632 :             NEXT_FILENAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
      48         2632 : 
      49         2632 :         let filename = conf
      50         2632 :             .timeline_path(&tenant_shard_id, &timeline_id)
      51         2632 :             .join(Utf8PathBuf::from(format!(
      52         2632 :                 "ephemeral-{filename_disambiguator}"
      53         2632 :             )));
      54              : 
      55         2632 :         let file = Arc::new(
      56         2632 :             VirtualFile::open_with_options_v2(
      57         2632 :                 &filename,
      58         2632 :                 virtual_file::OpenOptions::new()
      59         2632 :                     .read(true)
      60         2632 :                     .write(true)
      61         2632 :                     .create(true),
      62         2632 :                 ctx,
      63         2632 :             )
      64         2632 :             .await?,
      65              :         );
      66              : 
      67         2632 :         let page_cache_file_id = page_cache::next_file_id(); // XXX get rid, we're not page-caching anymore
      68         2632 : 
      69         2632 :         Ok(EphemeralFile {
      70         2632 :             _tenant_shard_id: tenant_shard_id,
      71         2632 :             _timeline_id: timeline_id,
      72         2632 :             page_cache_file_id,
      73         2632 :             bytes_written: 0,
      74         2632 :             buffered_writer: owned_buffers_io::write::BufferedWriter::new(
      75         2632 :                 file,
      76         5264 :                 || IoBufferMut::with_capacity(TAIL_SZ),
      77         2632 :                 gate.enter()?,
      78         2632 :                 ctx,
      79         2632 :                 info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename),
      80              :             ),
      81         2632 :             _gate_guard: gate.enter()?,
      82              :         })
      83         2632 :     }
      84              : }
      85              : 
      86              : impl Drop for EphemeralFile {
      87         2380 :     fn drop(&mut self) {
      88         2380 :         // unlink the file
      89         2380 :         // we are clear to do this, because we have entered a gate
      90         2380 :         let path = self.buffered_writer.as_inner().path();
      91         2380 :         let res = std::fs::remove_file(path);
      92         2380 :         if let Err(e) = res {
      93            4 :             if e.kind() != std::io::ErrorKind::NotFound {
      94              :                 // just never log the not found errors, we cannot do anything for them; on detach
      95              :                 // the tenant directory is already gone.
      96              :                 //
      97              :                 // not found files might also be related to
      98            0 :                 error!("could not remove ephemeral file '{path}': {e}");
      99            4 :             }
     100         2376 :         }
     101         2380 :     }
     102              : }
     103              : 
     104              : impl EphemeralFile {
     105     19226220 :     pub(crate) fn len(&self) -> u64 {
     106     19226220 :         self.bytes_written
     107     19226220 :     }
     108              : 
     109         2616 :     pub(crate) fn page_cache_file_id(&self) -> page_cache::FileId {
     110         2616 :         self.page_cache_file_id
     111         2616 :     }
     112              : 
     113         1940 :     pub(crate) async fn load_to_io_buf(
     114         1940 :         &self,
     115         1940 :         ctx: &RequestContext,
     116         1940 :     ) -> Result<IoBufferMut, io::Error> {
     117         1940 :         let size = self.len().into_usize();
     118         1940 :         let buf = IoBufferMut::with_capacity(size);
     119         1940 :         let (slice, nread) = self.read_exact_at_eof_ok(0, buf.slice_full(), ctx).await?;
     120         1940 :         assert_eq!(nread, size);
     121         1940 :         let buf = slice.into_inner();
     122         1940 :         assert_eq!(buf.len(), nread);
     123         1940 :         assert_eq!(buf.capacity(), size, "we shouldn't be reallocating");
     124         1940 :         Ok(buf)
     125         1940 :     }
     126              : 
     127              :     /// Returns the offset at which the first byte of the input was written, for use
     128              :     /// in constructing indices over the written value.
     129              :     ///
     130              :     /// Panics if the write is short because there's no way we can recover from that.
     131              :     /// TODO: make upstack handle this as an error.
     132      9609764 :     pub(crate) async fn write_raw(
     133      9609764 :         &mut self,
     134      9609764 :         srcbuf: &[u8],
     135      9609764 :         ctx: &RequestContext,
     136      9609764 :     ) -> std::io::Result<u64> {
     137      9609764 :         let (pos, control) = self.write_raw_controlled(srcbuf, ctx).await?;
     138      9609764 :         if let Some(control) = control {
     139        11068 :             control.release().await;
     140      9598696 :         }
     141      9609764 :         Ok(pos)
     142      9609764 :     }
     143              : 
     144      9609768 :     async fn write_raw_controlled(
     145      9609768 :         &mut self,
     146      9609768 :         srcbuf: &[u8],
     147      9609768 :         ctx: &RequestContext,
     148      9609768 :     ) -> std::io::Result<(u64, Option<owned_buffers_io::write::FlushControl>)> {
     149      9609768 :         let pos = self.bytes_written;
     150              : 
     151      9609768 :         let new_bytes_written = pos.checked_add(srcbuf.len().into_u64()).ok_or_else(|| {
     152            0 :             std::io::Error::new(
     153            0 :                 std::io::ErrorKind::Other,
     154            0 :                 format!(
     155            0 :                     "write would grow EphemeralFile beyond u64::MAX: len={pos} writen={srcbuf_len}",
     156            0 :                     srcbuf_len = srcbuf.len(),
     157            0 :                 ),
     158            0 :             )
     159      9609768 :         })?;
     160              : 
     161              :         // Write the payload
     162      9609768 :         let (nwritten, control) = self
     163      9609768 :             .buffered_writer
     164      9609768 :             .write_buffered_borrowed_controlled(srcbuf, ctx)
     165      9609768 :             .await?;
     166      9609768 :         assert_eq!(
     167      9609768 :             nwritten,
     168      9609768 :             srcbuf.len(),
     169            0 :             "buffered writer has no short writes"
     170              :         );
     171              : 
     172      9609768 :         self.bytes_written = new_bytes_written;
     173      9609768 : 
     174      9609768 :         Ok((pos, control))
     175      9609768 :     }
     176              : }
     177              : 
     178              : impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
     179       997837 :     async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
     180       997837 :         &self,
     181       997837 :         start: u64,
     182       997837 :         dst: tokio_epoll_uring::Slice<B>,
     183       997837 :         ctx: &RequestContext,
     184       997837 :     ) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {
     185       997837 :         let submitted_offset = self.buffered_writer.bytes_submitted();
     186       997837 : 
     187       997837 :         let mutable = self.buffered_writer.inspect_mutable();
     188       997837 :         let mutable = &mutable[0..mutable.pending()];
     189       997837 : 
     190       997837 :         let maybe_flushed = self.buffered_writer.inspect_maybe_flushed();
     191       997837 : 
     192       997837 :         let dst_cap = dst.bytes_total().into_u64();
     193       997837 :         let end = {
     194              :             // saturating_add is correct here because the max file size is u64::MAX, so,
     195              :             // if start + dst.len() > u64::MAX, then we know it will be a short read
     196       997837 :             let mut end: u64 = start.saturating_add(dst_cap);
     197       997837 :             if end > self.bytes_written {
     198       554384 :                 end = self.bytes_written;
     199       554384 :             }
     200       997837 :             end
     201              :         };
     202              : 
     203              :         // inclusive, exclusive
     204              :         #[derive(Debug)]
     205              :         struct Range<N>(N, N);
     206              :         impl<N: Num + Clone + Copy + PartialOrd + Ord> Range<N> {
     207      6615040 :             fn len(&self) -> N {
     208      6615040 :                 if self.0 > self.1 {
     209      3550383 :                     N::zero()
     210              :                 } else {
     211      3064657 :                     self.1 - self.0
     212              :                 }
     213      6615040 :             }
     214              :         }
     215              : 
     216       997837 :         let (written_range, maybe_flushed_range) = {
     217       997837 :             if maybe_flushed.is_some() {
     218              :                 // [       written       ][ maybe_flushed ][    mutable    ]
     219              :                 //                        <-   TAIL_SZ   -><-   TAIL_SZ   ->
     220              :                 //                                         ^
     221              :                 //                                 `submitted_offset`
     222              :                 // <++++++ on disk +++++++????????????????>
     223       977127 :                 (
     224       977127 :                     Range(
     225       977127 :                         start,
     226       977127 :                         std::cmp::min(end, submitted_offset.saturating_sub(TAIL_SZ as u64)),
     227       977127 :                     ),
     228       977127 :                     Range(
     229       977127 :                         std::cmp::max(start, submitted_offset.saturating_sub(TAIL_SZ as u64)),
     230       977127 :                         std::cmp::min(end, submitted_offset),
     231       977127 :                     ),
     232       977127 :                 )
     233              :             } else {
     234              :                 // [       written                        ][    mutable    ]
     235              :                 //                                         <-   TAIL_SZ   ->
     236              :                 //                                         ^
     237              :                 //                                 `submitted_offset`
     238              :                 // <++++++ on disk +++++++++++++++++++++++>
     239        20710 :                 (
     240        20710 :                     Range(start, std::cmp::min(end, submitted_offset)),
     241        20710 :                     // zero len
     242        20710 :                     Range(submitted_offset, u64::MIN),
     243        20710 :                 )
     244              :             }
     245              :         };
     246              : 
     247       997837 :         let mutable_range = Range(std::cmp::max(start, submitted_offset), end);
     248              : 
     249       997837 :         let dst = if written_range.len() > 0 {
     250        20335 :             let file: &VirtualFile = self.buffered_writer.as_inner();
     251        20335 :             let bounds = dst.bounds();
     252        20335 :             let slice = file
     253        20335 :                 .read_exact_at(dst.slice(0..written_range.len().into_usize()), start, ctx)
     254        20335 :                 .await?;
     255        20335 :             Slice::from_buf_bounds(Slice::into_inner(slice), bounds)
     256              :         } else {
     257       977502 :             dst
     258              :         };
     259              : 
     260       997837 :         let dst = if maybe_flushed_range.len() > 0 {
     261       321206 :             let offset_in_buffer = maybe_flushed_range
     262       321206 :                 .0
     263       321206 :                 .checked_sub(submitted_offset.saturating_sub(TAIL_SZ as u64))
     264       321206 :                 .unwrap()
     265       321206 :                 .into_usize();
     266       321206 :             // Checked previously the buffer is Some.
     267       321206 :             let maybe_flushed = maybe_flushed.unwrap();
     268       321206 :             let to_copy = &maybe_flushed
     269       321206 :                 [offset_in_buffer..(offset_in_buffer + maybe_flushed_range.len().into_usize())];
     270       321206 :             let bounds = dst.bounds();
     271       321206 :             let mut view = dst.slice({
     272       321206 :                 let start = written_range.len().into_usize();
     273       321206 :                 let end = start
     274       321206 :                     .checked_add(maybe_flushed_range.len().into_usize())
     275       321206 :                     .unwrap();
     276       321206 :                 start..end
     277       321206 :             });
     278       321206 :             view.as_mut_rust_slice_full_zeroed()
     279       321206 :                 .copy_from_slice(to_copy);
     280       321206 :             Slice::from_buf_bounds(Slice::into_inner(view), bounds)
     281              :         } else {
     282       676631 :             dst
     283              :         };
     284              : 
     285       997837 :         let dst = if mutable_range.len() > 0 {
     286       659394 :             let offset_in_buffer = mutable_range
     287       659394 :                 .0
     288       659394 :                 .checked_sub(submitted_offset)
     289       659394 :                 .unwrap()
     290       659394 :                 .into_usize();
     291       659394 :             let to_copy =
     292       659394 :                 &mutable[offset_in_buffer..(offset_in_buffer + mutable_range.len().into_usize())];
     293       659394 :             let bounds = dst.bounds();
     294       659394 :             let mut view = dst.slice({
     295       659394 :                 let start =
     296       659394 :                     written_range.len().into_usize() + maybe_flushed_range.len().into_usize();
     297       659394 :                 let end = start.checked_add(mutable_range.len().into_usize()).unwrap();
     298       659394 :                 start..end
     299       659394 :             });
     300       659394 :             view.as_mut_rust_slice_full_zeroed()
     301       659394 :                 .copy_from_slice(to_copy);
     302       659394 :             Slice::from_buf_bounds(Slice::into_inner(view), bounds)
     303              :         } else {
     304       338443 :             dst
     305              :         };
     306              : 
     307              :         // TODO: in debug mode, randomize the remaining bytes in `dst` to catch bugs
     308              : 
     309       997837 :         Ok((dst, (end - start).into_usize()))
     310       997837 :     }
     311              : }
     312              : 
     313              : /// Does the given filename look like an ephemeral file?
     314            0 : pub fn is_ephemeral_file(filename: &str) -> bool {
     315            0 :     if let Some(rest) = filename.strip_prefix("ephemeral-") {
     316            0 :         rest.parse::<u32>().is_ok()
     317              :     } else {
     318            0 :         false
     319              :     }
     320            0 : }
     321              : 
     322              : #[cfg(test)]
     323              : mod tests {
     324              :     use std::fs;
     325              :     use std::str::FromStr;
     326              : 
     327              :     use rand::Rng;
     328              : 
     329              :     use super::*;
     330              :     use crate::context::DownloadBehavior;
     331              :     use crate::task_mgr::TaskKind;
     332              : 
     333           16 :     fn harness(
     334           16 :         test_name: &str,
     335           16 :     ) -> Result<
     336           16 :         (
     337           16 :             &'static PageServerConf,
     338           16 :             TenantShardId,
     339           16 :             TimelineId,
     340           16 :             RequestContext,
     341           16 :         ),
     342           16 :         io::Error,
     343           16 :     > {
     344           16 :         let repo_dir = PageServerConf::test_repo_dir(test_name);
     345           16 :         let _ = fs::remove_dir_all(&repo_dir);
     346           16 :         let conf = PageServerConf::dummy_conf(repo_dir);
     347           16 :         // Make a static copy of the config. This can never be free'd, but that's
     348           16 :         // OK in a test.
     349           16 :         let conf: &'static PageServerConf = Box::leak(Box::new(conf));
     350           16 : 
     351           16 :         let tenant_shard_id = TenantShardId::from_str("11000000000000000000000000000000").unwrap();
     352           16 :         let timeline_id = TimelineId::from_str("22000000000000000000000000000000").unwrap();
     353           16 :         fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?;
     354              : 
     355           16 :         let ctx =
     356           16 :             RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test();
     357           16 : 
     358           16 :         Ok((conf, tenant_shard_id, timeline_id, ctx))
     359           16 :     }
     360              : 
    /// Verifies that an `EphemeralFile` keeps the gate entered for as long as it is alive:
    /// closing the gate must not complete while the file exists, and must complete once
    /// the file has been dropped.
    #[tokio::test]
    async fn ephemeral_file_holds_gate_open() {
        // Upper bound for each wait below; the tokio clock is paused before it is used.
        const FOREVER: std::time::Duration = std::time::Duration::from_secs(5);

        let (conf, tenant_id, timeline_id, ctx) =
            harness("ephemeral_file_holds_gate_open").unwrap();

        let gate = utils::sync::gate::Gate::default();

        let file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
            .await
            .unwrap();

        // Try to close the gate in the background while the file is still alive.
        let mut closing = tokio::task::spawn(async move {
            gate.close().await;
        });

        // The gate stays entered until the ephemeral file is dropped.
        // Time is paused only here rather than from the start of the test, because
        // tokio-epoll-uring has a sleep loop.
        tokio::time::pause();
        tokio::time::timeout(FOREVER, &mut closing)
            .await
            .expect_err("closing cannot complete before dropping");

        // this is a requirement of the reset_tenant functionality: we have to be able to restart a
        // tenant fast, and for that, we need all tenant_dir operations be guarded by entering a gate
        drop(file);

        tokio::time::timeout(FOREVER, &mut closing)
            .await
            .expect("closing completes right away")
            .expect("closing does not panic");
    }
     394              : 
     395              :     #[tokio::test]
     396            4 :     async fn test_ephemeral_file_basics() {
     397            4 :         let (conf, tenant_id, timeline_id, ctx) = harness("test_ephemeral_file_basics").unwrap();
     398            4 : 
     399            4 :         let gate = utils::sync::gate::Gate::default();
     400            4 : 
     401            4 :         let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
     402            4 :             .await
     403            4 :             .unwrap();
     404            4 : 
     405            4 :         let mutable = file.buffered_writer.inspect_mutable();
     406            4 :         let cap = mutable.capacity();
     407            4 :         let align = mutable.align();
     408            4 : 
     409            4 :         let write_nbytes = cap * 2 + cap / 2;
     410            4 : 
     411            4 :         let content: Vec<u8> = rand::thread_rng()
     412            4 :             .sample_iter(rand::distributions::Standard)
     413            4 :             .take(write_nbytes)
     414            4 :             .collect();
     415            4 : 
     416            4 :         let mut value_offsets = Vec::new();
     417         1280 :         for range in (0..write_nbytes)
     418            4 :             .step_by(align)
     419         1280 :             .map(|start| start..(start + align).min(write_nbytes))
     420            4 :         {
     421         1280 :             let off = file.write_raw(&content[range], &ctx).await.unwrap();
     422         1280 :             value_offsets.push(off);
     423            4 :         }
     424            4 : 
     425            4 :         assert_eq!(file.len() as usize, write_nbytes);
     426         1280 :         for (i, range) in (0..write_nbytes)
     427            4 :             .step_by(align)
     428         1280 :             .map(|start| start..(start + align).min(write_nbytes))
     429            4 :             .enumerate()
     430            4 :         {
     431         1280 :             assert_eq!(value_offsets[i], range.start.into_u64());
     432         1280 :             let buf = IoBufferMut::with_capacity(range.len());
     433         1280 :             let (buf_slice, nread) = file
     434         1280 :                 .read_exact_at_eof_ok(range.start.into_u64(), buf.slice_full(), &ctx)
     435         1280 :                 .await
     436         1280 :                 .unwrap();
     437         1280 :             let buf = buf_slice.into_inner();
     438         1280 :             assert_eq!(nread, range.len());
     439         1280 :             assert_eq!(&buf, &content[range]);
     440            4 :         }
     441            4 : 
     442            4 :         let file_contents = std::fs::read(file.buffered_writer.as_inner().path()).unwrap();
     443            4 :         assert!(file_contents == content[0..cap * 2]);
     444            4 : 
     445            4 :         let maybe_flushed_buffer_contents = file.buffered_writer.inspect_maybe_flushed().unwrap();
     446            4 :         assert_eq!(&maybe_flushed_buffer_contents[..], &content[cap..cap * 2]);
     447            4 : 
     448            4 :         let mutable_buffer_contents = file.buffered_writer.inspect_mutable();
     449            4 :         assert_eq!(mutable_buffer_contents, &content[cap * 2..write_nbytes]);
     450            4 :     }
     451              : 
     452              :     #[tokio::test]
     453            4 :     async fn test_flushes_do_happen() {
     454            4 :         let (conf, tenant_id, timeline_id, ctx) = harness("test_flushes_do_happen").unwrap();
     455            4 : 
     456            4 :         let gate = utils::sync::gate::Gate::default();
     457            4 : 
     458            4 :         let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
     459            4 :             .await
     460            4 :             .unwrap();
     461            4 : 
     462            4 :         // mutable buffer and maybe_flushed buffer each has `cap` bytes.
     463            4 :         let cap = file.buffered_writer.inspect_mutable().capacity();
     464            4 : 
     465            4 :         let content: Vec<u8> = rand::thread_rng()
     466            4 :             .sample_iter(rand::distributions::Standard)
     467            4 :             .take(cap * 2 + cap / 2)
     468            4 :             .collect();
     469            4 : 
     470            4 :         file.write_raw(&content, &ctx).await.unwrap();
     471            4 : 
     472            4 :         // assert the state is as this test expects it to be
     473            4 :         assert_eq!(
     474            4 :             &file.load_to_io_buf(&ctx).await.unwrap(),
     475            4 :             &content[0..cap * 2 + cap / 2]
     476            4 :         );
     477            4 :         let md = file.buffered_writer.as_inner().path().metadata().unwrap();
     478            4 :         assert_eq!(
     479            4 :             md.len(),
     480            4 :             2 * cap.into_u64(),
     481            4 :             "buffered writer requires one write to be flushed if we write 2.5x buffer capacity"
     482            4 :         );
     483            4 :         assert_eq!(
     484            4 :             &file.buffered_writer.inspect_maybe_flushed().unwrap()[0..cap],
     485            4 :             &content[cap..cap * 2]
     486            4 :         );
     487            4 :         assert_eq!(
     488            4 :             &file.buffered_writer.inspect_mutable()[0..cap / 2],
     489            4 :             &content[cap * 2..cap * 2 + cap / 2]
     490            4 :         );
     491            4 :     }
     492              : 
    /// Reads at offsets chosen around the boundaries between the on-disk part, the
    /// maybe-flushed buffer and the mutable buffer, and repeats the same reads at three
    /// stages of a flush: not started, in progress, and done.
    #[tokio::test]
    async fn test_read_split_across_file_and_buffer() {
        // This test exercises the logic on the read path that splits the logical read
        // into a read from the flushed part (= the file) and a copy from the buffered writer's buffer.
        //
        // This test builds on the assertions in test_flushes_do_happen

        let (conf, tenant_id, timeline_id, ctx) =
            harness("test_read_split_across_file_and_buffer").unwrap();

        let gate = utils::sync::gate::Gate::default();

        let mut file = EphemeralFile::create(conf, tenant_id, timeline_id, &gate, &ctx)
            .await
            .unwrap();

        let mutable = file.buffered_writer.inspect_mutable();
        let cap = mutable.capacity();
        let align = mutable.align();
        let content: Vec<u8> = rand::thread_rng()
            .sample_iter(rand::distributions::Standard)
            .take(cap * 2 + cap / 2)
            .collect();

        // Use the controlled variant so that the test can step through the flush itself.
        let (_, control) = file.write_raw_controlled(&content, &ctx).await.unwrap();

        // Reads `len` bytes at `start` and compares them against the reference content.
        let test_read = |start: usize, len: usize| {
            let file = &file;
            let ctx = &ctx;
            let content = &content;
            async move {
                let (buf, nread) = file
                    .read_exact_at_eof_ok(
                        start.into_u64(),
                        IoBufferMut::with_capacity(len).slice_full(),
                        ctx,
                    )
                    .await
                    .unwrap();
                assert_eq!(nread, len);
                assert_eq!(&buf.into_inner(), &content[start..(start + len)]);
            }
        };

        let test_read_all_offset_combinations = || {
            async move {
                // completely within the file range
                test_read(align, align).await;
                // border onto edge of file
                test_read(cap - align, align).await;
                // read across file and buffer
                test_read(cap - align, 2 * align).await;
                // start from start of maybe flushed buffer
                test_read(cap, align).await;
                // completely within maybe flushed buffer
                test_read(cap + align, align).await;
                // border onto edge of maybe flushed buffer.
                test_read(cap * 2 - align, align).await;
                // read across maybe flushed and mutable buffer
                test_read(cap * 2 - align, 2 * align).await;
                // read across three segments
                test_read(cap - align, cap + 2 * align).await;
                // completely within mutable buffer
                test_read(cap * 2 + align, align).await;
            }
        };

        // Assumptions that the offsets used above rely on.
        assert!(align < cap, "test assumption");
        assert!(cap % align == 0);

        // test reads at different flush stages.
        let not_started = control.unwrap().into_not_started();
        test_read_all_offset_combinations().await;
        let in_progress = not_started.ready_to_flush();
        test_read_all_offset_combinations().await;
        in_progress.wait_until_flush_is_done().await;
        test_read_all_offset_combinations().await;
    }
     571              : }

