Line data Source code
1 : //! The heart of how [`super::EphemeralFile`] does its reads and writes.
2 : //!
3 : //! # Writes
4 : //!
5 : //! [`super::EphemeralFile`] writes small, borrowed buffers using [`RW::write_all_borrowed`].
6 : //! The [`RW`] batches these into bigger, [`TAIL_SZ`]-sized writes, using [`owned_buffers_io::write::BufferedWriter`].
7 : //!
8 : //! # Reads
9 : //!
10 : //! [`super::EphemeralFile`] always reads full [`PAGE_SZ`]ed blocks using [`RW::read_blk`].
11 : //!
12 : //! The [`RW`] serves these reads either from the buffered writer's in-memory buffer
13 : //! or redirects the caller to read from the underlying [`OwnedAsyncWriter`]
14 : //! if the read is for the prefix that has already been flushed.
15 : //!
16 : //! # Current Usage
17 : //!
18 : //! The current user of this module is [`super::page_caching::RW`].
19 :
20 : mod zero_padded;
21 :
22 : use crate::{
23 : context::RequestContext,
24 : page_cache::PAGE_SZ,
25 : virtual_file::owned_buffers_io::{
26 : self,
27 : write::{Buffer, OwnedAsyncWriter},
28 : },
29 : };
30 :
/// Size parameter, in bytes (64 KiB), of the `zero_padded::Buffer` that backs the
/// buffered writer inside [`RW`].
const TAIL_SZ: usize = 64 * 1024;
32 :
33 : /// See module-level comment.
34 : pub struct RW<W: OwnedAsyncWriter> {
35 : buffered_writer: owned_buffers_io::write::BufferedWriter<
36 : zero_padded::Buffer<TAIL_SZ>,
37 : owned_buffers_io::util::size_tracking_writer::Writer<W>,
38 : >,
39 : }
40 :
41 : pub enum ReadResult<'a, W> {
42 : NeedsReadFromWriter { writer: &'a W },
43 : ServedFromZeroPaddedMutableTail { buffer: &'a [u8; PAGE_SZ] },
44 : }
45 :
46 : impl<W> RW<W>
47 : where
48 : W: OwnedAsyncWriter,
49 : {
50 1258 : pub fn new(writer: W) -> Self {
51 1258 : let bytes_flushed_tracker =
52 1258 : owned_buffers_io::util::size_tracking_writer::Writer::new(writer);
53 1258 : let buffered_writer = owned_buffers_io::write::BufferedWriter::new(
54 1258 : bytes_flushed_tracker,
55 1258 : zero_padded::Buffer::default(),
56 1258 : );
57 1258 : Self { buffered_writer }
58 1258 : }
59 :
60 201816 : pub(crate) fn as_writer(&self) -> &W {
61 201816 : self.buffered_writer.as_inner().as_inner()
62 201816 : }
63 :
64 10221504 : pub async fn write_all_borrowed(
65 10221504 : &mut self,
66 10221504 : buf: &[u8],
67 10221504 : ctx: &RequestContext,
68 10221504 : ) -> std::io::Result<usize> {
69 10221504 : self.buffered_writer.write_buffered_borrowed(buf, ctx).await
70 10221504 : }
71 :
72 10204490 : pub fn bytes_written(&self) -> u64 {
73 10204490 : let flushed_offset = self.buffered_writer.as_inner().bytes_written();
74 10204490 : let buffer: &zero_padded::Buffer<TAIL_SZ> = self.buffered_writer.inspect_buffer();
75 10204490 : flushed_offset + u64::try_from(buffer.pending()).unwrap()
76 10204490 : }
77 :
78 : /// Get a slice of all blocks that [`Self::read_blk`] would return as [`ReadResult::ServedFromZeroPaddedMutableTail`].
79 968 : pub fn get_tail_zero_padded(&self) -> &[u8] {
80 968 : let buffer: &zero_padded::Buffer<TAIL_SZ> = self.buffered_writer.inspect_buffer();
81 968 : let buffer_written_up_to = buffer.pending();
82 : // pad to next page boundary
83 968 : let read_up_to = if buffer_written_up_to % PAGE_SZ == 0 {
84 0 : buffer_written_up_to
85 : } else {
86 968 : buffer_written_up_to
87 968 : .checked_add(PAGE_SZ - (buffer_written_up_to % PAGE_SZ))
88 968 : .unwrap()
89 : };
90 968 : &buffer.as_zero_padded_slice()[0..read_up_to]
91 968 : }
92 :
93 524822 : pub(crate) async fn read_blk(&self, blknum: u32) -> Result<ReadResult<'_, W>, std::io::Error> {
94 524822 : let flushed_offset = self.buffered_writer.as_inner().bytes_written();
95 524822 : let buffer: &zero_padded::Buffer<TAIL_SZ> = self.buffered_writer.inspect_buffer();
96 524822 : let buffered_offset = flushed_offset + u64::try_from(buffer.pending()).unwrap();
97 524822 : let read_offset = (blknum as u64) * (PAGE_SZ as u64);
98 :
99 : // The trailing page ("block") might only be partially filled,
100 : // yet the blob_io code relies on us to return a full PAGE_SZed slice anyway.
101 : // Moreover, it has to be zero-padded, because when we still had
102 : // a write-back page cache, it provided pre-zeroed pages, and blob_io came to rely on it.
103 : // DeltaLayer probably has the same issue, not sure why it needs no special treatment.
104 : // => check here that the read doesn't go beyond this potentially trailing
105 : // => the zero-padding is done in the `else` branch below
106 524822 : let blocks_written = if buffered_offset % (PAGE_SZ as u64) == 0 {
107 64 : buffered_offset / (PAGE_SZ as u64)
108 : } else {
109 524758 : (buffered_offset / (PAGE_SZ as u64)) + 1
110 : };
111 524822 : if (blknum as u64) >= blocks_written {
112 0 : return Err(std::io::Error::new(std::io::ErrorKind::Other, anyhow::anyhow!("read past end of ephemeral_file: read=0x{read_offset:x} buffered=0x{buffered_offset:x} flushed=0x{flushed_offset}")));
113 524822 : }
114 524822 :
115 524822 : // assertions for the `if-else` below
116 524822 : assert_eq!(
117 524822 : flushed_offset % (TAIL_SZ as u64), 0,
118 0 : "we only use write_buffered_borrowed to write to the buffered writer, so it's guaranteed that flushes happen buffer.cap()-sized chunks"
119 : );
120 524822 : assert_eq!(
121 524822 : flushed_offset % (PAGE_SZ as u64),
122 : 0,
123 0 : "the logic below can't handle if the page is spread across the flushed part and the buffer"
124 : );
125 :
126 524822 : if read_offset < flushed_offset {
127 199718 : assert!(read_offset + (PAGE_SZ as u64) <= flushed_offset);
128 199718 : Ok(ReadResult::NeedsReadFromWriter {
129 199718 : writer: self.as_writer(),
130 199718 : })
131 : } else {
132 325104 : let read_offset_in_buffer = read_offset
133 325104 : .checked_sub(flushed_offset)
134 325104 : .expect("would have taken `if` branch instead of this one");
135 325104 : let read_offset_in_buffer = usize::try_from(read_offset_in_buffer).unwrap();
136 325104 : let zero_padded_slice = buffer.as_zero_padded_slice();
137 325104 : let page = &zero_padded_slice[read_offset_in_buffer..(read_offset_in_buffer + PAGE_SZ)];
138 325104 : Ok(ReadResult::ServedFromZeroPaddedMutableTail {
139 325104 : buffer: page
140 325104 : .try_into()
141 325104 : .expect("the slice above got it as page-size slice"),
142 325104 : })
143 : }
144 524822 : }
145 : }
|