Skip to content

Commit

Permalink
Rollup merge of #97015 - nrc:read-buf-cursor, r=Mark-Simulacrum
Browse files Browse the repository at this point in the history
std::io: migrate ReadBuf to BorrowBuf/BorrowCursor

This PR replaces `ReadBuf` (used by the `Read::read_buf` family of methods) with `BorrowBuf` and `BorrowCursor` (named `BorrowedBuf` and `BorrowedCursor` in the merged code shown in the diff below).

The general idea is to split `ReadBuf` because its API is large and confusing. `BorrowBuf` represents a borrowed buffer which is mostly read-only and (other than for construction) deals only with filled vs unfilled segments. A `BorrowCursor` is a mostly write-only view of the unfilled part of a `BorrowBuf` which distinguishes between initialized and uninitialized segments. For `Read::read_buf`, the caller would create a `BorrowBuf`, then pass a `BorrowCursor` to `read_buf`.

In addition to the major API split, I've made the following smaller changes:

* Removed some methods entirely from the API (mostly the functionality can be replicated with two calls rather than a single one)
* Unified naming, e.g., by replacing initialized with init and assume_init with set_init
* Added an easy way to get the number of bytes written to a cursor (`written` method)

As well as simplifying the API (IMO), this approach has the following advantages:

* Since we pass the cursor by value, we remove the 'unsoundness footgun' where a malicious `read_buf` could swap out the `ReadBuf`.
* Since `read_buf` cannot write into the filled part of the buffer, we prevent the filled part from shrinking or changing, which could cause underflow for the caller or unexpected behaviour.

## Outline

```rust
pub struct BorrowBuf<'a>

impl Debug for BorrowBuf<'_>

impl<'a> From<&'a mut [u8]> for BorrowBuf<'a>
impl<'a> From<&'a mut [MaybeUninit<u8>]> for BorrowBuf<'a>

impl<'a> BorrowBuf<'a> {
    pub fn capacity(&self) -> usize
    pub fn len(&self) -> usize
    pub fn init_len(&self) -> usize
    pub fn filled(&self) -> &[u8]
    pub fn unfilled<'this>(&'this mut self) -> BorrowCursor<'this, 'a>
    pub fn clear(&mut self) -> &mut Self
    pub unsafe fn set_init(&mut self, n: usize) -> &mut Self
}

pub struct BorrowCursor<'buf, 'data>

impl<'buf, 'data> BorrowCursor<'buf, 'data> {
    pub fn clone<'this>(&'this mut self) -> BorrowCursor<'this, 'data>
    pub fn capacity(&self) -> usize
    pub fn written(&self) -> usize
    pub fn init_ref(&self) -> &[u8]
    pub fn init_mut(&mut self) -> &mut [u8]
    pub fn uninit_mut(&mut self) -> &mut [MaybeUninit<u8>]
    pub unsafe fn as_mut(&mut self) -> &mut [MaybeUninit<u8>]
    pub unsafe fn advance(&mut self, n: usize) -> &mut Self
    pub fn ensure_init(&mut self) -> &mut Self
    pub unsafe fn set_init(&mut self, n: usize) -> &mut Self
    pub fn append(&mut self, buf: &[u8])
}
```

## TODO

* ~~Migrate non-unix libs and tests~~
* ~~Naming~~
  * ~~`BorrowBuf` or `BorrowedBuf` or `SliceBuf`? (We might want an owned equivalent for the async IO traits)~~
  * ~~Should we rename the `readbuf` module? We might keep the name to indicate it includes both the buf and cursor variations and someday the owned version too. Or we could change it. It is not publicly exposed, so it is not that important.~~
  * ~~`read_buf` method: we read into the cursor now, so the `_buf` suffix is a bit weird.~~
* ~~Documentation~~
* Tests are incomplete (I adjusted existing tests, but did not add new ones).

cc #78485, #94741
supersedes: #95770, #93359
fixes #93305
  • Loading branch information
matthiaskrgr authored Aug 28, 2022
2 parents 91f128b + ac70aea commit b9306c2
Show file tree
Hide file tree
Showing 21 changed files with 472 additions and 418 deletions.
10 changes: 5 additions & 5 deletions library/std/src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ mod tests;

use crate::ffi::OsString;
use crate::fmt;
use crate::io::{self, IoSlice, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, Write};
use crate::io::{self, BorrowedCursor, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write};
use crate::path::{Path, PathBuf};
use crate::sys::fs as fs_imp;
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
Expand Down Expand Up @@ -703,8 +703,8 @@ impl Read for File {
self.inner.read_vectored(bufs)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
self.inner.read_buf(cursor)
}

#[inline]
Expand Down Expand Up @@ -755,8 +755,8 @@ impl Read for &File {
self.inner.read(buf)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
self.inner.read_buf(buf)
fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
self.inner.read_buf(cursor)
}

fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
Expand Down
14 changes: 7 additions & 7 deletions library/std/src/io/buffered/bufreader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ mod buffer;

use crate::fmt;
use crate::io::{
self, BufRead, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
self, BorrowedCursor, BufRead, IoSliceMut, Read, Seek, SeekFrom, SizeHint, DEFAULT_BUF_SIZE,
};
use buffer::Buffer;

Expand Down Expand Up @@ -266,21 +266,21 @@ impl<R: Read> Read for BufReader<R> {
Ok(nread)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
fn read_buf(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.buf.pos() == self.buf.filled() && buf.remaining() >= self.capacity() {
if self.buf.pos() == self.buf.filled() && cursor.capacity() >= self.capacity() {
self.discard_buffer();
return self.inner.read_buf(buf);
return self.inner.read_buf(cursor);
}

let prev = buf.filled_len();
let prev = cursor.written();

let mut rem = self.fill_buf()?;
rem.read_buf(buf)?;
rem.read_buf(cursor.reborrow())?;

self.consume(buf.filled_len() - prev); //slice impl of read_buf known to never unfill buf
self.consume(cursor.written() - prev); //slice impl of read_buf known to never unfill buf

Ok(())
}
Expand Down
12 changes: 8 additions & 4 deletions library/std/src/io/buffered/bufreader/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
/// that user code which wants to do reads from a `BufReader` via `buffer` + `consume` can do so
/// without encountering any runtime bounds checks.
use crate::cmp;
use crate::io::{self, Read, ReadBuf};
use crate::io::{self, BorrowedBuf, Read};
use crate::mem::MaybeUninit;

pub struct Buffer {
Expand Down Expand Up @@ -93,11 +93,15 @@ impl Buffer {
if self.pos >= self.filled {
debug_assert!(self.pos == self.filled);

let mut readbuf = ReadBuf::uninit(&mut self.buf);
let mut buf = BorrowedBuf::from(&mut *self.buf);
// SAFETY: `self.filled` bytes will always have been initialized.
unsafe {
buf.set_init(self.filled);
}

reader.read_buf(&mut readbuf)?;
reader.read_buf(buf.unfilled())?;

self.filled = readbuf.filled_len();
self.filled = buf.len();
self.pos = 0;
}
Ok(self.buffer())
Expand Down
34 changes: 18 additions & 16 deletions library/std/src/io/buffered/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::io::prelude::*;
use crate::io::{self, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, ReadBuf, SeekFrom};
use crate::io::{
self, BorrowedBuf, BufReader, BufWriter, ErrorKind, IoSlice, LineWriter, SeekFrom,
};
use crate::mem::MaybeUninit;
use crate::panic;
use crate::sync::atomic::{AtomicUsize, Ordering};
Expand Down Expand Up @@ -61,48 +63,48 @@ fn test_buffered_reader_read_buf() {
let inner: &[u8] = &[5, 6, 7, 0, 1, 2, 3, 4];
let mut reader = BufReader::with_capacity(2, inner);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);
let buf: &mut [_] = &mut [MaybeUninit::uninit(); 3];
let mut buf: BorrowedBuf<'_> = buf.into();

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled(), [5, 6, 7]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 2];
let mut buf = ReadBuf::uninit(&mut buf);
let buf: &mut [_] = &mut [MaybeUninit::uninit(); 2];
let mut buf: BorrowedBuf<'_> = buf.into();

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled(), [0, 1]);
assert_eq!(reader.buffer(), []);

let mut buf = [MaybeUninit::uninit(); 1];
let mut buf = ReadBuf::uninit(&mut buf);
let buf: &mut [_] = &mut [MaybeUninit::uninit(); 1];
let mut buf: BorrowedBuf<'_> = buf.into();

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled(), [2]);
assert_eq!(reader.buffer(), [3]);

let mut buf = [MaybeUninit::uninit(); 3];
let mut buf = ReadBuf::uninit(&mut buf);
let buf: &mut [_] = &mut [MaybeUninit::uninit(); 3];
let mut buf: BorrowedBuf<'_> = buf.into();

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled(), [3]);
assert_eq!(reader.buffer(), []);

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled(), [3, 4]);
assert_eq!(reader.buffer(), []);

buf.clear();

reader.read_buf(&mut buf).unwrap();
reader.read_buf(buf.unfilled()).unwrap();

assert_eq!(buf.filled_len(), 0);
assert!(buf.filled().is_empty());
}

#[test]
Expand Down
34 changes: 18 additions & 16 deletions library/std/src/io/copy.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{BufWriter, ErrorKind, Read, ReadBuf, Result, Write, DEFAULT_BUF_SIZE};
use super::{BorrowedBuf, BufWriter, ErrorKind, Read, Result, Write, DEFAULT_BUF_SIZE};
use crate::mem::MaybeUninit;

/// Copies the entire contents of a reader into a writer.
Expand Down Expand Up @@ -97,37 +97,39 @@ impl<I: Write> BufferedCopySpec for BufWriter<I> {

loop {
let buf = writer.buffer_mut();
let mut read_buf = ReadBuf::uninit(buf.spare_capacity_mut());
let mut read_buf: BorrowedBuf<'_> = buf.spare_capacity_mut().into();

// SAFETY: init is either 0 or the initialized_len of the previous iteration
unsafe {
read_buf.assume_init(init);
// SAFETY: init is either 0 or the init_len from the previous iteration.
read_buf.set_init(init);
}

if read_buf.capacity() >= DEFAULT_BUF_SIZE {
match reader.read_buf(&mut read_buf) {
let mut cursor = read_buf.unfilled();
match reader.read_buf(cursor.reborrow()) {
Ok(()) => {
let bytes_read = read_buf.filled_len();
let bytes_read = cursor.written();

if bytes_read == 0 {
return Ok(len);
}

init = read_buf.initialized_len() - bytes_read;
init = read_buf.init_len() - bytes_read;
len += bytes_read as u64;

// SAFETY: ReadBuf guarantees all of its filled bytes are init
// SAFETY: BorrowedBuf guarantees all of its filled bytes are init
unsafe { buf.set_len(buf.len() + bytes_read) };
len += bytes_read as u64;

// Read again if the buffer still has enough capacity, as BufWriter itself would do
// This will occur if the reader returns short reads
continue;
}
Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
} else {
writer.flush_buf()?;
init = 0;
}

writer.flush_buf()?;
}
}
}
Expand All @@ -136,13 +138,13 @@ fn stack_buffer_copy<R: Read + ?Sized, W: Write + ?Sized>(
reader: &mut R,
writer: &mut W,
) -> Result<u64> {
let mut buf = [MaybeUninit::uninit(); DEFAULT_BUF_SIZE];
let mut buf = ReadBuf::uninit(&mut buf);
let buf: &mut [_] = &mut [MaybeUninit::uninit(); DEFAULT_BUF_SIZE];
let mut buf: BorrowedBuf<'_> = buf.into();

let mut len = 0;

loop {
match reader.read_buf(&mut buf) {
match reader.read_buf(buf.unfilled()) {
Ok(()) => {}
Err(e) if e.kind() == ErrorKind::Interrupted => continue,
Err(e) => return Err(e),
Expand Down
10 changes: 5 additions & 5 deletions library/std/src/io/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::io::prelude::*;

use crate::alloc::Allocator;
use crate::cmp;
use crate::io::{self, ErrorKind, IoSlice, IoSliceMut, ReadBuf, SeekFrom};
use crate::io::{self, BorrowedCursor, ErrorKind, IoSlice, IoSliceMut, SeekFrom};

/// A `Cursor` wraps an in-memory buffer and provides it with a
/// [`Seek`] implementation.
Expand Down Expand Up @@ -323,12 +323,12 @@ where
Ok(n)
}

fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
let prev_filled = buf.filled_len();
fn read_buf(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> {
let prev_written = cursor.written();

Read::read_buf(&mut self.fill_buf()?, buf)?;
Read::read_buf(&mut self.fill_buf()?, cursor.reborrow())?;

self.pos += (buf.filled_len() - prev_filled) as u64;
self.pos += (cursor.written() - prev_written) as u64;

Ok(())
}
Expand Down
22 changes: 11 additions & 11 deletions library/std/src/io/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::cmp;
use crate::collections::VecDeque;
use crate::fmt;
use crate::io::{
self, BufRead, ErrorKind, IoSlice, IoSliceMut, Read, ReadBuf, Seek, SeekFrom, Write,
self, BorrowedCursor, BufRead, ErrorKind, IoSlice, IoSliceMut, Read, Seek, SeekFrom, Write,
};
use crate::mem;

Expand All @@ -21,8 +21,8 @@ impl<R: Read + ?Sized> Read for &mut R {
}

#[inline]
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
(**self).read_buf(buf)
fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
(**self).read_buf(cursor)
}

#[inline]
Expand Down Expand Up @@ -125,8 +125,8 @@ impl<R: Read + ?Sized> Read for Box<R> {
}

#[inline]
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
(**self).read_buf(buf)
fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
(**self).read_buf(cursor)
}

#[inline]
Expand Down Expand Up @@ -249,11 +249,11 @@ impl Read for &[u8] {
}

#[inline]
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
let amt = cmp::min(buf.remaining(), self.len());
fn read_buf(&mut self, mut cursor: BorrowedCursor<'_>) -> io::Result<()> {
let amt = cmp::min(cursor.capacity(), self.len());
let (a, b) = self.split_at(amt);

buf.append(a);
cursor.append(a);

*self = b;
Ok(())
Expand Down Expand Up @@ -427,10 +427,10 @@ impl<A: Allocator> Read for VecDeque<u8, A> {
}

#[inline]
fn read_buf(&mut self, buf: &mut ReadBuf<'_>) -> io::Result<()> {
fn read_buf(&mut self, cursor: BorrowedCursor<'_>) -> io::Result<()> {
let (ref mut front, _) = self.as_slices();
let n = cmp::min(buf.remaining(), front.len());
Read::read_buf(front, buf)?;
let n = cmp::min(cursor.capacity(), front.len());
Read::read_buf(front, cursor)?;
self.drain(..n);
Ok(())
}
Expand Down
Loading

0 comments on commit b9306c2

Please sign in to comment.