Skip to content

Commit

Permalink
Provide abstractions for properly aligning abomonated bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
HadrienG2 committed Nov 11, 2019
1 parent 952a208 commit a3eee05
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 4 deletions.
259 changes: 259 additions & 0 deletions src/align/alloc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
//! Tools for storing abomonated objects with correct alignment
//!
//! Use of `decode::<T>()` requires that the input bytes are aligned on a
//! `T::alignment()` boundary, or else undefined behavior will ensue.
//!
//! This module provides tools for ensuring this alignment constraint on input
//! bytes of unknown or known-incorrect alignment before calling `decode()`.
use crate::{
Entomb,
Exhume,
};

use std::{
alloc::{self, Layout},
marker::PhantomData,
ops::{Deref, DerefMut},
ptr::NonNull,
};


/// Overaligned `Box<[u8]>` for abomonated objects of type T
///
/// Compared with a regular `Box<[u8]>`, this heap-allocated bag of bytes also
/// ensures that the heap allocation is aligned on `T::alignment()`, and thus
/// suitable for use as input to `decode::<T>()`.
///
/// The first field points to the byte slice owned by this `Coffin`. It is
/// allocated by `Coffin::new()` and released when the `Coffin` is dropped.
/// The `PhantomData<T>` field stores no data; it only records which `T` the
/// alignment of the allocation was computed for.
///
/// A `Coffin` dereferences to `[u8]`, both immutably and mutably.
pub struct Coffin<T: Entomb>(NonNull<[u8]>, PhantomData<T>);

impl<T: Entomb> Coffin<T> {
/// Copy abomonated bytes into a suitably aligned heap allocation
///
/// May abort the computation if memory is exhausted or the system allocator
/// is not able to satisfy the size or alignment requirements.
pub fn new(bytes: &[u8]) -> Self {
// Perform the memory allocation using the system allocator. This is
// safe because all safety preconditions are checked by Self::layout().
let size = bytes.len();
let layout = Self::layout(size);
let ptr = unsafe { alloc::alloc(layout) };

// Abort on memory allocation errors the recommended way. Since the
// system allocator may abort, no point in not aborting ourselves...
if ptr.is_null() { alloc::handle_alloc_error(layout); }

// Transfer the input bytes on our new allocation. This is safe as...
// - `bytes.as_ptr()` has to be valid for `size` by slice construction
// - `ptr` is non-null and must point to a memory region of `size` bytes
// - Pointers are always byte-aligned, so alignment is irrelevant.
// - Heap allocations may not overlap with existing objects.
unsafe { ptr.copy_from_nonoverlapping(bytes.as_ptr(), size); }

// Produce the output slice. The transmute is safe as...
// - We don't care about lifetimes as we want a NonNull in the end
// - As discussed above, `ptr` is non-null and well-aligned.
// - The bytes of the slice have been initialized above
Self(unsafe { std::slice::from_raw_parts_mut(ptr, size) }.into(),
PhantomData)
}

/// Compute the proper layout for a coffin allocation, checking the safety
/// preconditions of the system memory allocator along the way.
///
/// We handle errors via panics because they all emerge from edge cases that
/// should only be encountered by users actively trying to break this code.
fn layout(size: usize) -> Layout {
// Basic sanity check for debug builds
debug_assert!(size >= std::mem::size_of::<T>(),
"Requested size is quite obviously not big enough");

// We're going to use the system allocator, so we cannot accept
// zero-sized slices of bytes.
assert!(size > 0, "Allocation size must be positive");

// At this point, the only layout errors that remain are those caused by
// a bad Abomonation::alignment implementation (alignment is zero or not
// a power of 2) or by a huge input size (close to usize::MAX).
Layout::from_size_align(size, T::alignment())
.expect("Bad Abomonation::alignment() impl or excessive size")
}
}

impl<T: Entomb> Deref for Coffin<T> {
    type Target = [u8];

    /// Borrow the bytes stored in the coffin
    fn deref(&self) -> &Self::Target {
        // SAFETY:
        // - The allocation behind `self.0` stays alive until the Coffin is
        //   dropped, and the output borrow cannot outlive `&self`.
        // - The borrow checker applies its usual rules to the output, which
        //   rules out invalid aliasing.
        //
        // The `Drop` implementation discusses one remaining edge case.
        unsafe { &*self.0.as_ptr() }
    }
}

impl<T: Entomb> DerefMut for Coffin<T> {
    /// Mutably borrow the bytes stored in the coffin
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: Same reasoning as in `Deref`, with `&mut self` additionally
        // guaranteeing that the output is the only live borrow.
        unsafe { &mut *self.0.as_ptr() }
    }
}

impl<T: Entomb> Drop for Coffin<T> {
    /// Give the coffin's allocation back to the memory allocator
    fn drop(&mut self) {
        // Reading the length through a reference is fine for the same reason
        // that `Deref` is, with one known wrinkle shared by all of them:
        //
        // For any form of Deref to be sound, the Rust compiler would need to
        // prevent LLVM from inserting reads from the slice after it has been
        // deallocated, which it currently does not do.
        //
        // LLVM has no clear reason to insert such reads, and `std` is exposed
        // to the same problem everywhere, so the risk is accepted here.
        //
        // FIXME: Once the Rust team has settled on the right way to handle
        //        this, apply it here if manual action is needed. See
        //        https://github.com/rust-lang/rust/issues/55005
        let len = unsafe { self.0.as_ref() }.len();
        let base = self.0.cast::<u8>().as_ptr();

        // SAFETY:
        // - Each Coffin owns a distinct allocation, which is only released
        //   here, and a value is dropped at most once.
        // - The layout is rebuilt by the same function that `Coffin::new()`
        //   used, from the same size, as the stored slice is exactly as long
        //   as the bytes which were given to `new()`.
        unsafe {
            alloc::dealloc(base, Self::layout(len));
        }
    }
}


/// `Cow`-style abstraction for aligning abomonated bytes before `decode()`
///
/// Often, one needs to decode input bytes which are _probably_ well-aligned,
/// but may not always be. For example, POSIX memory allocations are aligned
/// on 16-byte boundaries, which is sufficient for most types... as long as
/// multiple abomonated objects are not stored in a sequence without padding
/// bytes in between.
///
/// In those circumstances, pessimistically using `Coffin<T>` all the time
/// would cause unnecessarily intensive use of the system memory allocator.
/// Instead, it is better to check if the input bytes are well-aligned and only
/// reallocate them if necessary, which is what this abstraction does.
///
/// Both variants dereference to `[u8]`, so the bytes can be accessed in the
/// same way no matter which variant was selected.
pub enum AlignedBytes<'bytes, T: Exhume<'bytes>> {
/// The original bytes were sufficiently well-aligned
Borrowed(&'bytes mut [u8]),

/// The abomonated bytes were relocated into a well-aligned heap location
Owned(Coffin<T>),
}

impl<'bytes, T> AlignedBytes<'bytes, T>
where
    T: Exhume<'bytes>,
{
    /// Make sure that some bytes are aligned well enough for decoding a `T`
    ///
    /// If the address of `bytes` is a multiple of `T::alignment()`, the bytes
    /// are kept as they are. Otherwise, they are copied into a `Coffin<T>`,
    /// which means that the panics of `Coffin::new()` can occur.
    pub fn new(bytes: &'bytes mut [u8]) -> Self {
        let address = bytes.as_ptr() as usize;
        if address % T::alignment() != 0 {
            // Misaligned input: relocate it to a suitably aligned allocation
            return Self::Owned(Coffin::new(bytes));
        }
        Self::Borrowed(bytes)
    }
}

impl<'bytes, T: Exhume<'bytes>> From<&'bytes mut [u8]> for AlignedBytes<'bytes, T> {
fn from(bytes: &'bytes mut [u8]) -> Self {
Self::new(bytes)
}
}

impl<'bytes, T: Exhume<'bytes>> From<Coffin<T>> for AlignedBytes<'bytes, T> {
fn from(coffin: Coffin<T>) -> Self {
Self::Owned(coffin)
}
}

impl<'bytes, T> Deref for AlignedBytes<'bytes, T>
where
    T: Exhume<'bytes>,
{
    type Target = [u8];

    /// Borrow the aligned bytes, wherever they are stored
    fn deref(&self) -> &[u8] {
        match *self {
            Self::Borrowed(ref bytes) => &bytes[..],
            Self::Owned(ref coffin) => &coffin[..],
        }
    }
}

impl<'bytes, T> DerefMut for AlignedBytes<'bytes, T>
where
    T: Exhume<'bytes>,
{
    /// Mutably borrow the aligned bytes, wherever they are stored
    fn deref_mut(&mut self) -> &mut [u8] {
        match *self {
            Self::Borrowed(ref mut bytes) => &mut bytes[..],
            Self::Owned(ref mut coffin) => &mut coffin[..],
        }
    }
}


#[cfg(test)]
mod tests {
use super::{AlignedBytes, Coffin, Entomb, Exhume};

// Run the `Coffin` checks for each width of unsigned primitive integer
#[test]
fn coffin() {
check_coffin::<u8>();
check_coffin::<u16>();
check_coffin::<u32>();
check_coffin::<u64>();
check_coffin::<u128>();
}

// Check that a `Coffin<T>` preserves its input bytes and stores them at an
// address which is a multiple of `T::alignment()`
fn check_coffin<T: Entomb>() {
    let expected = make_test_bytes_for::<T>();
    let coffin = Coffin::<T>::new(&expected);

    // The contents must be an exact copy of the input
    assert_eq!(&coffin[..], &expected[..], "Coffin data is incorrect");

    // The storage must start on a `T::alignment()` boundary
    let address = coffin.as_ptr() as usize;
    assert_eq!(
        address % T::alignment(),
        0,
        "Coffin alignment is not strong enough"
    );
}

// Run the `AlignedBytes` checks for each unsigned primitive integer wider than
// `u8`. `u8` is left out because `check_aligned_bytes()` asserts that the
// alignment of its type parameter is greater than 1.
#[test]
fn aligned_bytes() {
check_aligned_bytes::<u16>();
check_aligned_bytes::<u32>();
check_aligned_bytes::<u64>();
check_aligned_bytes::<u128>();
}

// Check that `AlignedBytes<T>` borrows well-aligned input as is, and copies
// misaligned input to a new allocation, without altering the bytes either way
fn check_aligned_bytes<T>()
where
    for<'a> T: Exhume<'a>,
{
    assert!(
        std::mem::align_of::<T>() > 1,
        "This test requires generating misaligned data"
    );

    // Bytes which start at the beginning of a Coffin are well-aligned, so
    // they should be borrowed
    let mut payload = make_test_bytes_for::<T>();
    let mut aligned_storage = Coffin::<T>::new(&payload);
    let kept = AlignedBytes::<T>::new(&mut aligned_storage[..]);
    if let AlignedBytes::Owned(_) = kept {
        panic!("Should not allocate here");
    }
    assert_eq!(&kept[..], &payload[..]);

    // Bytes which start one byte after the beginning of a Coffin are
    // misaligned, so they should be moved to a new allocation
    payload.push(42);
    let mut shifted_storage = Coffin::<T>::new(&payload);
    let moved = AlignedBytes::<T>::new(&mut shifted_storage[1..]);
    if let AlignedBytes::Borrowed(_) = moved {
        panic!("Should allocate here");
    }
    assert_eq!(&moved[..], &payload[1..]);
}

// Produce `size_of::<T>()` bytes of test data, counting up from 1
fn make_test_bytes_for<T>() -> Vec<u8> {
    let len = std::mem::size_of::<T>();
    let mut bytes = Vec::with_capacity(len);
    let mut next: u8 = 0;
    for _ in 0..len {
        next += 1;
        bytes.push(next);
    }
    bytes
}
}
3 changes: 3 additions & 0 deletions src/align/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/// Utilities for handling alignment in abomonated data
mod io;
mod alloc;

#[deprecated(note = "Made pub for internal unsafe_abomonate use only")]
pub use self::io::{AlignedReader, AlignedWriter};

pub use self::alloc::{AlignedBytes, Coffin};
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ pub unsafe fn encode<T: Entomb, W: Write>(typed: &T, write: W) -> IOResult<()> {
/// abomonated data of type T, which you can check with `T::alignment()`.
/// Failure to meet this requirement will result in undefined behavior.
///
/// If you are not able to guarantee sufficient alignment from your data source, you may find the
/// `align::AlignedBytes<T>` utility useful. It checks if your data is well-aligned, and moves it
/// into a well-aligned heap allocation otherwise.
///
/// # Examples
/// ```
/// use abomonation::{encode, decode};
Expand Down
19 changes: 15 additions & 4 deletions tests/tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
extern crate abomonation;

use abomonation::*;
use abomonation::align::AlignedBytes;
use std::fmt::Debug;

// Test struct for the unsafe_abomonate macro
Expand Down Expand Up @@ -135,10 +136,20 @@ fn test_multiple_encode_decode() {
unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); }
unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); }

let (t, r) = unsafe { decode::<u32>(&mut bytes) }.unwrap(); assert_eq!(*t, 0);
let (t, r) = unsafe { decode::<u64>(r) }.unwrap(); assert_eq!(*t, 7);
let (t, r) = unsafe { decode::<Vec<i32>>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]);
let (t, _r) = unsafe { decode::<String>(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned());
let (t, r) = unsafe { decode::<u32>(&mut bytes) }.unwrap();
assert_eq!(*t, 0);

let mut r = AlignedBytes::<u64>::new(r);
let (t, r) = unsafe { decode::<u64>(&mut r) }.unwrap();
assert_eq!(*t, 7);

let mut r = AlignedBytes::<Vec<i32>>::new(r);
let (t, r) = unsafe { decode::<Vec<i32>>(&mut r) }.unwrap();
assert_eq!(*t, vec![1,2,3]);

let mut r = AlignedBytes::<String>::new(r);
let (t, _r) = unsafe { decode::<String>(&mut r) }.unwrap();
assert_eq!(*t, "grawwwwrr".to_owned());
}

#[test]
Expand Down

0 comments on commit a3eee05

Please sign in to comment.