From 984b0b05cf1e508659e3fc7e4f88fc58124da985 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 12:17:37 +0200 Subject: [PATCH 01/34] Move to a NonNull-based exhume() interface --- src/lib.rs | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2e617dc..f857aab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ use std::io::Write; // for bytes.write_all; push_all is unstable and extend is s use std::io::Result as IOResult; use std::marker::PhantomData; use std::num::*; +use std::ptr::NonNull; pub mod abomonated; @@ -124,9 +125,9 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) if bytes.len() < mem::size_of::() { None } else { let (split1, split2) = bytes.split_at_mut(mem::size_of::()); - let result: &mut T = mem::transmute(split1.get_unchecked_mut(0)); - if let Some(remaining) = result.exhume(split2) { - Some((result, remaining)) + let result: NonNull = mem::transmute(split1.get_unchecked_mut(0)); + if let Some(remaining) = T::exhume(result, split2) { + Some((&*result.as_ptr(), remaining)) } else { None @@ -165,10 +166,17 @@ pub trait Abomonation { /// reports any failures in writing to `write`. #[inline(always)] unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } - /// Recover any information for `&mut self` not evident from its binary representation. + /// Recover any information for `self_` not evident from its binary representation. /// /// Most commonly this populates pointers with valid references into `bytes`. - #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { Some(bytes) } + /// + /// Implementors should take note that `self_` is initially in an invalid state, as its inner + /// pointers may be dangling. As Rust references come with a data validity invariant, building + /// references to invalid state is undefined behavior, so one should strive to implement + /// `exhume` using raw pointer operations as much as feasible. + // + // FIXME: Replace self_ with self once Rust has arbitrary self types + #[inline(always)] unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } /// Reports the number of further bytes required to entomb `self`. #[inline(always)] fn extent(&self) -> usize { 0 } @@ -227,7 +235,7 @@ macro_rules! unsafe_abomonate { $( self.$field.entomb(write)?; )* Ok(()) } - #[inline] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { $( let temp = bytes; bytes = self.$field.exhume(temp)?; )* Some(bytes) } @@ -251,7 +259,7 @@ macro_rules! 
tuple_abomonate { Ok(()) } #[allow(non_snake_case)] - #[inline(always)] unsafe fn exhume<'a,'b>(&'a mut self, mut bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let ($(ref mut $name,)*) = *self; $( let temp = bytes; bytes = $name.exhume(temp)?; )* Some(bytes) @@ -314,7 +322,7 @@ impl Abomonation for Option { } Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { if let &mut Some(ref mut inner) = self { let tmp = bytes; bytes = inner.exhume(tmp)?; } @@ -333,7 +341,7 @@ impl Abomonation for Result { }; Ok(()) } - #[inline(always)] unsafe fn exhume<'a, 'b>(&'a mut self, bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { match self { &mut Ok(ref mut inner) => inner.exhume(bytes), &mut Err(ref mut inner) => inner.exhume(bytes), @@ -390,7 +398,7 @@ macro_rules! array_abomonate { Ok(()) } #[inline(always)] - unsafe fn exhume<'a, 'b>(&'a mut self, mut bytes: &'b mut[u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { for element in self { let tmp = bytes; bytes = element.exhume(tmp)?; } @@ -448,7 +456,7 @@ impl Abomonation for String { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { if self.len() > bytes.len() { None } else { let (mine, rest) = bytes.split_at_mut(self.len()); @@ -469,7 +477,7 @@ impl Abomonation for Vec { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { // extract memory from bytes to back our vector let binary_len = self.len() * mem::size_of::(); @@ -503,7 +511,7 @@ impl Abomonation for Box { Ok(()) } #[inline] - unsafe fn exhume<'a,'b>(&'a mut self, bytes: &'b mut [u8]) -> Option<&'b mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { From e97de1bd5051adc36bf53f99e34ce967d11a0b28 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:03:58 +0200 Subject: [PATCH 02/34] Port unsafe_abomonate to NonNull-based interface --- src/lib.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f857aab..48d9491 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -235,10 +235,17 @@ macro_rules! unsafe_abomonate { $( self.$field.entomb(write)?; )* Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - $( let temp = bytes; bytes = self.$field.exhume(temp)?; )* + $( + // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. + // The proposed &raw mut operator would allow avoiding this. 
+ let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); + bytes = Abomonation::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[inline] fn extent(&self) -> usize { let mut size = 0; $( size += self.$field.extent(); )* From 5fd495799695bb60f8885cb5a63e280120113c2a Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Wed, 25 Sep 2019 22:53:39 +0200 Subject: [PATCH 03/34] Clarify that the tuple_abomonate macro takes types as input --- src/lib.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 48d9491..1d5c034 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -257,25 +257,25 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { - ( $($name:ident)+) => ( - impl<$($name: Abomonation),*> Abomonation for ($($name,)*) { + ( $($ty:ident)+) => ( + impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { - let ($(ref $name,)*) = *self; - $($name.entomb(write)?;)* + let ($(ref $ty,)*) = *self; + $($ty.entomb(write)?;)* Ok(()) } #[allow(non_snake_case)] #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - let ($(ref mut $name,)*) = *self; - $( let temp = bytes; bytes = $name.exhume(temp)?; )* + let ($(ref mut $ty,)*) = *self; + $( let temp = bytes; bytes = $ty.exhume(temp)?; )* Some(bytes) } #[allow(non_snake_case)] #[inline(always)] fn extent(&self) -> usize { let mut size = 0; - let ($(ref $name,)*) = *self; - $( size += $name.extent(); )* + let ($(ref $ty,)*) = *self; + $( size += $ty.extent(); )* size } } From b182a3877f7b4c80ad6181ecdfdaa3854fe47abe Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:04:35 +0200 Subject: [PATCH 04/34] Port tuple_abomonate to NonNull-based interface --- src/lib.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1d5c034..252f654 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -265,12 +265,20 @@ macro_rules! tuple_abomonate { $($ty.entomb(write)?;)* Ok(()) } + #[allow(non_snake_case)] #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - let ($(ref mut $ty,)*) = *self; - $( let temp = bytes; bytes = $ty.exhume(temp)?; )* + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I think avoiding this would require a cleaner way to iterate over tuple fields. + // One possibility would be a C++11-style combination of variadic generics and recursion. 
+ let ($(ref mut $ty,)*) = *self_.as_ptr(); + $( + let field_ptr : NonNull<$ty> = From::from($ty); + bytes = $ty::exhume(field_ptr, bytes)?; + )* Some(bytes) } + #[allow(non_snake_case)] #[inline(always)] fn extent(&self) -> usize { let mut size = 0; From d4a78e4b85312d63f390246f8365d048bcfdb240 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 15:04:58 +0200 Subject: [PATCH 05/34] Port Box abomonation to NonNull-based interface --- src/lib.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 252f654..0950f44 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -525,17 +525,20 @@ impl Abomonation for Box { (**self).entomb(bytes)?; Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - std::ptr::write(self, mem::transmute(mine.as_mut_ptr() as *mut T)); - let temp = rest; rest = (**self).exhume(temp)?; + let box_target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); + rest = T::exhume(box_target, rest)?; + self_.as_ptr().write(Box::from_raw(box_target.as_ptr())); Some(rest) } } + #[inline] fn extent(&self) -> usize { mem::size_of::() + (&**self).extent() } From 421a486b2f11ebc5804be536b044be191598ae00 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 18 Sep 2019 19:43:53 +0200 Subject: [PATCH 06/34] Port Rust enum abomonation to NonNull-based interface --- src/lib.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0950f44..c681214 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -337,12 +337,17 @@ impl Abomonation for Option { } Ok(()) } + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - if let &mut Some(ref mut inner) = self { - let tmp = bytes; bytes = inner.exhume(tmp)?; + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. + if let Some(ref mut inner) = *self_.as_ptr() { + let inner_ptr : NonNull = From::from(inner); + bytes = T::exhume(inner_ptr, bytes)?; } Some(bytes) } + #[inline] fn extent(&self) -> usize { self.as_ref().map(|inner| inner.extent()).unwrap_or(0) } @@ -356,12 +361,22 @@ impl Abomonation for Result { }; Ok(()) } + #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - match self { - &mut Ok(ref mut inner) => inner.exhume(bytes), - &mut Err(ref mut inner) => inner.exhume(bytes), + // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on enum implementation details. 
+ match *self_.as_ptr() { + Ok(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + T::exhume(inner_ptr, bytes) + } + Err(ref mut inner) => { + let inner_ptr : NonNull = From::from(inner); + E::exhume(inner_ptr, bytes) + } } } + #[inline] fn extent(&self) -> usize { match self { &Ok(ref inner) => inner.extent(), From 5ef07a7d460d168e3c1dd12a425fba257043c6b4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 13:48:14 +0200 Subject: [PATCH 07/34] Port slice-like abomonations to NonNull-based interface (and deduplicate them) --- src/lib.rs | 78 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c681214..e5750eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -424,22 +424,16 @@ macro_rules! array_abomonate { impl Abomonation for [T; $size] { #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - for element in self { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } + #[inline(always)] - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - for element in self { - let tmp = bytes; bytes = element.exhume(tmp)?; - } - Some(bytes) + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + exhume_slice(self_.as_ptr() as *mut T, $size, bytes) } + #[inline(always)] fn extent(&self) -> usize { - let mut size = 0; - for element in self { - size += element.extent(); - } - size + slice_extent(&self[..]) } } ) @@ -485,15 +479,20 @@ impl Abomonation for String { write.write_all(self.as_bytes())?; Ok(()) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - if self.len() > bytes.len() { None } + // FIXME: This (briefly) constructs an &String to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on String implementation details. + let self_len = self_.as_ref().len(); + if self_len > bytes.len() { None } else { - let (mine, rest) = bytes.split_at_mut(self.len()); - std::ptr::write(self, String::from_raw_parts(mem::transmute(mine.as_ptr()), self.len(), self.len())); + let (mine, rest) = bytes.split_at_mut(self_len); + self_.as_ptr().write(String::from_raw_parts(mine.as_mut_ptr(), self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { self.len() } @@ -503,33 +502,28 @@ impl Abomonation for Vec { #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; - for element in self.iter() { element.entomb(write)?; } - Ok(()) + entomb_slice(&self[..], write) } + #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { - - // extract memory from bytes to back our vector - let binary_len = self.len() * mem::size_of::(); + // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on Vec implementation details. + let self_len = self_.as_ref().len(); + let binary_len = self_len * mem::size_of::(); if binary_len > bytes.len() { None } else { let (mine, mut rest) = bytes.split_at_mut(binary_len); - let slice = std::slice::from_raw_parts_mut(mine.as_mut_ptr() as *mut T, self.len()); - std::ptr::write(self, Vec::from_raw_parts(slice.as_mut_ptr(), self.len(), self.len())); - for element in self.iter_mut() { - let temp = rest; // temp variable explains lifetimes (mysterious!) 
- rest = element.exhume(temp)?; - } + let first_ptr = mine.as_mut_ptr() as *mut T; + rest = exhume_slice(first_ptr, self_len, rest)?; + self_.as_ptr().write(Vec::from_raw_parts(first_ptr, self_len, self_len)); Some(rest) } } + #[inline] fn extent(&self) -> usize { - let mut sum = mem::size_of::() * self.len(); - for element in self.iter() { - sum += element.extent(); - } - sum + mem::size_of::() * self.len() + slice_extent(&self[..]) } } @@ -564,6 +558,28 @@ impl Abomonation for Box { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } +// Common subset of "entomb" for all [T]-like types +unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { + for element in slice { element.entomb(write)?; } + Ok(()) +} + +// Common subset of "exhume" for all [T]-like types +// (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) +#[inline] +unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + for i in 0..length { + let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); + bytes = T::exhume(element_ptr, bytes)?; + } + Some(bytes) +} + +// Common subset of "extent" for all [T]-like types +fn slice_extent(slice: &[T]) -> usize { + slice.iter().map(T::extent).sum() +} + mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; From b23d2dc2acb83438eb3716255c7b27e1097a4dc9 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:22:33 +0200 Subject: [PATCH 08/34] Require Debug and use assert_eq for improved test failure messages --- tests/tests.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/tests.rs b/tests/tests.rs index 340e776..3ff0dec 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,7 @@ extern crate abomonation; use abomonation::*; +use std::fmt::Debug; #[test] fn test_array() { _test_pass(vec![[0, 1, 2]; 1024]); } #[test] fn test_nonzero() { _test_pass(vec![[std::num::NonZeroI32::new(1)]; 1024]); } @@ -38,21 +39,21 @@ fn test_phantom_data_for_non_abomonatable_type() { _test_pass(PhantomData::::default()); } -fn _test_pass(record: T) { +fn _test_pass(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } { let (result, rest) = unsafe { decode::(&mut bytes[..]) }.unwrap(); - assert!(&record == result); - assert!(rest.len() == 0); + assert_eq!(&record, result); + assert_eq!(rest.len(), 0); } } -fn _test_fail(record: T) { +fn _test_fail(record: T) { let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } bytes.pop(); - assert!(unsafe { decode::(&mut bytes[..]) }.is_none()); + assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); } fn _test_size(record: T) { @@ -62,7 +63,7 @@ fn _test_size(record: T) { } -#[derive(Eq, PartialEq)] +#[derive(Debug, Eq, PartialEq)] struct MyStruct { a: String, b: u64, @@ -82,8 +83,8 @@ fn test_macro() { // decode a &Vec<(u64, String)> from binary data if let Some((result, rest)) = unsafe { decode::(&mut bytes) } { - assert!(result == &record); - assert!(rest.len() == 0); + assert_eq!(result, &record); + assert_eq!(rest.len(), 0); } } @@ -106,10 +107,10 @@ fn test_multiple_encode_decode() { unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); } unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t == 0); - let (t, r) = unsafe { 
decode::(r) }.unwrap(); assert!(*t == 7); - let (t, r) = unsafe { decode::>(r) }.unwrap(); assert!(*t == vec![1,2,3]); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == "grawwwwrr".to_owned()); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, 0); + let (t, r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, 7); + let (t, r) = unsafe { decode::>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned()); } #[test] @@ -125,6 +126,6 @@ fn test_net_types() { unsafe { encode(&socket_addr4, &mut bytes).unwrap(); } unsafe { encode(&socket_addr6, &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert!(*t == socket_addr4); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert!(*t == socket_addr6); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, socket_addr4); + let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, socket_addr6); } From b5f7b71bdd7fdea8fa11c48a09cfdb5901b1a0c4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:23:11 +0200 Subject: [PATCH 09/34] Clarify invalid data as a source of UB --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c8565c..8afb905 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ A mortifying serialization library for Rust Abomonation (spelling intentional) is a serialization library for Rust based on the very simple idea that if someone presents data for serialization it will copy those exact bits, and then follow any pointers and copy those bits, and so on. When deserializing it recovers the exact bits, and then corrects pointers to aim at the serialized forms of the chased data. -**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment. +**Warning**: Abomonation should not be used on any data you care strongly about, or from any computer you value the data on. The `encode` and `decode` methods do things that may be undefined behavior, and you shouldn't stand for that. Specifically, `encode` exposes padding bytes to `memcpy`, and `decode` doesn't much respect alignment and may need to construct Rust references to invalid data. Please consult the [abomonation documentation](https://frankmcsherry.github.com/abomonation) for more specific information. From e4a4963073b32646169a6cc8637447ea7adf6aa4 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Thu, 19 Sep 2019 18:23:26 +0200 Subject: [PATCH 10/34] Remove outdated Abomonable notion --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8afb905..13de90c 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ Be warned that these numbers are not *goodput*, but rather the total number of b ## unsafe_abomonate! -Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonable` types. To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. 
+Abomonation comes with the `unsafe_abomonate!` macro implementing `Abomonation` for structs which are essentially equivalent to a tuple of other `Abomonation` types. To use the macro, you must put the `#[macro_use]` modifier before `extern crate abomonation;`. Please note that `unsafe_abomonate!` synthesizes unsafe implementations of `Abomonation`, and it should be considered unsafe to invoke. @@ -82,4 +82,4 @@ if let Some((result, remaining)) = unsafe { decode::<MyStruct>(&mut bytes) } { } ``` -Be warned that implementing `Abomonable` for types can be a giant disaster and is entirely discouraged. +Be warned that implementing `Abomonation` for types can be a giant disaster and is entirely discouraged. From b8629a300d1bc71bb8e5f8f47ec81931fe2ce399 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:11:12 +0200 Subject: [PATCH 11/34] Make sure that black_box does its job --- benches/bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/bench.rs b/benches/bench.rs index 9ce8a3c..c106228 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -40,7 +40,7 @@ fn _bench_enc<T: Abomonation>(bencher: &mut Bencher, record: T) { bencher.bytes = bytes.len() as u64; bencher.iter(|| { bytes.clear(); - unsafe { encode(&record, &mut bytes).unwrap(); } + unsafe { encode(&record, &mut bytes).unwrap() } }); } From 48b2b9aa9d64cff1bd25b264fd9a8c092e30b4bb Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 07:49:52 +0200 Subject: [PATCH 12/34] Remove some strange whitespace --- benches/bench.rs | 2 -- benches/clone.rs | 2 -- benches/recycler.rs | 2 -- 3 files changed, 6 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index c106228..4492541 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -31,7 +31,6 @@ use test::Bencher; #[bench] fn vec_u_vn_s_dec(bencher: &mut Bencher) { _bench_dec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_enc<T: Abomonation>(bencher: &mut Bencher, record: T) { - // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -45,7 +44,6 @@ fn _bench_enc<T: Abomonation>(bencher: &mut Bencher, record: T) { } fn _bench_dec<T: Abomonation+Eq>(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/clone.rs b/benches/clone.rs index a040895..a5f5b2e 100644 --- a/benches/clone.rs +++ b/benches/clone.rs @@ -28,7 +28,6 @@ use test::Bencher; #[bench] fn vec_u_vn_s_cln(bencher: &mut Bencher) { _bench_cln(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_e_d<T: Abomonation>(bencher: &mut Bencher, record: T) { - // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -43,7 +42,6 @@ fn _bench_e_d<T: Abomonation>(bencher: &mut Bencher, record: T) { } fn _bench_cln<T: Abomonation+Clone>(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/recycler.rs b/benches/recycler.rs index cc9ae94..96391b5 100644 --- a/benches/recycler.rs +++ b/benches/recycler.rs @@ -28,7 +28,6 @@ use test::Bencher; // #[bench] fn vec_u_vn_s_rec(bencher: &mut Bencher) { _bench_rec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } fn _bench_own<T: Abomonation+Clone>(bencher: &mut
Bencher, record: T) { fn _bench_rec(bencher: &mut Bencher, record: T) { - // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } From 7388af5ab46724ef68398d79e5206cd08c33be81 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 09:39:22 +0200 Subject: [PATCH 13/34] Snipe some unnecessary usage of Ok(()) --- src/lib.rs | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e5750eb..3c1c817 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,8 +75,7 @@ pub mod abomonated; pub unsafe fn encode(typed: &T, write: &mut W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(write)?; - Ok(()) + typed.entomb(write) } /// Decodes a mutable binary slice into an immutable typed reference. @@ -262,7 +261,7 @@ macro_rules! tuple_abomonate { #[allow(non_snake_case)] #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; - $($ty.entomb(write)?;)* + $( $ty.entomb(write)?; )* Ok(()) } @@ -356,10 +355,9 @@ impl Abomonation for Option { impl Abomonation for Result { #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { - &Ok(ref inner) => inner.entomb(write)?, - &Err(ref inner) => inner.entomb(write)?, - }; - Ok(()) + &Ok(ref inner) => inner.entomb(write), + &Err(ref inner) => inner.entomb(write), + } } #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { @@ -476,8 +474,7 @@ array_abomonate!(32); impl Abomonation for String { #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write.write_all(self.as_bytes())?; - Ok(()) + write.write_all(self.as_bytes()) } #[inline] @@ -531,8 +528,7 @@ impl Abomonation for Box { #[inline] unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - (**self).entomb(bytes)?; - Ok(()) + (**self).entomb(bytes) } #[inline] From 3f847833809249459a65b57b143b2b08776bce70 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:11:30 +0200 Subject: [PATCH 14/34] Don't use manual inlining where benchmarks don't show a benefit --- src/lib.rs | 50 ++++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3c1c817..dc1a5e6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -119,7 +119,6 @@ pub unsafe fn encode(typed: &T, write: &mut W) -> IORe /// assert!(remaining.len() == 0); /// } /// ``` -#[inline] pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8])> { if bytes.len() < mem::size_of::() { None } else { @@ -139,7 +138,6 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) /// # Safety /// /// The `measure` method is safe. It neither produces nor consults serialized representations. -#[inline] pub fn measure(typed: &T) -> usize { mem::size_of::() + typed.extent() } @@ -163,7 +161,7 @@ pub trait Abomonation { /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value /// reports any failures in writing to `write`. - #[inline(always)] unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } + unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } /// Recover any information for `self_` not evident from its binary representation. 
/// @@ -175,10 +173,10 @@ pub trait Abomonation { /// `exhume` using raw pointer operations as much as feasible. // // FIXME: Replace self_ with self once Rust has arbitrary self types - #[inline(always)] unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } + unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } /// Reports the number of further bytes required to entomb `self`. - #[inline(always)] fn extent(&self) -> usize { 0 } + fn extent(&self) -> usize { 0 } } /// The `unsafe_abomonate!` macro takes a type name with an optional list of fields, and implements @@ -230,12 +228,12 @@ macro_rules! unsafe_abomonate { }; ($t:ty : $($field:ident),*) => { impl Abomonation for $t { - #[inline] unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { + unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { $( self.$field.entomb(write)?; )* Ok(()) } - #[inline] unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { $( // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. // The proposed &raw mut operator would allow avoiding this. @@ -245,7 +243,7 @@ macro_rules! unsafe_abomonate { Some(bytes) } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { let mut size = 0; $( size += self.$field.extent(); )* size @@ -259,14 +257,14 @@ macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] - #[inline(always)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; $( $ty.entomb(write)?; )* Ok(()) } #[allow(non_snake_case)] - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I think avoiding this would require a cleaner way to iterate over tuple fields. // One possibility would be a C++11-style combination of variadic generics and recursion. @@ -279,7 +277,7 @@ macro_rules! tuple_abomonate { } #[allow(non_snake_case)] - #[inline(always)] fn extent(&self) -> usize { + fn extent(&self) -> usize { let mut size = 0; let ($(ref $ty,)*) = *self; $( size += $ty.extent(); )* @@ -330,14 +328,14 @@ impl Abomonation for ::std::time::Duration { } impl Abomonation for PhantomData {} impl Abomonation for Option { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { inner.entomb(write)?; } Ok(()) } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. 
if let Some(ref mut inner) = *self_.as_ptr() { @@ -347,20 +345,20 @@ impl Abomonation for Option { Some(bytes) } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { self.as_ref().map(|inner| inner.extent()).unwrap_or(0) } } impl Abomonation for Result { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => inner.entomb(write), &Err(ref inner) => inner.entomb(write), } } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. match *self_.as_ptr() { @@ -375,7 +373,7 @@ impl Abomonation for Result { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { match self { &Ok(ref inner) => inner.extent(), &Err(ref inner) => inner.extent(), @@ -416,21 +414,18 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE); tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF); - macro_rules! array_abomonate { ($size:expr) => ( impl Abomonation for [T; $size] { - #[inline(always)] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } - #[inline(always)] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { exhume_slice(self_.as_ptr() as *mut T, $size, bytes) } - #[inline(always)] fn extent(&self) -> usize { + fn extent(&self) -> usize { slice_extent(&self[..]) } } @@ -472,7 +467,6 @@ array_abomonate!(31); array_abomonate!(32); impl Abomonation for String { - #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -490,13 +484,12 @@ impl Abomonation for String { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { self.len() } } impl Abomonation for Vec { - #[inline] unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) @@ -518,20 +511,17 @@ impl Abomonation for Vec { } } - #[inline] fn extent(&self) -> usize { mem::size_of::() * self.len() + slice_extent(&self[..]) } } impl Abomonation for Box { - #[inline] unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; (**self).entomb(bytes) } - #[inline] unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } @@ -544,13 +534,13 @@ impl Abomonation for Box { } } - #[inline] fn extent(&self) -> usize { + fn extent(&self) -> usize { mem::size_of::() + (&**self).extent() } } // This method currently enables undefined behavior, by exposing padding bytes. 
-#[inline] unsafe fn typed_to_bytes<T>(slice: &[T]) -> &[u8] { +unsafe fn typed_to_bytes<T>(slice: &[T]) -> &[u8] { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::<T>()) } mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; @@ -587,4 +577,4 @@ mod network { impl Abomonation for SocketAddr { } impl Abomonation for SocketAddrV4 { } impl Abomonation for SocketAddrV6 { } -} \ No newline at end of file +} From cb8e2af5eb9810e4eda4156eecf33cac2d998f7b Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:38:36 +0200 Subject: [PATCH 15/34] Add inline(always) where it matters in benchmarks --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index dc1a5e6..c0aa555 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ pub mod abomonated; /// } /// ``` /// -#[inline] +#[inline(always)] pub unsafe fn encode<T: Abomonation, W: Write>(typed: &T, write: &mut W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::<T>()); write.write_all(slice)?; From 17a03542b2c0b64d67e8859e6ac0543695fa2bc3 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 01:56:57 +0200 Subject: [PATCH 16/34] Avoid multiple codegen units in benchmarks --- Cargo.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 7c2f62d..9ee1ca1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,9 @@ license = "MIT" [dev-dependencies] recycler="0.1.4" + +[profile.bench] +# Multiple codegen units speed up compilation, but make compilation output less +# deterministic and generally decrease codegen quality through worse inlining. +# Let's turn it off for benchmarking. +codegen-units = 1 From ee283b030f44b98a55bf4514fba4e26f125c988e Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 11:24:56 +0200 Subject: [PATCH 17/34] Improve Abomonation documentation and make the trait unsafe By its very nature, Abomonation does very unsafe (and UB-ish) things. But we should strive to explain these as well as we can in the current state of the unsafe Rust formalization effort in order to reduce the potential for known misuse. --- src/lib.rs | 114 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 64 insertions(+), 50 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c0aa555..a01bac7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -149,14 +149,28 @@ pub fn measure<T: Abomonation>(typed: &T) -> usize { /// /// # Safety /// -/// Abomonation has no safe methods. Please do not call them. They should be called only by +/// `entomb` and `exhume` are not meant to be called directly. They should be called only by /// `encode` and `decode`, each of which imposes restrictions on ownership and lifetime of the data -/// they take as input and return as output. +/// they take as input and return as output, thus improving safety. +/// +/// Not all Rust types are abomonable, and one should think carefully about the implications of +/// implementing `Abomonation` for a type. To lend itself to this exercise, a type must... +/// +/// - Provide exhaustive access to its internal representation +/// - Allow reconstruction from said representation +/// - Survive having its heap allocations being silently replaced by inline pointers to +/// the same storage block, as long as only a shared reference is observed. +/// +/// The last point is the reason why `Abomonation` only provides a shared reference to the +/// reconstructed object.
Without this, it would be trivial to trigger, say, a `Box` destructor +/// that tries to call `free()` on the inner pointer. But the use of a shared reference only +/// provides minimal sanity, and for types with internal mutability (those with an `UnsafeCell` +/// inside), this precaution is insufficient. `Abomonation` is generally not safely implementable +/// for such types, but may work in particular cases like atomics. /// /// If you are concerned about safety, it may be best to avoid Abomonation all together. It does /// several things that may be undefined behavior, depending on how undefined behavior is defined. -pub trait Abomonation { - +pub unsafe trait Abomonation { /// Write any additional information about `&self` beyond its binary representation. /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value @@ -224,10 +238,10 @@ pub trait Abomonation { #[deprecated(since="0.5", note="please use the abomonation_derive crate")] macro_rules! unsafe_abomonate { ($t:ty) => { - impl Abomonation for $t { } + unsafe impl Abomonation for $t { } }; ($t:ty : $($field:ident),*) => { - impl Abomonation for $t { + unsafe impl Abomonation for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { $( self.$field.entomb(write)?; )* Ok(()) @@ -255,7 +269,7 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( - impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { + unsafe impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; @@ -287,47 +301,47 @@ macro_rules! tuple_abomonate { ); } -impl Abomonation for u8 { } -impl Abomonation for u16 { } -impl Abomonation for u32 { } -impl Abomonation for u64 { } -impl Abomonation for u128 { } -impl Abomonation for usize { } +unsafe impl Abomonation for u8 { } +unsafe impl Abomonation for u16 { } +unsafe impl Abomonation for u32 { } +unsafe impl Abomonation for u64 { } +unsafe impl Abomonation for u128 { } +unsafe impl Abomonation for usize { } -impl Abomonation for i8 { } -impl Abomonation for i16 { } -impl Abomonation for i32 { } -impl Abomonation for i64 { } -impl Abomonation for i128 { } -impl Abomonation for isize { } +unsafe impl Abomonation for i8 { } +unsafe impl Abomonation for i16 { } +unsafe impl Abomonation for i32 { } +unsafe impl Abomonation for i64 { } +unsafe impl Abomonation for i128 { } +unsafe impl Abomonation for isize { } -impl Abomonation for NonZeroU8 { } -impl Abomonation for NonZeroU16 { } -impl Abomonation for NonZeroU32 { } -impl Abomonation for NonZeroU64 { } -impl Abomonation for NonZeroU128 { } -impl Abomonation for NonZeroUsize { } +unsafe impl Abomonation for NonZeroU8 { } +unsafe impl Abomonation for NonZeroU16 { } +unsafe impl Abomonation for NonZeroU32 { } +unsafe impl Abomonation for NonZeroU64 { } +unsafe impl Abomonation for NonZeroU128 { } +unsafe impl Abomonation for NonZeroUsize { } -impl Abomonation for NonZeroI8 { } -impl Abomonation for NonZeroI16 { } -impl Abomonation for NonZeroI32 { } -impl Abomonation for NonZeroI64 { } -impl Abomonation for NonZeroI128 { } -impl Abomonation for NonZeroIsize { } +unsafe impl Abomonation for NonZeroI8 { } +unsafe impl Abomonation for NonZeroI16 { } +unsafe impl Abomonation for NonZeroI32 { } +unsafe impl Abomonation for NonZeroI64 { } +unsafe impl Abomonation for NonZeroI128 { } +unsafe impl 
Abomonation for NonZeroIsize { } -impl Abomonation for f32 { } -impl Abomonation for f64 { } +unsafe impl Abomonation for f32 { } +unsafe impl Abomonation for f64 { } -impl Abomonation for bool { } -impl Abomonation for () { } +unsafe impl Abomonation for bool { } +unsafe impl Abomonation for () { } -impl Abomonation for char { } +unsafe impl Abomonation for char { } -impl Abomonation for ::std::time::Duration { } +unsafe impl Abomonation for ::std::time::Duration { } -impl Abomonation for PhantomData {} +unsafe impl Abomonation for PhantomData {} -impl Abomonation for Option { +unsafe impl Abomonation for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { inner.entomb(write)?; @@ -350,7 +364,7 @@ impl Abomonation for Option { } } -impl Abomonation for Result { +unsafe impl Abomonation for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => inner.entomb(write), @@ -416,7 +430,7 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD macro_rules! array_abomonate { ($size:expr) => ( - impl Abomonation for [T; $size] { + unsafe impl Abomonation for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } @@ -466,7 +480,7 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -impl Abomonation for String { +unsafe impl Abomonation for String { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -489,7 +503,7 @@ impl Abomonation for String { } } -impl Abomonation for Vec { +unsafe impl Abomonation for Vec { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) @@ -516,7 +530,7 @@ impl Abomonation for Vec { } } -impl Abomonation for Box { +unsafe impl Abomonation for Box { unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; (**self).entomb(bytes) @@ -570,11 +584,11 @@ mod network { use Abomonation; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; - impl Abomonation for IpAddr { } - impl Abomonation for Ipv4Addr { } - impl Abomonation for Ipv6Addr { } + unsafe impl Abomonation for IpAddr { } + unsafe impl Abomonation for Ipv4Addr { } + unsafe impl Abomonation for Ipv6Addr { } - impl Abomonation for SocketAddr { } - impl Abomonation for SocketAddrV4 { } - impl Abomonation for SocketAddrV6 { } + unsafe impl Abomonation for SocketAddr { } + unsafe impl Abomonation for SocketAddrV4 { } + unsafe impl Abomonation for SocketAddrV6 { } } From 6c12b2e05c5325e88335f7e90fe119a26f8da0fa Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 20 Sep 2019 11:58:05 +0200 Subject: [PATCH 18/34] Take writer by value --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a01bac7..aba9051 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -72,10 +72,10 @@ pub mod abomonated; /// ``` /// #[inline(always)] -pub unsafe fn encode(typed: &T, write: &mut W) -> IOResult<()> { +pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(write) + typed.entomb(&mut write) } /// Decodes a mutable binary slice into an immutable typed reference. 
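For the record, a minimal usage sketch of the by-value writer introduced in PATCH 18 (this snippet is illustrative and not part of the patch series): because `std::io::Write` is also implemented for `&mut W` whenever `W: Write`, existing callers that lend a `&mut Vec<u8>` keep compiling unchanged, while new callers may hand over an owned writer instead.

```rust
extern crate abomonation;
use abomonation::{encode, decode};

fn main() {
    let mut bytes = Vec::new();
    // `&mut Vec<u8>` is itself a `Write`, so the pre-existing calling style still works
    unsafe { encode(&(42u64, format!("hello")), &mut bytes).unwrap() };
    if let Some((record, rest)) = unsafe { decode::<(u64, String)>(&mut bytes) } {
        assert_eq!(record.0, 42);
        assert!(rest.is_empty());
    }
}
```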
From a4377f464121e1b45bb8879e970826baa9e5b0ab Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Wed, 25 Sep 2019 22:34:32 +0200 Subject: [PATCH 19/34] Do not rely on autoderef to select the right Abomonation impl --- src/lib.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index aba9051..6611e3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,7 +75,7 @@ pub mod abomonated; pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); write.write_all(slice)?; - typed.entomb(&mut write) + T::entomb(typed, &mut write) } /// Decodes a mutable binary slice into an immutable typed reference. @@ -139,7 +139,7 @@ pub unsafe fn decode(bytes: &mut [u8]) -> Option<(&T, &mut [u8]) /// /// The `measure` method is safe. It neither produces nor consults serialized representations. pub fn measure(typed: &T) -> usize { - mem::size_of::() + typed.extent() + mem::size_of::() + T::extent(&typed) } /// Abomonation provides methods to serialize any heap data the implementor owns. @@ -243,7 +243,7 @@ macro_rules! unsafe_abomonate { ($t:ty : $($field:ident),*) => { unsafe impl Abomonation for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { - $( self.$field.entomb(write)?; )* + $( Abomonation::entomb(&self.$field, write)?; )* Ok(()) } @@ -259,7 +259,7 @@ macro_rules! unsafe_abomonate { fn extent(&self) -> usize { let mut size = 0; - $( size += self.$field.extent(); )* + $( size += Abomonation::extent(&self.$field); )* size } } @@ -273,7 +273,7 @@ macro_rules! tuple_abomonate { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; - $( $ty.entomb(write)?; )* + $( $ty::entomb($ty, write)?; )* Ok(()) } @@ -294,7 +294,7 @@ macro_rules! 
tuple_abomonate { fn extent(&self) -> usize { let mut size = 0; let ($(ref $ty,)*) = *self; - $( size += $ty.extent(); )* + $( size += $ty::extent($ty); )* size } } @@ -344,7 +344,7 @@ unsafe impl Abomonation for PhantomData {} unsafe impl Abomonation for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { - inner.entomb(write)?; + T::entomb(inner, write)?; } Ok(()) } @@ -360,15 +360,15 @@ unsafe impl Abomonation for Option { } fn extent(&self) -> usize { - self.as_ref().map(|inner| inner.extent()).unwrap_or(0) + self.as_ref().map(T::extent).unwrap_or(0) } } unsafe impl Abomonation for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { - &Ok(ref inner) => inner.entomb(write), - &Err(ref inner) => inner.entomb(write), + &Ok(ref inner) => T::entomb(inner, write), + &Err(ref inner) => E::entomb(inner, write), } } @@ -389,8 +389,8 @@ unsafe impl Abomonation for Result { fn extent(&self) -> usize { match self { - &Ok(ref inner) => inner.extent(), - &Err(ref inner) => inner.extent(), + &Ok(ref inner) => T::extent(inner), + &Err(ref inner) => E::extent(inner), } } } @@ -533,7 +533,7 @@ unsafe impl Abomonation for Vec { unsafe impl Abomonation for Box { unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - (**self).entomb(bytes) + T::entomb(&**self, bytes) } unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { @@ -549,7 +549,7 @@ unsafe impl Abomonation for Box { } fn extent(&self) -> usize { - mem::size_of::() + (&**self).extent() + mem::size_of::() + T::extent(&**self) } } @@ -560,7 +560,7 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { // Common subset of "entomb" for all [T]-like types unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { - for element in slice { element.entomb(write)?; } + for element in slice { T::entomb(element, write)?; } Ok(()) } From cda9ebb1479c7eed45ef5e2d9cdb6d0f7bf4f284 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 26 Sep 2019 00:01:22 +0200 Subject: [PATCH 20/34] Make Box::entomb simpler and more consistent with others --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6611e3d..aacd8f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -531,9 +531,9 @@ unsafe impl Abomonation for Vec { } unsafe impl Abomonation for Box { - unsafe fn entomb(&self, bytes: &mut W) -> IOResult<()> { - bytes.write_all(std::slice::from_raw_parts(mem::transmute(&**self), mem::size_of::()))?; - T::entomb(&**self, bytes) + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; + T::entomb(&**self, write) } unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { From e63516bcaac2ced8ac31fd24c973d2d036586908 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Sun, 29 Sep 2019 16:58:34 +0200 Subject: [PATCH 21/34] Add basic support for types which contain references --- benches/bench.rs | 8 +- benches/clone.rs | 8 +- benches/recycler.rs | 8 +- src/abomonated.rs | 93 +++++++++---- src/lib.rs | 330 ++++++++++++++++++++++++++++---------------- tests/tests.rs | 170 ++++++++++++----------- 6 files changed, 384 insertions(+), 233 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 4492541..e5d0c2b 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -30,7 +30,9 @@ use test::Bencher; #[bench] fn 
vec_u_vn_s_enc(bencher: &mut Bencher) { _bench_enc(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } #[bench] fn vec_u_vn_s_dec(bencher: &mut Bencher) { _bench_dec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_enc(bencher: &mut Bencher, record: T) { +fn _bench_enc(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> +{ // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -43,7 +45,9 @@ fn _bench_enc(bencher: &mut Bencher, record: T) { }); } -fn _bench_dec(bencher: &mut Bencher, record: T) { +fn _bench_dec(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Eq +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/clone.rs b/benches/clone.rs index a5f5b2e..7078d3d 100644 --- a/benches/clone.rs +++ b/benches/clone.rs @@ -27,7 +27,9 @@ use test::Bencher; #[bench] fn vec_u_vn_s_e_d(bencher: &mut Bencher) { _bench_e_d(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } #[bench] fn vec_u_vn_s_cln(bencher: &mut Bencher) { _bench_cln(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_e_d(bencher: &mut Bencher, record: T) { +fn _bench_e_d(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> +{ // prepare encoded data for bencher.bytes let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -41,7 +43,9 @@ fn _bench_e_d(bencher: &mut Bencher, record: T) { }); } -fn _bench_cln(bencher: &mut Bencher, record: T) { +fn _bench_cln(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Clone +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/benches/recycler.rs b/benches/recycler.rs index 96391b5..fef1bee 100644 --- a/benches/recycler.rs +++ b/benches/recycler.rs @@ -27,7 +27,9 @@ use test::Bencher; // TODO : this reveals that working with a `vec![(); 1 << 40]` does not get optimized away. // #[bench] fn vec_u_vn_s_rec(bencher: &mut Bencher) { _bench_rec(bencher, vec![vec![(0u64, vec![(); 1 << 40], format!("grawwwwrr!")); 32]; 32]); } -fn _bench_own(bencher: &mut Bencher, record: T) { +fn _bench_own(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Clone +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } @@ -41,7 +43,9 @@ fn _bench_own(bencher: &mut Bencher, record: T) { } -fn _bench_rec(bencher: &mut Bencher, record: T) { +fn _bench_rec(bencher: &mut Bencher, record: T) + where for<'de> T: Abomonation<'de> + Recyclable +{ // prepare encoded data let mut bytes = Vec::new(); unsafe { encode(&record, &mut bytes).unwrap(); } diff --git a/src/abomonated.rs b/src/abomonated.rs index 328b195..02732ca 100644 --- a/src/abomonated.rs +++ b/src/abomonated.rs @@ -1,5 +1,4 @@ -use std::mem::transmute; use std::marker::PhantomData; use std::ops::{Deref, DerefMut}; @@ -7,17 +6,22 @@ use super::{Abomonation, decode}; /// A type wrapping owned decoded abomonated data. /// -/// This type ensures that decoding and pointer correction has already happened, -/// and implements `Deref` using a pointer cast (transmute). +/// This type ensures that decoding and pointer correction has already happened. 
+/// It provides a way to move the deserialized data around, while keeping +/// on-demand access to it via the `as_ref()` method. +/// +/// As an extra convenience, `Deref` is also implemented if T does +/// not contain references. Unfortunately, this trait cannot be safely +/// implemented when T does contain references. /// /// # Safety /// -/// The safety of this type, and in particular its `transmute` implementation of -/// the `Deref` trait, relies on the owned bytes not being externally mutated -/// once provided. You could imagine a new type implementing `DerefMut` as required, -/// but which also retains the ability (e.g. through `RefCell`) to mutate the bytes. -/// This would be very bad, but seems hard to prevent in the type system. Please -/// don't do this. +/// The safety of this type, and in particular of access to the deserialized +/// data, relies on the owned bytes not being externally mutated after the +/// `Abomonated` is constructed. You could imagine a new type implementing +/// `DerefMut` as required, but which also retains the ability (e.g. through +/// `RefCell`) to mutate the bytes. This would be very bad, but seems hard to +/// prevent in the type system. Please don't do this. /// /// You must also use a type `S` whose bytes have a fixed location in memory. /// Otherwise moving an instance of `Abomonated` may invalidate decoded @@ -54,8 +58,11 @@ pub struct Abomonated<T, S: DerefMut<Target=[u8]>> { decoded: S, } -impl<T: Abomonation, S: DerefMut<Target=[u8]>> Abomonated<T, S> { - +impl<'s, 't, T, S> Abomonated<T, S> + where S: DerefMut<Target=[u8]> + 's, + T: Abomonation<'t>, + 's: 't +{ /// Attempts to create decoded data from owned mutable bytes. /// /// This method will return `None` if it is unable to decode the data with @@ -94,34 +101,64 @@ impl<T: Abomonation, S: DerefMut<Target=[u8]>> Abomonated<T, S> { /// not change if the `bytes: S` instance is moved. Good examples are /// `Vec<u8>` whereas bad examples are `[u8; 16]`. pub unsafe fn new(mut bytes: S) -> Option<Self> { + // Fix type `T`'s inner pointers. Will return `None` on failure. + // + // FIXME: `slice::from_raw_parts_mut` is used to work around the borrow + // checker marking `bytes` as forever borrowed if `&mut bytes` is + // directly passed as input to `decode()`. But that is itself a + // byproduct of the API contract specified by the `where` clause + // above, which allows S to be `&'t mut [u8]` (and therefore + // require such a perpetual borrow) in the worst case. + // + // A different API contract might allow us to achieve the same + // result without resorting to such evil unsafe tricks. + // + decode::<T>(std::slice::from_raw_parts_mut(bytes.as_mut_ptr(), + bytes.len()))?; - // performs the underlying pointer correction, indicates success. - let decoded = decode::<T>(bytes.deref_mut()).is_some(); - - if decoded { - Some(Abomonated { - phantom: PhantomData, - decoded: bytes, - }) - } - else { - None - } + // Build the Abomonated structure + Some(Abomonated { + phantom: PhantomData, + decoded: bytes, + }) } } -impl<T: Abomonation, S: DerefMut<Target=[u8]>> Abomonated<T, S> { +impl<'t, T, S> Abomonated<T, S> + where S: DerefMut<Target=[u8]>, + T: Abomonation<'t> +{ + /// Get a read-only view on the deserialized bytes pub fn as_bytes(&self) -> &[u8] { &self.decoded } -} + /// Get a read-only view on the deserialized data + // + // NOTE: This method can be safely used even if type T contains references, + // because it makes sure that the borrow of `self` lasts long enough + // to encompass the lifetime of these references. + // + // Otherwise, it would be possible to extract an `&'static Something` + // from a short-lived borrow of a `Box<[u8]>`, then drop the `Box`, + // and end up with a dangling reference.
+    //
+    pub fn as_ref<'a: 't>(&'a self) -> &'a T {
+        unsafe { &*(self.decoded.as_ptr() as *const T) }
+    }
+}
 
-impl<T: Abomonation, S: DerefMut<Target=[u8]>> Deref for Abomonated<T, S> {
+// NOTE: The lifetime constraint that was applied to `as_ref()` cannot be
+//       applied to a `Deref` implementation. Therefore, `Deref` can only be
+//       used on types T which do not contain references, as enforced by the
+//       higher-ranked trait bound below.
+impl<T, S> Deref for Abomonated<T, S>
+    where S: DerefMut<Target=[u8]>,
+          for<'t> T: Abomonation<'t>,
+{
     type Target = T;
     #[inline]
     fn deref(&self) -> &T {
-        let result: &T = unsafe { transmute(self.decoded.get_unchecked(0)) };
-        result
+        self.as_ref()
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index aacd8f1..f78bec4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -72,7 +72,7 @@ pub mod abomonated;
 /// ```
 ///
 #[inline(always)]
-pub unsafe fn encode<T: Abomonation, W: Write>(typed: &T, mut write: W) -> IOResult<()> {
+pub unsafe fn encode<T: Entomb, W: Write>(typed: &T, mut write: W) -> IOResult<()> {
     let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::<T>());
     write.write_all(slice)?;
     T::entomb(typed, &mut write)
@@ -119,7 +119,9 @@ pub unsafe fn encode<T: Abomonation, W: Write>(typed: &T, mut write: W) -> IOResult<()> {
 /// assert!(remaining.len() == 0);
 /// }
 /// ```
-pub unsafe fn decode<T: Abomonation>(bytes: &mut [u8]) -> Option<(&T, &mut [u8])> {
+pub unsafe fn decode<'bytes, T>(bytes: &'bytes mut [u8]) -> Option<(&'bytes T, &'bytes mut [u8])>
+    where T: Exhume<'bytes>
+{
     if bytes.len() < mem::size_of::<T>() { None }
     else {
         let (split1, split2) = bytes.split_at_mut(mem::size_of::<T>());
@@ -138,12 +140,24 @@ pub unsafe fn decode<T: Abomonation>(bytes: &mut [u8]) -> Option<(&T, &mut [u8])> {
 /// # Safety
 ///
 /// The `measure` method is safe. It neither produces nor consults serialized representations.
-pub fn measure<T: Abomonation>(typed: &T) -> usize {
+pub fn measure<T: Entomb>(typed: &T) -> usize {
     mem::size_of::<T>() + T::extent(&typed)
 }
 
 /// Abomonation provides methods to serialize any heap data the implementor owns.
 ///
+/// The trait's lifetime parameter `'de` represents the set of slices of bytes `&'de mut [u8]`
+/// from which it is valid to deserialize an `&'de impl Abomonation<'de>`. Types which own all of
+/// their data may be deserialized freely, and implement this trait for all `'de`. However, we
+/// need to enforce some deserialization restrictions on types which contain references.
+///
+/// The reason is that abomonation performs in-place deserialization. To do that, it has to
+/// patch a type's reference to point to other serialized data. Where a type _should_ contain an
+/// `&'a T`, abomonation patches that into an `&'de T`. For this substitution to be valid, we
+/// need `'de` to outlive `'a`. Otherwise, a careless user could ask abomonation to deserialize a
+/// `&'static T` and get a `&'de T` which masquerades as an `&'static T` instead. The user could
+/// then copy this reference, drop the bytes, and get use-after-free undefined behavior.
+///
 /// The default implementations for Abomonation's methods are all empty. Many types have no owned
 /// data to transcribe. Some do, however, and need to carefully implement these unsafe methods.
 ///
@@ -170,13 +184,35 @@ pub fn measure<T: Abomonation>(typed: &T) -> usize {
 ///
 /// If you are concerned about safety, it may be best to avoid Abomonation all together. It does
 /// several things that may be undefined behavior, depending on how undefined behavior is defined.
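+///
+/// For a concrete feel of the `'de` contract described above, here is a
+/// minimal sketch: the decoded reference borrows the byte buffer, so the
+/// buffer can neither be dropped nor mutated while that reference is alive.
+///
+/// ```
+/// use abomonation::{encode, decode};
+///
+/// let mut bytes = Vec::new();
+/// unsafe { encode(&42u64, &mut bytes).unwrap(); }
+///
+/// // `value: &u64` is tied to `bytes` through the `'de` lifetime of `Exhume`
+/// let (value, rest) = unsafe { decode::<u64>(&mut bytes[..]) }.unwrap();
+/// assert_eq!(*value, 42);
+/// assert!(rest.is_empty());
+/// ```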
-pub unsafe trait Abomonation { +pub unsafe trait Abomonation<'de> : Entomb + Exhume<'de> {} +unsafe impl<'de, T: Entomb + Exhume<'de>> Abomonation<'de> for T {} + +/// Types which can be serialized into bytes by abomonation +/// +/// Please consult the Abomonation trait's documentation for more details. Most +/// types which are serializable by abomonation are also deserializable by +/// abomonation, but we need to have a separate serialization and +/// deserialization trait for obscure lifetime-related reasons. +/// +pub unsafe trait Entomb { /// Write any additional information about `&self` beyond its binary representation. /// /// Most commonly this is owned data on the other end of pointers in `&self`. The return value /// reports any failures in writing to `write`. unsafe fn entomb(&self, _write: &mut W) -> IOResult<()> { Ok(()) } + /// Reports the number of further bytes required to entomb `self`. + fn extent(&self) -> usize { 0 } +} + +/// Types which can be deserialized from `&'de mut [u8]` to `&'de T` by abomonation +/// +/// Please consult the Abomonation trait's documentation for more details. Most +/// types which are serializable by abomonation are also deserializable by +/// abomonation, but we need to have a separate serialization and +/// deserialization trait for obscure lifetime-related reasons. +/// +pub unsafe trait Exhume<'de> : Entomb + 'de { /// Recover any information for `self_` not evident from its binary representation. /// /// Most commonly this populates pointers with valid references into `bytes`. @@ -187,10 +223,7 @@ pub unsafe trait Abomonation { /// `exhume` using raw pointer operations as much as feasible. // // FIXME: Replace self_ with self once Rust has arbitrary self types - unsafe fn exhume<'a>(_self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { Some(bytes) } - - /// Reports the number of further bytes required to entomb `self`. - fn extent(&self) -> usize { 0 } + unsafe fn exhume(_self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { Some(bytes) } } /// The `unsafe_abomonate!` macro takes a type name with an optional list of fields, and implements @@ -238,30 +271,33 @@ pub unsafe trait Abomonation { #[deprecated(since="0.5", note="please use the abomonation_derive crate")] macro_rules! unsafe_abomonate { ($t:ty) => { - unsafe impl Abomonation for $t { } + unsafe impl $crate::Entomb for $t { } + unsafe impl $crate::Exhume<'_> for $t { } }; ($t:ty : $($field:ident),*) => { - unsafe impl Abomonation for $t { + unsafe impl $crate::Entomb for $t { unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { - $( Abomonation::entomb(&self.$field, write)?; )* + $( $crate::Entomb::entomb(&self.$field, write)?; )* Ok(()) } - unsafe fn exhume<'a>(self_: ::std::ptr::NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + let mut size = 0; + $( size += $crate::Entomb::extent(&self.$field); )* + size + } + } + + unsafe impl<'de> $crate::Exhume<'de> for $t { + unsafe fn exhume(self_: ::std::ptr::NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { $( // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. // The proposed &raw mut operator would allow avoiding this. 
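+                // (Concretely, `&mut (*self_.as_ptr()).$field` below first
+                // materializes a temporary `&mut` to the possibly-dangling
+                // field, which `From::from` then turns into the raw `NonNull`
+                // pointer that `exhume` actually needs.)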
let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); - bytes = Abomonation::exhume(field_ptr, bytes)?; + bytes = $crate::Exhume::exhume(field_ptr, bytes)?; )* Some(bytes) } - - fn extent(&self) -> usize { - let mut size = 0; - $( size += Abomonation::extent(&self.$field); )* - size - } } }; } @@ -269,7 +305,7 @@ macro_rules! unsafe_abomonate { // general code for tuples (can't use '0', '1', ... as field identifiers) macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( - unsafe impl<$($ty: Abomonation),*> Abomonation for ($($ty,)*) { + unsafe impl<$($ty: Entomb),*> Entomb for ($($ty,)*) { #[allow(non_snake_case)] unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { let ($(ref $ty,)*) = *self; @@ -278,7 +314,17 @@ macro_rules! tuple_abomonate { } #[allow(non_snake_case)] - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + let mut size = 0; + let ($(ref $ty,)*) = *self; + $( size += $ty::extent($ty); )* + size + } + } + + unsafe impl<'de, $($ty: Exhume<'de>),*> Exhume<'de> for ($($ty,)*) { + #[allow(non_snake_case)] + unsafe fn exhume(self_: NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I think avoiding this would require a cleaner way to iterate over tuple fields. // One possibility would be a C++11-style combination of variadic generics and recursion. @@ -289,59 +335,82 @@ macro_rules! tuple_abomonate { )* Some(bytes) } - - #[allow(non_snake_case)] - fn extent(&self) -> usize { - let mut size = 0; - let ($(ref $ty,)*) = *self; - $( size += $ty::extent($ty); )* - size - } } ); } -unsafe impl Abomonation for u8 { } -unsafe impl Abomonation for u16 { } -unsafe impl Abomonation for u32 { } -unsafe impl Abomonation for u64 { } -unsafe impl Abomonation for u128 { } -unsafe impl Abomonation for usize { } - -unsafe impl Abomonation for i8 { } -unsafe impl Abomonation for i16 { } -unsafe impl Abomonation for i32 { } -unsafe impl Abomonation for i64 { } -unsafe impl Abomonation for i128 { } -unsafe impl Abomonation for isize { } - -unsafe impl Abomonation for NonZeroU8 { } -unsafe impl Abomonation for NonZeroU16 { } -unsafe impl Abomonation for NonZeroU32 { } -unsafe impl Abomonation for NonZeroU64 { } -unsafe impl Abomonation for NonZeroU128 { } -unsafe impl Abomonation for NonZeroUsize { } - -unsafe impl Abomonation for NonZeroI8 { } -unsafe impl Abomonation for NonZeroI16 { } -unsafe impl Abomonation for NonZeroI32 { } -unsafe impl Abomonation for NonZeroI64 { } -unsafe impl Abomonation for NonZeroI128 { } -unsafe impl Abomonation for NonZeroIsize { } - -unsafe impl Abomonation for f32 { } -unsafe impl Abomonation for f64 { } - -unsafe impl Abomonation for bool { } -unsafe impl Abomonation for () { } - -unsafe impl Abomonation for char { } - -unsafe impl Abomonation for ::std::time::Duration { } - -unsafe impl Abomonation for PhantomData {} - -unsafe impl Abomonation for Option { +unsafe impl Entomb for u8 {} +unsafe impl Exhume<'_> for u8 {} +unsafe impl Entomb for u16 {} +unsafe impl Exhume<'_> for u16 {} +unsafe impl Entomb for u32 {} +unsafe impl Exhume<'_> for u32 {} +unsafe impl Entomb for u64 {} +unsafe impl Exhume<'_> for u64 {} +unsafe impl Entomb for u128 {} +unsafe impl Exhume<'_> for u128 {} +unsafe impl Entomb for usize {} +unsafe impl Exhume<'_> for usize {} + +unsafe impl Entomb for i8 {} +unsafe impl Exhume<'_> for i8 {} +unsafe impl Entomb for i16 {} +unsafe impl Exhume<'_> for 
i16 {} +unsafe impl Entomb for i32 {} +unsafe impl Exhume<'_> for i32 {} +unsafe impl Entomb for i64 {} +unsafe impl Exhume<'_> for i64 {} +unsafe impl Entomb for i128 {} +unsafe impl Exhume<'_> for i128 {} +unsafe impl Entomb for isize {} +unsafe impl Exhume<'_> for isize {} + +unsafe impl Entomb for NonZeroU8 {} +unsafe impl Exhume<'_> for NonZeroU8 {} +unsafe impl Entomb for NonZeroU16 {} +unsafe impl Exhume<'_> for NonZeroU16 {} +unsafe impl Entomb for NonZeroU32 {} +unsafe impl Exhume<'_> for NonZeroU32 {} +unsafe impl Entomb for NonZeroU64 {} +unsafe impl Exhume<'_> for NonZeroU64 {} +unsafe impl Entomb for NonZeroU128 {} +unsafe impl Exhume<'_> for NonZeroU128 {} +unsafe impl Entomb for NonZeroUsize {} +unsafe impl Exhume<'_> for NonZeroUsize {} + +unsafe impl Entomb for NonZeroI8 {} +unsafe impl Exhume<'_> for NonZeroI8 {} +unsafe impl Entomb for NonZeroI16 {} +unsafe impl Exhume<'_> for NonZeroI16 {} +unsafe impl Entomb for NonZeroI32 {} +unsafe impl Exhume<'_> for NonZeroI32 {} +unsafe impl Entomb for NonZeroI64 {} +unsafe impl Exhume<'_> for NonZeroI64 {} +unsafe impl Entomb for NonZeroI128 {} +unsafe impl Exhume<'_> for NonZeroI128 {} +unsafe impl Entomb for NonZeroIsize {} +unsafe impl Exhume<'_> for NonZeroIsize {} + +unsafe impl Entomb for f32 {} +unsafe impl Exhume<'_> for f32 {} +unsafe impl Entomb for f64 {} +unsafe impl Exhume<'_> for f64 {} + +unsafe impl Entomb for bool {} +unsafe impl Exhume<'_> for bool {} +unsafe impl Entomb for () {} +unsafe impl Exhume<'_> for () {} + +unsafe impl Entomb for char {} +unsafe impl Exhume<'_> for char {} + +unsafe impl Entomb for ::std::time::Duration {} +unsafe impl Exhume<'_> for ::std::time::Duration {} + +unsafe impl Entomb for PhantomData {} +unsafe impl<'de, T: 'de> Exhume<'de> for PhantomData {} + +unsafe impl Entomb for Option { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { if let &Some(ref inner) = self { T::entomb(inner, write)?; @@ -349,7 +418,12 @@ unsafe impl Abomonation for Option { Ok(()) } - unsafe fn exhume<'a>(self_: NonNull, mut bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + self.as_ref().map(T::extent).unwrap_or(0) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Option { + unsafe fn exhume(self_: NonNull, mut bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. if let Some(ref mut inner) = *self_.as_ptr() { @@ -358,13 +432,9 @@ unsafe impl Abomonation for Option { } Some(bytes) } - - fn extent(&self) -> usize { - self.as_ref().map(T::extent).unwrap_or(0) - } } -unsafe impl Abomonation for Result { +unsafe impl Entomb for Result { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { match self { &Ok(ref inner) => T::entomb(inner, write), @@ -372,7 +442,15 @@ unsafe impl Abomonation for Result { } } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + match self { + &Ok(ref inner) => T::extent(inner), + &Err(ref inner) => E::extent(inner), + } + } +} +unsafe impl<'de, T: Exhume<'de>, E: Exhume<'de>> Exhume<'de> for Result { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. 
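+        // Note: reading the discriminant through the raw pointer assumes the
+        // serialized bytes already hold a valid enum tag; only the payload may
+        // still contain dangling pointers at this point.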
match *self_.as_ptr() { @@ -386,13 +464,6 @@ unsafe impl Abomonation for Result { } } } - - fn extent(&self) -> usize { - match self { - &Ok(ref inner) => T::extent(inner), - &Err(ref inner) => E::extent(inner), - } - } } tuple_abomonate!(A); @@ -430,19 +501,21 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD macro_rules! array_abomonate { ($size:expr) => ( - unsafe impl Abomonation for [T; $size] { + unsafe impl Entomb for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { entomb_slice(&self[..], write) } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut[u8]) -> Option<&'a mut [u8]> { - exhume_slice(self_.as_ptr() as *mut T, $size, bytes) - } - fn extent(&self) -> usize { slice_extent(&self[..]) } } + + unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T; $size] { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + exhume_slice(self_.as_ptr() as *mut T, $size, bytes) + } + } ) } @@ -480,13 +553,18 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -unsafe impl Abomonation for String { +unsafe impl Entomb for String { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } + fn extent(&self) -> usize { + self.len() + } +} +unsafe impl<'de> Exhume<'de> for String { #[inline] - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &String to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on String implementation details. let self_len = self_.as_ref().len(); @@ -497,20 +575,21 @@ unsafe impl Abomonation for String { Some(rest) } } - - fn extent(&self) -> usize { - self.len() - } } -unsafe impl Abomonation for Vec { +unsafe impl Entomb for Vec { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; entomb_slice(&self[..], write) } + fn extent(&self) -> usize { + mem::size_of::() * self.len() + slice_extent(&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { #[inline] - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on Vec implementation details. 
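+        // Layout reminder: `bytes` holds `self.len() * size_of::<T>()` bytes
+        // of element storage first, then any heap data owned by the elements;
+        // the rebuilt Vec aliases that storage instead of allocating.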
let self_len = self_.as_ref().len(); @@ -524,19 +603,20 @@ unsafe impl Abomonation for Vec { Some(rest) } } - - fn extent(&self) -> usize { - mem::size_of::() * self.len() + slice_extent(&self[..]) - } } -unsafe impl Abomonation for Box { +unsafe impl Entomb for Box { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; T::entomb(&**self, write) } - unsafe fn exhume<'a>(self_: NonNull, bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { + fn extent(&self) -> usize { + mem::size_of::() + T::extent(&**self) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { let binary_len = mem::size_of::(); if binary_len > bytes.len() { None } else { @@ -547,10 +627,6 @@ unsafe impl Abomonation for Box { Some(rest) } } - - fn extent(&self) -> usize { - mem::size_of::() + T::extent(&**self) - } } // This method currently enables undefined behavior, by exposing padding bytes. @@ -559,15 +635,24 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { } // Common subset of "entomb" for all [T]-like types -unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { +unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { for element in slice { T::entomb(element, write)?; } Ok(()) } +// Common subset of "extent" for all [T]-like types +fn slice_extent(slice: &[T]) -> usize { + slice.iter().map(T::extent).sum() +} + // Common subset of "exhume" for all [T]-like types // (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) #[inline] -unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut bytes: &'a mut [u8]) -> Option<&'a mut [u8]> { +unsafe fn exhume_slice<'de, T: Exhume<'de>>( + first_ptr: *mut T, + length: usize, + mut bytes: &'de mut [u8] +) -> Option<&'de mut [u8]> { for i in 0..length { let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); bytes = T::exhume(element_ptr, bytes)?; @@ -575,20 +660,21 @@ unsafe fn exhume_slice<'a, T: Abomonation>(first_ptr: *mut T, length: usize, mut Some(bytes) } -// Common subset of "extent" for all [T]-like types -fn slice_extent(slice: &[T]) -> usize { - slice.iter().map(T::extent).sum() -} - mod network { - use Abomonation; + use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; - unsafe impl Abomonation for IpAddr { } - unsafe impl Abomonation for Ipv4Addr { } - unsafe impl Abomonation for Ipv6Addr { } - - unsafe impl Abomonation for SocketAddr { } - unsafe impl Abomonation for SocketAddrV4 { } - unsafe impl Abomonation for SocketAddrV6 { } + unsafe impl Entomb for IpAddr {} + unsafe impl Exhume<'_> for IpAddr {} + unsafe impl Entomb for Ipv4Addr {} + unsafe impl Exhume<'_> for Ipv4Addr {} + unsafe impl Entomb for Ipv6Addr {} + unsafe impl Exhume<'_> for Ipv6Addr {} + + unsafe impl Entomb for SocketAddr {} + unsafe impl Exhume<'_> for SocketAddr {} + unsafe impl Entomb for SocketAddrV4 {} + unsafe impl Exhume<'_> for SocketAddrV4 {} + unsafe impl Entomb for SocketAddrV6 {} + unsafe impl Exhume<'_> for SocketAddrV6 {} } diff --git a/tests/tests.rs b/tests/tests.rs index 3ff0dec..6ffafce 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -3,44 +3,92 @@ extern crate abomonation; use abomonation::*; use std::fmt::Debug; -#[test] fn test_array() { _test_pass(vec![[0, 1, 2]; 1024]); } -#[test] fn test_nonzero() { _test_pass(vec![[std::num::NonZeroI32::new(1)]; 1024]); } 
-#[test] fn test_opt_vec() { _test_pass(vec![Some(vec![0,1,2]), None]); } -#[test] fn test_alignment() { _test_pass(vec![(format!("x"), vec![1,2,3]); 1024]); } -#[test] fn test_alignment_128() { _test_pass(vec![(format!("x"), vec![1u128,2,3]); 1024]); } -#[test] fn test_option_box_u64() { _test_pass(vec![Some(Box::new(0u64))]); } -#[test] fn test_option_vec() { _test_pass(vec![Some(vec![0, 1, 2])]); } -#[test] fn test_u32x4_pass() { _test_pass(vec![((1,2,3),vec![(0u32, 0u32, 0u32, 0u32); 1024])]); } -#[test] fn test_u64_pass() { _test_pass(vec![0u64; 1024]); } -#[test] fn test_u128_pass() { _test_pass(vec![0u128; 1024]); } -#[test] fn test_string_pass() { _test_pass(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_pass() { _test_pass(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } - -#[test] fn test_u64_fail() { _test_fail(vec![0u64; 1024]); } -#[test] fn test_u128_fail() { _test_fail(vec![0u128; 1024]); } -#[test] fn test_string_fail() { _test_fail(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_fail() { _test_fail(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } - -#[test] fn test_array_size() { _test_size(vec![[0, 1, 2]; 1024]); } -#[test] fn test_opt_vec_size() { _test_size(vec![Some(vec![0,1,2]), None]); } -#[test] fn test_alignment_size() { _test_size(vec![(format!("x"), vec![1,2,3]); 1024]); } -#[test] fn test_option_box_u64_size() { _test_size(vec![Some(Box::new(0u64))]); } -#[test] fn test_option_vec_size() { _test_size(vec![Some(vec![0, 1, 2])]); } -#[test] fn test_u32x4_size() { _test_size(vec![((1,2,3),vec![(0u32, 0u32, 0u32, 0u32); 1024])]); } -#[test] fn test_u64_size() { _test_size(vec![0u64; 1024]); } -#[test] fn test_u128_size() { _test_size(vec![0u128; 1024]); } -#[test] fn test_string_size() { _test_size(vec![format!("grawwwwrr!"); 1024]); } -#[test] fn test_vec_u_s_size() { _test_size(vec![vec![(0u64, format!("grawwwwrr!")); 32]; 32]); } +// Test struct for the unsafe_abomonate macro +#[derive(Clone, Debug, Eq, PartialEq)] +struct MyStruct { + a: String, + b: u64, + c: Vec, +} +unsafe_abomonate!(MyStruct : a, b, c); -#[test] -fn test_phantom_data_for_non_abomonatable_type() { - use std::marker::PhantomData; - struct NotAbomonatable; - _test_pass(PhantomData::::default()); +// Test for PhantomData abomonation, which has no Abomonation bound +struct NotAbomonatable; +type PhantomNotAbo = std::marker::PhantomData::; + +// Generic serialization/deserialization testing procedure, add your data here. +macro_rules! 
gen_tests { + ( + $( $data:expr => ($pass:ident, $fail:ident, $size:ident) ), *$(,)* + ) => { + $( + #[test] fn $pass() { _test_pass(&mut Vec::new(), vec![$data; 1024]); } + #[test] fn $fail() { _test_fail(&mut Vec::new(), vec![$data; 1024]); } + #[test] fn $size() { _test_size(&mut Vec::new(), vec![$data; 1024]); } + )* + }; +} +gen_tests!{ + [4, 1, 2] => (test_array_pass, + test_array_fail, + test_array_size), + + std::num::NonZeroI32::new(1) => (test_nonzero_pass, + test_nonzero_fail, + test_nonzero_size), + + Some(vec![8, 1, 2]) => (test_option_vec_pass, + test_option_vec_fail, + test_option_vec_size), + + (format!("x"), vec![1, 2, 3]) => (test_alignment_pass, + test_alignment_fail, + test_alignment_size), + + (format!("x"), vec![1u128, 2, 3]) => (test_alignment_128_pass, + test_alignment_128_fail, + test_alignment_128_size), + + Some(Box::new(9u64)) => (test_option_box_u64_pass, + test_option_box_u64_fail, + test_option_box_u64_size), + + (3u32, 8u32, 1u32, 7u32) => (test_u32x4_pass, + test_u32x4_fail, + test_u32x4_size), + + 42u64 => (test_u64_pass, + test_u64_fail, + test_u64_size), + + 687u128 => (test_u128_pass, + test_u128_fail, + test_u128_size), + + format!("grawwwwrr!") => (test_string_pass, + test_string_fail, + test_string_size), + + vec![(0u64, format!("grawwwwrr!")); 32] => (test_vec_u_s_pass, + test_vec_u_s_fail, + test_vec_u_s_size), + + MyStruct{ a: "test".to_owned(), + b: 0, + c: vec![0, 1, 2] } => (test_macro_pass, + test_macro_fail, + test_macro_size), + + PhantomNotAbo::default() => (test_phantom_notabo_pass, + test_phantom_notabo_fail, + test_phantom_notabo_size), } -fn _test_pass(record: T) { - let mut bytes = Vec::new(); +// FIXME: I could not find an API which allows _test_pass to allocate a Vec +// internally without restricting the set of allowed Abomonation impls. +fn _test_pass<'bytes, T>(mut bytes: &'bytes mut Vec, record: T) + where T: Abomonation<'bytes> + Debug + Eq +{ unsafe { encode(&record, &mut bytes).unwrap(); } { let (result, rest) = unsafe { decode::(&mut bytes[..]) }.unwrap(); @@ -49,52 +97,20 @@ fn _test_pass(record: T) { } } -fn _test_fail(record: T) { - let mut bytes = Vec::new(); - unsafe { encode(&record, &mut bytes).unwrap(); } - bytes.pop(); - assert_eq!(unsafe { decode::(&mut bytes[..]) }, None); -} - -fn _test_size(record: T) { - let mut bytes = Vec::new(); +// FIXME: I could not find an API which allows _test_fail to allocate a Vec +// internally without restricting the set of allowed Abomonation impls. 
+fn _test_fail<'bytes, T>(mut bytes: &'bytes mut Vec<u8>, record: T)
+    where T: Abomonation<'bytes> + Debug + Eq
+{
     unsafe { encode(&record, &mut bytes).unwrap(); }
-    assert_eq!(bytes.len(), measure(&record));
-}
-
-
-#[derive(Debug, Eq, PartialEq)]
-struct MyStruct {
-    a: String,
-    b: u64,
-    c: Vec<u64>,
-}
-
-unsafe_abomonate!(MyStruct : a, b, c);
-
-#[test]
-fn test_macro() {
-    // create some test data out of abomonation-approved types
-    let record = MyStruct{ a: "test".to_owned(), b: 0, c: vec![0, 1, 2] };
-
-    // encode vector into a Vec<u8>
-    let mut bytes = Vec::new();
-    unsafe { encode(&record, &mut bytes).unwrap(); }
-
-    // decode a &MyStruct from binary data
-    if let Some((result, rest)) = unsafe { decode::<MyStruct>(&mut bytes) } {
-        assert_eq!(result, &record);
-        assert_eq!(rest.len(), 0);
+    if bytes.pop().is_some() {
+        assert_eq!(unsafe { decode::<T>(&mut bytes[..]) }, None);
     }
 }
 
-#[test]
-fn test_macro_size() {
-    // create some test data out of abomonation-approved types
-    let record = MyStruct{ a: "test".to_owned(), b: 0, c: vec![0, 1, 2] };
-
-    // encode vector into a Vec<u8>
-    let mut bytes = Vec::new();
+// FIXME: I could not find an API which allows _test_size to allocate a Vec
+//        internally without restricting the set of allowed Abomonation impls.
+fn _test_size<'bytes, T: Abomonation<'bytes>>(mut bytes: &'bytes mut Vec<u8>, record: T) {
     unsafe { encode(&record, &mut bytes).unwrap(); }
     assert_eq!(bytes.len(), measure(&record));
 }

From 7a326fe4791064cab6ed2deebb9e728a3897a5de Mon Sep 17 00:00:00 2001
From: Hadrien G
Date: Thu, 3 Oct 2019 21:57:46 +0200
Subject: [PATCH 22/34] Add support for &[mut] T

---
 src/lib.rs     | 74 +++++++++++++++++++++++++++++++++++++++++++-------
 tests/tests.rs |  4 +++
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index f78bec4..797de38 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -605,7 +605,21 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec<T> {
     }
 }
 
-unsafe impl<T: Entomb> Entomb for Box<T> {
+// NOTE: While it might be tempting to decouple 'de from the reference target
+//       and implement Exhume<'de> for &'target T, the two lifetimes actually
+//       have to be exactly the same. Here's proof:
+//
+//       - Deserialization would produce an &'de &'target T. A reference is
+//         only valid if the target is longer-lived, so we need 'target: 'de.
+//       - The deserializer will silently patch &'target T into &'de T. This is
+//         only safe to do if &'de T : &'target T, so we also need 'de: 'target.
+//
+//       If 'target must outlive 'de and 'de must outlive 'target, then the two
+//       lifetimes actually must be exactly the same. Which kind of makes sense:
+//       we start from 'de bytes, and we end up producing references that point
+//       into those same bytes.
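+//
+//       In practice, this is why the impls below are written for `&'de T` and
+//       `&'de mut T`: the reference target lives exactly as long as the
+//       deserialization buffer it points into.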
+// +unsafe impl<'de, T: Entomb> Entomb for &'de T { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; T::entomb(&**self, write) @@ -615,17 +629,45 @@ unsafe impl Entomb for Box { mem::size_of::() + T::extent(&**self) } } +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de T { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(target); + Some(rest) + } +} + +unsafe impl<'de, T: Entomb> Entomb for &'de mut T { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&T>::entomb(&&**self, write) + } + + fn extent(&self) -> usize { + <&T>::extent(&&**self) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut T { + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(target); + Some(rest) + } +} + +unsafe impl Entomb for Box { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&T>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&T>::extent(&self.as_ref()) + } +} unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { - let binary_len = mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let box_target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); - rest = T::exhume(box_target, rest)?; - self_.as_ptr().write(Box::from_raw(box_target.as_ptr())); - Some(rest) - } + let (target, rest) = exhume_ref(bytes)?; + self_.as_ptr().write(Box::from_raw(target as *mut _)); + Some(rest) } } @@ -660,6 +702,18 @@ unsafe fn exhume_slice<'de, T: Exhume<'de>>( Some(bytes) } +// Common subset of "exhume" for all &mut T-like types +unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de mut T, &'de mut [u8])> { + let binary_len = mem::size_of::(); + if binary_len > bytes.len() { None } + else { + let (mine, mut rest) = bytes.split_at_mut(binary_len); + let target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); + rest = T::exhume(target, rest)?; + Some((&mut *target.as_ptr(), rest)) + } +} + mod network { use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; diff --git a/tests/tests.rs b/tests/tests.rs index 6ffafce..8028b0b 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -82,6 +82,10 @@ gen_tests!{ PhantomNotAbo::default() => (test_phantom_notabo_pass, test_phantom_notabo_fail, test_phantom_notabo_size), + + Some(&42u64) => (test_ref_u64_pass, + test_ref_u64_fail, + test_ref_u64_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From f2fe84daa864b7ed60d1de8eaf198a6ca0d7461b Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 3 Oct 2019 22:16:31 +0200 Subject: [PATCH 23/34] Add support for str and &[mut] str --- src/lib.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++------ tests/tests.rs | 4 ++++ 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 797de38..ae1343e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -403,6 +403,8 @@ unsafe impl Exhume<'_> for () {} unsafe impl Entomb for char {} unsafe impl Exhume<'_> for char {} +unsafe impl Entomb for str {} +unsafe impl Exhume<'_> for str {} unsafe impl Entomb for ::std::time::Duration {} unsafe impl 
Exhume<'_> for ::std::time::Duration {} @@ -553,7 +555,7 @@ array_abomonate!(30); array_abomonate!(31); array_abomonate!(32); -unsafe impl Entomb for String { +unsafe impl<'de> Entomb for &'de str { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(self.as_bytes()) } @@ -562,18 +564,57 @@ unsafe impl Entomb for String { self.len() } } +unsafe impl<'de> Exhume<'de> for &'de str { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &str to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on &str implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl<'de> Entomb for &'de mut str { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&str>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&str>::extent(&self.as_ref()) + } +} +unsafe impl<'de> Exhume<'de> for &'de mut str { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &mut str to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on &str implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl Entomb for String { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&str>::entomb(&self.as_ref(), write) + } + + fn extent(&self) -> usize { + <&str>::extent(&self.as_ref()) + } +} unsafe impl<'de> Exhume<'de> for String { #[inline] unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { // FIXME: This (briefly) constructs an &String to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on String implementation details. let self_len = self_.as_ref().len(); - if self_len > bytes.len() { None } - else { - let (mine, rest) = bytes.split_at_mut(self_len); - self_.as_ptr().write(String::from_raw_parts(mine.as_mut_ptr(), self_len, self_len)); - Some(rest) - } + let (s, rest) = exhume_str_ref(self_len, bytes)?; + self_.as_ptr().write(String::from_raw_parts(s.as_mut_ptr(), s.len(), s.len())); + Some(rest) } } @@ -714,6 +755,15 @@ unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de } } +// Common subset of "exhume" for all &str-like types +unsafe fn exhume_str_ref<'de>(length: usize, bytes: &'de mut [u8]) -> Option<(&'de mut str, &'de mut [u8])> { + if length > bytes.len() { None } + else { + let (mine, rest) = bytes.split_at_mut(length); + Some((std::str::from_utf8_unchecked_mut(mine), rest)) + } +} + mod network { use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; diff --git a/tests/tests.rs b/tests/tests.rs index 8028b0b..85e8708 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -86,6 +86,10 @@ gen_tests!{ Some(&42u64) => (test_ref_u64_pass, test_ref_u64_fail, test_ref_u64_size), + + &"grawwwwrr!" 
=> (test_str_pass, + test_str_fail, + test_str_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From 1b7166bff5d8c6f206055c0fb19df5d253f7c6bf Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 3 Oct 2019 22:30:41 +0200 Subject: [PATCH 24/34] Add support for [T] and &[mut] [T] --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++++++------------ tests/tests.rs | 4 ++ 2 files changed, 90 insertions(+), 26 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ae1343e..a9c3822 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -501,15 +501,34 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE); tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF); +unsafe impl Entomb for [T] { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + for element in self { T::entomb(element, write)?; } + Ok(()) + } + + fn extent(&self) -> usize { + self.iter().map(T::extent).sum() + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T] { + unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + // FIXME: This constructs an &[T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. + let self_len = self_.as_ref().len(); + exhume_slice(self_.as_ptr() as *mut T, self_len, bytes) + } +} + macro_rules! array_abomonate { ($size:expr) => ( unsafe impl Entomb for [T; $size] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - entomb_slice(&self[..], write) + <[T]>::entomb(&self[..], write) } fn extent(&self) -> usize { - slice_extent(&self[..]) + <[T]>::extent(&self[..]) } } @@ -618,14 +637,56 @@ unsafe impl<'de> Exhume<'de> for String { } } -unsafe impl Entomb for Vec { +unsafe impl<'de, T: Entomb> Entomb for &'de [T] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write.write_all(typed_to_bytes(&self[..]))?; - entomb_slice(&self[..], write) + <[T]>::entomb(&self[..], write) + } + + fn extent(&self) -> usize { + mem::size_of::() * self.len() + <[T]>::extent(&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de [T] { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &[T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. + let self_len = self_.as_ref().len(); + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl<'de, T: Entomb> Entomb for &'de mut [T] { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&[T]>::entomb(&&self[..], write) + } + + fn extent(&self) -> usize { + <&[T]>::extent(&&self[..]) + } +} +unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut [T] { + #[inline] + unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + // FIXME: This (briefly) constructs an &mut [T] to invalid data, which is UB. + // I'm not sure if this can be fully resolved without relying on slice implementation details. 
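+        // Only the length field of the (still dangling) fat pointer is read
+        // here; the element data itself is recovered from `bytes` below.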
+ let self_len = self_.as_ref().len(); + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(s); + Some(rest) + } +} + +unsafe impl Entomb for Vec { + unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + <&[T]>::entomb(&&self[..], write) } fn extent(&self) -> usize { - mem::size_of::() * self.len() + slice_extent(&self[..]) + <&[T]>::extent(&&self[..]) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { @@ -634,15 +695,9 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on Vec implementation details. let self_len = self_.as_ref().len(); - let binary_len = self_len * mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let first_ptr = mine.as_mut_ptr() as *mut T; - rest = exhume_slice(first_ptr, self_len, rest)?; - self_.as_ptr().write(Vec::from_raw_parts(first_ptr, self_len, self_len)); - Some(rest) - } + let (s, rest) = exhume_slice_ref(self_len, bytes)?; + self_.as_ptr().write(Vec::from_raw_parts(s.as_mut_ptr(), self_len, self_len)); + Some(rest) } } @@ -717,19 +772,8 @@ unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) } -// Common subset of "entomb" for all [T]-like types -unsafe fn entomb_slice(slice: &[T], write: &mut W) -> IOResult<()> { - for element in slice { T::entomb(element, write)?; } - Ok(()) -} - -// Common subset of "extent" for all [T]-like types -fn slice_extent(slice: &[T]) -> usize { - slice.iter().map(T::extent).sum() -} - // Common subset of "exhume" for all [T]-like types -// (I'd gladly take a NonNull<[T]>, but it is too difficult to build raw pointers to slices) +// (I'd gladly move this to [T]::exhume, but building a NonNull<[T]> is currently too difficult) #[inline] unsafe fn exhume_slice<'de, T: Exhume<'de>>( first_ptr: *mut T, @@ -743,6 +787,22 @@ unsafe fn exhume_slice<'de, T: Exhume<'de>>( Some(bytes) } +// Common subset of "exhume" for all &[T]-like types +#[inline] +unsafe fn exhume_slice_ref<'de, T: Exhume<'de>>( + length: usize, + bytes: &'de mut [u8] +) -> Option<(&'de mut [T], &'de mut [u8])> { + let binary_len = length * mem::size_of::(); + if binary_len > bytes.len() { None } + else { + let (mine, mut rest) = bytes.split_at_mut(binary_len); + let first_ptr = mine.as_mut_ptr() as *mut T; + rest = exhume_slice(first_ptr, length, rest)?; + Some((std::slice::from_raw_parts_mut(first_ptr, length).into(), rest)) + } +} + // Common subset of "exhume" for all &mut T-like types unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de mut T, &'de mut [u8])> { let binary_len = mem::size_of::(); diff --git a/tests/tests.rs b/tests/tests.rs index 85e8708..ad16bf3 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -90,6 +90,10 @@ gen_tests!{ &"grawwwwrr!" 
=> (test_str_pass, test_str_fail, test_str_size), + + &[0, 1, 2] => (test_slice_pass, + test_slice_fail, + test_slice_size), } // FIXME: I could not find an API which allows _test_pass to allocate a Vec From 8ba2a086947a9e0d7e5867cdfc3dace78ff72735 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Fri, 11 Oct 2019 21:04:52 +0200 Subject: [PATCH 25/34] Add tests for Abomonated of reference --- tests/tests.rs | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/tests.rs b/tests/tests.rs index ad16bf3..26b43cb 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -157,3 +157,33 @@ fn test_net_types() { let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, socket_addr4); let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, socket_addr6); } + +#[test] +fn test_abomonated_owned() { + use abomonation::abomonated::Abomonated; + + let s_owned = "This is an owned string".to_owned(); + + let mut bytes = Vec::new(); + unsafe { encode::(&s_owned, &mut bytes).unwrap(); } + + let abo = unsafe { Abomonated::::new(bytes).unwrap() }; + assert_eq!(abo.as_ref(), &s_owned); + assert_eq!(&*abo, &s_owned); +} + +#[test] +fn test_abomonated_ref() { + use abomonation::abomonated::Abomonated; + + let s_owned = "This is an owned string".to_owned(); + let s_borrow = &s_owned[11..16]; // "owned" + + let mut bytes = Vec::new(); + unsafe { encode::<&str, _>(&s_borrow, &mut bytes).unwrap(); } + + let abo = unsafe { Abomonated::<&str, _>::new(bytes).unwrap() }; + assert_eq!(abo.as_ref(), &s_borrow); + // NOTE: Cannot use Deref here because &str contains a reference + // FIXME: Figure out a way to add a compile_fail test for this +} From f9c5ff724c6bb5f44eff4bf27b891ed3e5f91ea9 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Mon, 21 Oct 2019 18:18:37 +0200 Subject: [PATCH 26/34] Clarify and narrow down padding bytes UB --- src/lib.rs | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a9c3822..6f84506 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ //! } //! ``` -use std::mem; // yup, used pretty much everywhere. +use std::mem::{self, MaybeUninit}; use std::io::Write; // for bytes.write_all; push_all is unstable and extend is slow. use std::io::Result as IOResult; use std::marker::PhantomData; @@ -48,9 +48,16 @@ pub mod abomonated; /// /// # Safety /// -/// This method is unsafe because it is unsafe to transmute typed allocations to binary. -/// Furthermore, Rust currently indicates that it is undefined behavior to observe padding -/// bytes, which will happen when we `memmcpy` structs which contain padding bytes. +/// This method is unsafe because Rust currently specifies that it is undefined +/// behavior to construct an `&[u8]` to padding bytes, which will happen when we +/// write down binary data of a type T which contains padding bytes as we must +/// pass down an `&[u8]` to the `Write` API. +/// +/// Eliminating this UB will require changes to the Rust languages or `std` to +/// add either of 1/a non-UB way to turn padding bytes into `&[u8]` or 2/a way +/// to send an `&[MaybeUninit]` (which allows padding bytes) to a Write +/// implementation. 
See the following discussion thread for more info: +/// https://internals.rust-lang.org/t/writing-down-binary-data-with-padding-bytes/11197/ /// /// # Examples /// ``` @@ -73,8 +80,7 @@ pub mod abomonated; /// #[inline(always)] pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { - let slice = std::slice::from_raw_parts(mem::transmute(typed), mem::size_of::()); - write.write_all(slice)?; + write_bytes(&mut write, std::slice::from_ref(typed))?; T::entomb(typed, &mut write) } @@ -639,7 +645,7 @@ unsafe impl<'de> Exhume<'de> for String { unsafe impl<'de, T: Entomb> Entomb for &'de [T] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write.write_all(typed_to_bytes(&self[..]))?; + write_bytes(write, &self[..])?; <[T]>::entomb(&self[..], write) } @@ -717,7 +723,7 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { // unsafe impl<'de, T: Entomb> Entomb for &'de T { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write.write_all(typed_to_bytes(std::slice::from_ref(&**self)))?; + write_bytes(write, std::slice::from_ref(&**self))?; T::entomb(&**self, write) } @@ -767,9 +773,23 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { } } -// This method currently enables undefined behavior, by exposing padding bytes. -unsafe fn typed_to_bytes(slice: &[T]) -> &[u8] { - std::slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * mem::size_of::()) +// Copy typed data into a writer, currently UB if type T contains padding bytes. +unsafe fn write_bytes(write: &mut impl Write, slice: &[T]) -> IOResult<()> { + // This is the correct way to reinterpret typed data as bytes, it accounts + // for the fact that T may contain uninitialized padding bytes. + let bytes = std::slice::from_raw_parts( + slice.as_ptr() as *const MaybeUninit, + slice.len() * mem::size_of::() + ); + + // FIXME: Unfortunately, `Write::write_all()` expects initialized bytes. + // This transmute is undefined behavior if T contains padding bytes. + // + // To resolve this UB, we'd need either a "freeze" operation that + // turns uninitialized bytes into arbitrary initialized bytes, or an + // extra `Write` interface that accepts uninitialized bytes. + // + write.write_all(mem::transmute::<&[MaybeUninit], &[u8]>(bytes)) } // Common subset of "exhume" for all [T]-like types From 7a995f71ee5714de007a50832b8d428c1cfc1369 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 25 Oct 2019 18:10:46 +0200 Subject: [PATCH 27/34] Expose alignment of nontrivial abomonated types --- src/lib.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6f84506..18c6d01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -209,6 +209,22 @@ pub unsafe trait Entomb { /// Reports the number of further bytes required to entomb `self`. fn extent(&self) -> usize { 0 } + + /// Report the alignment of the complete Abomonation-serialized data + fn alignment() -> usize + where Self: Sized; // TODO: { mem::align_of::() } (once ecosystem is ready) + + /// Version of "alignment" that takes a &self parameter for use in + /// declarative macros. + /// + /// This is _not_ analogous to `mem::align_of_val` and is only intended for + /// the internal consumption of the deprecated `unsafe_abomonate` macro. + /// Please do not use this trait method in any other code. 
+ /// + #[deprecated(note="For internal use of unsafe_abomonate only")] + fn alignment_from_self_ref(&self) -> usize + where Self: Sized + { Self::alignment() } } /// Types which can be deserialized from `&'de mut [u8]` to `&'de T` by abomonation @@ -292,6 +308,16 @@ macro_rules! unsafe_abomonate { $( size += $crate::Entomb::extent(&self.$field); )* size } + + #[allow(deprecated)] + fn alignment() -> usize { + // This is ugly, but I can't think about a better way to do this + // in a declarative macro-based code generator... + let bad_ref: &Self = unsafe { &*::std::ptr::NonNull::dangling().as_ptr() }; + let mut align = ::std::mem::align_of::(); + $( align = align.max(bad_ref.$field.alignment_from_self_ref()); )* + align + } } unsafe impl<'de> $crate::Exhume<'de> for $t { @@ -326,6 +352,13 @@ macro_rules! tuple_abomonate { $( size += $ty::extent($ty); )* size } + + #[allow(non_snake_case)] + fn alignment() -> usize { + let mut align = mem::align_of::(); + $( align = align.max($ty::alignment()); )* + align + } } unsafe impl<'de, $($ty: Exhume<'de>),*> Exhume<'de> for ($($ty,)*) { @@ -429,6 +462,10 @@ unsafe impl Entomb for Option { fn extent(&self) -> usize { self.as_ref().map(T::extent).unwrap_or(0) } + + fn alignment() -> usize { + mem::align_of::().max(T::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Option { unsafe fn exhume(self_: NonNull, mut bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { @@ -456,6 +493,10 @@ unsafe impl Entomb for Result { &Err(ref inner) => E::extent(inner), } } + + fn alignment() -> usize { + mem::align_of::().max(T::alignment()).max(E::alignment()) + } } unsafe impl<'de, T: Exhume<'de>, E: Exhume<'de>> Exhume<'de> for Result { unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { @@ -516,6 +557,10 @@ unsafe impl Entomb for [T] { fn extent(&self) -> usize { self.iter().map(T::extent).sum() } + + fn alignment() -> usize { + <[T; 1]>::alignment() + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T] { unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { @@ -536,6 +581,10 @@ macro_rules! 
array_abomonate { fn extent(&self) -> usize { <[T]>::extent(&self[..]) } + + fn alignment() -> usize { + mem::align_of::().max(T::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T; $size] { @@ -588,6 +637,10 @@ unsafe impl<'de> Entomb for &'de str { fn extent(&self) -> usize { self.len() } + + fn alignment() -> usize { + mem::align_of::().max(<[u8; 1]>::alignment()) + } } unsafe impl<'de> Exhume<'de> for &'de str { #[inline] @@ -609,6 +662,10 @@ unsafe impl<'de> Entomb for &'de mut str { fn extent(&self) -> usize { <&str>::extent(&self.as_ref()) } + + fn alignment() -> usize { + <&str>::alignment() + } } unsafe impl<'de> Exhume<'de> for &'de mut str { #[inline] @@ -630,6 +687,10 @@ unsafe impl Entomb for String { fn extent(&self) -> usize { <&str>::extent(&self.as_ref()) } + + fn alignment() -> usize { + mem::align_of::().max(<[u8; 1]>::alignment()) + } } unsafe impl<'de> Exhume<'de> for String { #[inline] @@ -643,7 +704,7 @@ unsafe impl<'de> Exhume<'de> for String { } } -unsafe impl<'de, T: Entomb> Entomb for &'de [T] { +unsafe impl<'de, T: Entomb + 'de> Entomb for &'de [T] { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write_bytes(write, &self[..])?; <[T]>::entomb(&self[..], write) @@ -652,6 +713,10 @@ unsafe impl<'de, T: Entomb> Entomb for &'de [T] { fn extent(&self) -> usize { mem::size_of::() * self.len() + <[T]>::extent(&self[..]) } + + fn alignment() -> usize { + mem::align_of::().max(<[T; 1]>::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de [T] { #[inline] @@ -673,6 +738,10 @@ unsafe impl<'de, T: Entomb> Entomb for &'de mut [T] { fn extent(&self) -> usize { <&[T]>::extent(&&self[..]) } + + fn alignment() -> usize { + <&[T]>::alignment() + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut [T] { #[inline] @@ -694,6 +763,10 @@ unsafe impl Entomb for Vec { fn extent(&self) -> usize { <&[T]>::extent(&&self[..]) } + + fn alignment() -> usize { + mem::align_of::().max(<[T; 1]>::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { #[inline] @@ -721,7 +794,7 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { // we start from 'de bytes, and we end up producing references that point // into those same bytes. 
// -unsafe impl<'de, T: Entomb> Entomb for &'de T { +unsafe impl<'de, T: Entomb + 'de> Entomb for &'de T { unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { write_bytes(write, std::slice::from_ref(&**self))?; T::entomb(&**self, write) @@ -730,6 +803,10 @@ unsafe impl<'de, T: Entomb> Entomb for &'de T { fn extent(&self) -> usize { mem::size_of::() + T::extent(&**self) } + + fn alignment() -> usize { + mem::align_of::().max(T::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de T { unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { @@ -747,6 +824,10 @@ unsafe impl<'de, T: Entomb> Entomb for &'de mut T { fn extent(&self) -> usize { <&T>::extent(&&**self) } + + fn alignment() -> usize { + <&T>::alignment() + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut T { unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { @@ -764,6 +845,10 @@ unsafe impl Entomb for Box { fn extent(&self) -> usize { <&T>::extent(&self.as_ref()) } + + fn alignment() -> usize { + mem::align_of::().max(T::alignment()) + } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { From 956715ee0374dea10b7f53384bd1d914cfdaf251 Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 25 Oct 2019 18:11:15 +0200 Subject: [PATCH 28/34] Expose alignment of trivial abomonated types (TODO: Remove once ecosystem ready) --- src/lib.rs | 76 +++++++++++++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 18c6d01..7719e66 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -378,77 +378,77 @@ macro_rules! tuple_abomonate { ); } -unsafe impl Entomb for u8 {} +unsafe impl Entomb for u8 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for u8 {} -unsafe impl Entomb for u16 {} +unsafe impl Entomb for u16 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for u16 {} -unsafe impl Entomb for u32 {} +unsafe impl Entomb for u32 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for u32 {} -unsafe impl Entomb for u64 {} +unsafe impl Entomb for u64 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for u64 {} -unsafe impl Entomb for u128 {} +unsafe impl Entomb for u128 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for u128 {} -unsafe impl Entomb for usize {} +unsafe impl Entomb for usize { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for usize {} -unsafe impl Entomb for i8 {} +unsafe impl Entomb for i8 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for i8 {} -unsafe impl Entomb for i16 {} +unsafe impl Entomb for i16 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for i16 {} -unsafe impl Entomb for i32 {} +unsafe impl Entomb for i32 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for i32 {} -unsafe impl Entomb for i64 {} +unsafe impl Entomb for i64 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for i64 {} -unsafe impl Entomb for i128 {} +unsafe impl Entomb for i128 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for i128 {} -unsafe impl Entomb for isize {} +unsafe impl Entomb for isize { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for isize {} -unsafe impl Entomb for NonZeroU8 {} +unsafe impl Entomb for 
NonZeroU8 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroU8 {} -unsafe impl Entomb for NonZeroU16 {} +unsafe impl Entomb for NonZeroU16 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroU16 {} -unsafe impl Entomb for NonZeroU32 {} +unsafe impl Entomb for NonZeroU32 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroU32 {} -unsafe impl Entomb for NonZeroU64 {} +unsafe impl Entomb for NonZeroU64 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroU64 {} -unsafe impl Entomb for NonZeroU128 {} +unsafe impl Entomb for NonZeroU128 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroU128 {} -unsafe impl Entomb for NonZeroUsize {} +unsafe impl Entomb for NonZeroUsize { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroUsize {} -unsafe impl Entomb for NonZeroI8 {} +unsafe impl Entomb for NonZeroI8 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroI8 {} -unsafe impl Entomb for NonZeroI16 {} +unsafe impl Entomb for NonZeroI16 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroI16 {} -unsafe impl Entomb for NonZeroI32 {} +unsafe impl Entomb for NonZeroI32 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroI32 {} -unsafe impl Entomb for NonZeroI64 {} +unsafe impl Entomb for NonZeroI64 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroI64 {} -unsafe impl Entomb for NonZeroI128 {} +unsafe impl Entomb for NonZeroI128 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroI128 {} -unsafe impl Entomb for NonZeroIsize {} +unsafe impl Entomb for NonZeroIsize { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for NonZeroIsize {} -unsafe impl Entomb for f32 {} +unsafe impl Entomb for f32 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for f32 {} -unsafe impl Entomb for f64 {} +unsafe impl Entomb for f64 { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for f64 {} -unsafe impl Entomb for bool {} +unsafe impl Entomb for bool { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for bool {} -unsafe impl Entomb for () {} +unsafe impl Entomb for () { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for () {} -unsafe impl Entomb for char {} +unsafe impl Entomb for char { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for char {} -unsafe impl Entomb for str {} +unsafe impl Entomb for str { fn alignment() -> usize { mem::align_of::<[u8; 1]>() } } unsafe impl Exhume<'_> for str {} -unsafe impl Entomb for ::std::time::Duration {} +unsafe impl Entomb for ::std::time::Duration { fn alignment() -> usize { mem::align_of::() } } unsafe impl Exhume<'_> for ::std::time::Duration {} -unsafe impl Entomb for PhantomData {} +unsafe impl Entomb for PhantomData { fn alignment() -> usize { mem::align_of::() } } unsafe impl<'de, T: 'de> Exhume<'de> for PhantomData {} unsafe impl Entomb for Option { @@ -933,17 +933,17 @@ mod network { use super::{Entomb, Exhume}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6, IpAddr, Ipv4Addr, Ipv6Addr}; - unsafe impl Entomb for IpAddr {} + unsafe impl Entomb for IpAddr { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for IpAddr {} - unsafe impl Entomb for Ipv4Addr {} + unsafe 
impl Entomb for Ipv4Addr { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for Ipv4Addr {} - unsafe impl Entomb for Ipv6Addr {} + unsafe impl Entomb for Ipv6Addr { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for Ipv6Addr {} - unsafe impl Entomb for SocketAddr {} + unsafe impl Entomb for SocketAddr { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for SocketAddr {} - unsafe impl Entomb for SocketAddrV4 {} + unsafe impl Entomb for SocketAddrV4 { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for SocketAddrV4 {} - unsafe impl Entomb for SocketAddrV6 {} + unsafe impl Entomb for SocketAddrV6 { fn alignment() -> usize { std::mem::align_of::() } } unsafe impl Exhume<'_> for SocketAddrV6 {} } From 15cc150d9184e099b13f8478c36163b5a9ba0c8b Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Wed, 23 Oct 2019 17:05:53 +0200 Subject: [PATCH 29/34] Add infrastructure for well-aligned binary I/O --- src/align/io.rs | 290 +++++++++++++++++++++++++++++++++++++++++++++++ src/align/mod.rs | 6 + src/lib.rs | 1 + 3 files changed, 297 insertions(+) create mode 100644 src/align/io.rs create mode 100644 src/align/mod.rs diff --git a/src/align/io.rs b/src/align/io.rs new file mode 100644 index 0000000..ea95737 --- /dev/null +++ b/src/align/io.rs @@ -0,0 +1,290 @@ +/// Tools for reading and writing abomonated data in an alignment-aware way +/// +/// In order to enable UB-free in-place deserialization, abomonated objects +/// follow Rust's normal memory alignment rules. This requires inserting padding +/// bytes between serialized data and skipping them on readout. This module +/// provides tools to take care of this. + +use std::{ + io::Write, + mem, + ptr::NonNull, +}; + + +/// Alignment-aware binary data writer +/// +/// This wrapper around a standard Rust writer allows writing multiple binary +/// objects in a sequence with a memory layout that is suitable for in-place +/// readout. It does so by inserting padding bytes between the objects as if +/// they were members of a well-aligned C-style struct whose alignment is the +/// maximum of the alignment of all written objects. +pub struct AlignedWriter { + /// Inner writer to which data is eventually dispatched + inner: W, + + /// Amount of data that was sent to the inner writer so far + written_so_far: usize, + + /// Expected alignment of the output data + #[cfg(debug_assertions)] + output_alignment: usize, +} + +impl AlignedWriter { + /// Prepare a writer for alignment-aware binary writes + /// + /// In debug builds, `AlignedWriter` will check that the output memory + /// allocation is sufficiently well-aligned for the data that is written + /// into it, as per the `output_alignment` parameter to this function. + // + // FIXME: output_alignment should be #[cfg(debug_assertions)], but at the + // moment Rust 1.39 is a bit too freshly released to rely on that. + #[allow(unused)] + pub fn new(inner: W, output_alignment: usize) -> Self { + Self { + inner, + written_so_far: 0, + #[cfg(debug_assertions)] output_alignment, + } + } + + /// Write arbitrary binary data into the inner writer + /// + /// This is unsafe because Rust does not yet provide an UB-free way to + /// expose the padding bytes of arbitrary T objects to writers. 
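+    ///
+    /// # Example
+    ///
+    /// A minimal usage sketch of the padding behavior, not a doctest from the
+    /// original patch (the concrete byte counts assume 4-byte `u32` alignment):
+    ///
+    /// ```ignore
+    /// let mut bytes = Vec::new();
+    /// let mut writer = AlignedWriter::new(&mut bytes, mem::align_of::<u32>());
+    /// unsafe {
+    ///     writer.write_slice(&[1u8]).unwrap();  // 1 data byte
+    ///     writer.write_slice(&[2u32]).unwrap(); // 3 padding bytes + 4 data bytes
+    /// }
+    /// assert_eq!(writer.written_so_far(), 8);
+    /// ```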
+ pub unsafe fn write_slice(&mut self, data: &[T]) -> crate::IOResult<()> { + // Check how aligned the binary data needs to be + let alignment = mem::align_of_val::<[T]>(data); + + // In debug builds, check that the output allocation has sufficiently + // strong alignment for the data that's being written to it. + // + // If the output alignment is too low, readout may go wrong because the + // AlignedReader will skip a number of padding bytes that may not be + // in sync with the amount that AlignedWriter has inserted, in a manner + // that depends on how the data being read out was _actually_ aligned. + debug_assert!( + if cfg!(debug_assertions) { alignment <= self.output_alignment } else { true }, + "Insufficient output alignment (output alignment is {}, got data of alignment {})", + self.output_alignment, alignment + ); + + // Inject padding bytes until the output is well-aligned, assuming that + // the first byte that was written was well-aligned for all output data. + while self.written_so_far % alignment != 0 { + self.inner.write_all(&[0u8])?; + self.written_so_far += 1; + } + + // Write down the binary data and exit + // FIXME: Move write_bytes functionality here + crate::write_bytes(&mut self.inner, data)?; + self.written_so_far += mem::size_of_val::<[T]>(data); + Ok(()) + } + + /// Convenience function for non-slice data + /// + /// This is unsafe for the same reason that `write_slice` is. + pub unsafe fn write(&mut self, data: &T) -> crate::IOResult<()> { + self.write_slice(std::slice::from_ref(data)) + } + + /// Query how much data was written so far + pub fn written_so_far(&self) -> usize { + self.written_so_far + } +} + +impl Write for AlignedWriter { + fn write(&mut self, buf: &[u8]) -> crate::IOResult { + // This will write buf.len() data because bytes are always well-aligned + // It is safe because &[u8] has no padding bytes + unsafe { self.write_slice(buf)? }; + Ok(buf.len()) + } + + fn flush(&mut self) -> crate::IOResult<()> { + // No flushing necessary, we don't buffer anything + Ok(()) + } +} + + +/// Slice-of-bytes reader for data written by `AlignedWriter` +/// +/// This reader takes as input a bunch of bytes that were written by +/// `AlignedWriter` and allows fetching back the corresponding binary data under +/// the assumption that the input bytes are aligned on the max of the alignment +/// of all the data that was written by `AlignedWriter`. +pub struct AlignedReader<'bytes> { + /// Remaining bytes to be read + bytes: &'bytes mut [u8], + + /// Expected alignment of the input data + #[cfg(debug_assertions)] + input_alignment: usize, +} + +impl<'bytes> AlignedReader<'bytes> { + /// Prepare some bytes for alignment-aware readout + /// + /// In debug builds, `AlignedReader` will check that the input bytes were + /// sufficiently well-aligned for the data that is being read from it, as + /// per the `input_alignment` parameter to this function. + // + // FIXME: input_alignment should be #[cfg(debug_assertions)], but at the + // moment Rust 1.39 is a bit too freshly released to rely on that. 
+    #[allow(unused)]
+    pub fn new(bytes: &'bytes mut [u8], input_alignment: usize) -> Self {
+        debug_assert_eq!((bytes.as_ptr() as usize) % input_alignment, 0,
+                         "Input data is not aligned on a {}-byte boundary as expected",
+                         input_alignment);
+        Self {
+            bytes,
+            #[cfg(debug_assertions)] input_alignment,
+        }
+    }
+
+    /// Read a slice of data of arbitrary type from the inner bytes, returning
+    /// a pointer to the first element of the slice, or None if the request
+    /// overflows the input bytes.
+    //
+    // FIXME: This should return a NonNull<[T]>, but pointers to slices are not
+    //        ergonomic enough at this point in time.
+    pub fn read_slice<T>(&mut self, len: usize) -> Option<NonNull<T>> {
+        // As far as I know, zero-length slices may be aligned differently but
+        // all nonzero-length slices are aligned identically
+        let alignment = if len == 0 {
+            mem::align_of::<[T; 0]>()
+        } else {
+            mem::align_of::<[T; 1]>()
+        };
+
+        // In debug builds, check that the input allocation has sufficiently
+        // strong alignment for the data that's being read from it.
+        //
+        // If the input alignment is too low, readout may go wrong because the
+        // AlignedReader will skip a number of padding bytes that may not be
+        // in sync with the amount that AlignedWriter has inserted, in a manner
+        // that depends on how the data being read out was _actually_ aligned.
+        debug_assert!(
+            if cfg!(debug_assertions) { alignment <= self.input_alignment } else { true },
+            "Insufficient input alignment (input alignment is {}, asked for data of alignment {})",
+            self.input_alignment, alignment
+        );
+
+        // Drop the alignment padding bytes leading up to the inner T-typed data
+        let misalignment = self.bytes.as_ptr() as usize % alignment;
+        if misalignment != 0 {
+            let offset = alignment - misalignment;
+            if offset > self.bytes.len() { return None; }
+            // In an ideal world, one could just write:
+            //     self.bytes = &mut self.bytes[offset..]
+            // Alas, in this world, we need...
+            self.bytes = unsafe {
+                mem::transmute::<&mut [u8], &'bytes mut [u8]>(&mut self.bytes[offset..])
+            };
+        }
+
+        // Make sure that we still have enough bytes for readout
+        let size = mem::size_of::<T>() * len;
+        if size > self.bytes.len() { return None; }
+
+        // Extract the inner T-typed data
+        // This is safe because we checked that the input size is large enough
+        // and the first pointer of a slice cannot be null
+        let (out, rest) = self.bytes.split_at_mut(size);
+        let result: NonNull<T> = unsafe {
+            NonNull::new_unchecked(out.as_mut_ptr() as *mut T)
+        };
+
+        // Update the inner slice. In an ideal world, one could just write
+        //     self.bytes = rest
+        // Alas, in this world, we need...
+ self.bytes = unsafe { + mem::transmute::<&mut [u8], &'bytes mut [u8]>(rest) + }; + Some(result) + } + + /// Read arbitrary data from the inner bytes + pub fn read(&mut self) -> Option> { + self.read_slice(1) + } + + /// Extract the remaining bytes + pub fn remaining(self) -> &'bytes mut [u8] { + self.bytes + } +} + + +#[cfg(test)] +mod tests { + use crate::align::AlignedBytes; + use super::{AlignedReader, AlignedWriter}; + + #[test] + fn round_trip() { + // We'll write the following binary data down + let u1 = 0x42u8; + let u2 = 0x12345678_9abcdef0_u64; + let u3s = [0x13579bdf_u32, 0x2468ace0_u32]; + type UMax = u64; + let max_align = std::mem::align_of::(); + + // Build a writer for it + let mut bytes = Vec::new(); + let mut writer = AlignedWriter::new(&mut bytes, max_align); + + // Write it down + unsafe { + writer.write(&u1).unwrap(); + writer.write(&u2).unwrap(); + writer.write_slice(&u3s[..]).unwrap(); + } + + // Check written bytes counter + let written = writer.written_so_far(); + std::mem::drop(writer); + assert_eq!(written, bytes.len(), + "Number of reported written bytes is wrong"); + assert_eq!(written, 1 + 7 + 8 + 4 + 4, + "Reported written bytes does not match written data"); + + // Check written data + assert_eq!(bytes[0], u1, + "8-bit number was written wrong"); + assert_eq!(bytes[1..8], [0, 0, 0, 0, 0, 0, 0], + "Padding for 64-bit number was written wrong"); + assert_eq!(bytes[8..16], u2.to_ne_bytes(), + "64-bit number was written wrong"); + assert_eq!(bytes[16..20], u3s[0].to_ne_bytes(), + "First 32-bit number was written wrong"); + assert_eq!(bytes[20..24], u3s[1].to_ne_bytes(), + "Second 32-bit number was written wrong"); + + // Prepare to read back the data + let mut aligned_bytes = AlignedBytes::::new(&mut bytes[..]); + let mut reader = AlignedReader::new(&mut aligned_bytes, max_align); + + // Read back the data + unsafe { + assert_eq!(reader.read::().unwrap().as_ref(), &u1, + "8-bit number was read wrong"); + assert_eq!(reader.read::().unwrap().as_ref(), &u2, + "64-bit number was read wrong"); + let slice_ptr = reader.read_slice::(u3s.len()).unwrap(); + let slice = std::slice::from_raw_parts(slice_ptr.as_ptr(), + u3s.len()); + assert_eq!(slice, &u3s, + "32-bit numbers were read wrong"); + } + + // Make sure that we consumed all the bytes + assert_eq!(reader.remaining(), &[], + "No bytes should remain"); + } +} diff --git a/src/align/mod.rs b/src/align/mod.rs new file mode 100644 index 0000000..7aa85b4 --- /dev/null +++ b/src/align/mod.rs @@ -0,0 +1,6 @@ +/// Utilities for handling alignment in abomonated data + +mod io; + +#[deprecated(note = "Made pub for internal unsafe_abomonate use only")] +pub use self::io::{AlignedReader, AlignedWriter}; diff --git a/src/lib.rs b/src/lib.rs index 7719e66..434e338 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ use std::num::*; use std::ptr::NonNull; pub mod abomonated; +pub mod align; /// Encodes a typed reference into a binary buffer. 
/// From 1051a4773139680a390a7adc146255482d3a66af Mon Sep 17 00:00:00 2001 From: Hadrien Grasland Date: Fri, 25 Oct 2019 18:22:04 +0200 Subject: [PATCH 30/34] Use aligned reads and writes in the abomonation implementation --- src/align/io.rs | 30 +++-- src/lib.rs | 325 +++++++++++++++++------------------------------- 2 files changed, 137 insertions(+), 218 deletions(-) diff --git a/src/align/io.rs b/src/align/io.rs index ea95737..1f4fa8a 100644 --- a/src/align/io.rs +++ b/src/align/io.rs @@ -7,7 +7,7 @@ use std::{ io::Write, - mem, + mem::{self, MaybeUninit}, ptr::NonNull, }; @@ -77,9 +77,27 @@ impl AlignedWriter { self.written_so_far += 1; } - // Write down the binary data and exit - // FIXME: Move write_bytes functionality here - crate::write_bytes(&mut self.inner, data)?; + // This is the correct way to reinterpret typed data as bytes, it + // accounts for the fact that T may contain padding bytes. + let bytes = std::slice::from_raw_parts( + data.as_ptr() as *const MaybeUninit, + data.len() * mem::size_of::() + ); + + // FIXME: Unfortunately, `Write::write_all()` expects initialized + // bytes. This transmute is undefined behavior if T contains + // uninitialized padding bytes. + // + // To resolve this UB, we'd need either a "freeze" operation + // that turns uninitialized bytes into arbitrary initialized + // bytes, or a `Write` interface that accepts uninit bytes. + // + // See this Rust internals forum topic for more discussion: + // https://internals.rust-lang.org/t/writing-down-binary-data-with-padding-bytes/11197/ + // + self.inner.write_all(mem::transmute::<&[MaybeUninit], &[u8]>(bytes))?; + + // Keep track of the amount of emitted data and exit self.written_so_far += mem::size_of_val::<[T]>(data); Ok(()) } @@ -223,7 +241,6 @@ impl<'bytes> AlignedReader<'bytes> { #[cfg(test)] mod tests { - use crate::align::AlignedBytes; use super::{AlignedReader, AlignedWriter}; #[test] @@ -267,8 +284,7 @@ mod tests { "Second 32-bit number was written wrong"); // Prepare to read back the data - let mut aligned_bytes = AlignedBytes::::new(&mut bytes[..]); - let mut reader = AlignedReader::new(&mut aligned_bytes, max_align); + let mut reader = AlignedReader::new(&mut bytes, max_align); // Read back the data unsafe { diff --git a/src/lib.rs b/src/lib.rs index 434e338..d77cb60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ //! } //! ``` -use std::mem::{self, MaybeUninit}; +use std::mem; use std::io::Write; // for bytes.write_all; push_all is unstable and extend is slow. use std::io::Result as IOResult; use std::marker::PhantomData; @@ -45,6 +45,9 @@ use std::ptr::NonNull; pub mod abomonated; pub mod align; +#[deprecated(note = "Made pub for internal unsafe_abomonate use only")] +pub use align::{AlignedReader, AlignedWriter}; + /// Encodes a typed reference into a binary buffer. /// /// # Safety @@ -80,9 +83,10 @@ pub mod align; /// ``` /// #[inline(always)] -pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<()> { - write_bytes(&mut write, std::slice::from_ref(typed))?; - T::entomb(typed, &mut write) +pub unsafe fn encode(typed: &T, write: W) -> IOResult<()> { + let mut writer = AlignedWriter::new(write, T::alignment()); + writer.write::(typed)?; + T::entomb(typed, &mut writer) } /// Decodes a mutable binary slice into an immutable typed reference. @@ -94,19 +98,26 @@ pub unsafe fn encode(typed: &T, mut write: W) -> IOResult<( /// /// # Safety /// -/// The `decode` method is unsafe due to a number of unchecked invariants. 
+/// ## Data validity
+///
+/// `decode()` does not check that the input bytes uphold T's validity
+/// invariants. Decoding arbitrary `&[u8]` data can result in invalid
+/// utf8 strings, enums with invalid discriminants, null references, etc., and
+/// some forms of broken invariants are undefined behavior in Rust.
 ///
-/// Decoding arbitrary `&[u8]` data can
-/// result in invalid utf8 strings, enums with invalid discriminants, etc. `decode` *does*
-/// perform bounds checks, as part of determining if enough data are present to completely decode,
-/// and while it should only write within the bounds of its `&mut [u8]` argument, the use of invalid
-/// utf8 and enums are undefined behavior.
+/// `decode` *does* perform bounds checks, as part of determining if enough data
+/// are present to completely decode, so it should only read and write within
+/// the bounds of its `&mut [u8]` argument. But that only prevents UB for
+/// truncated data, not arbitrary invalid data.
 ///
-/// Please do not decode data that was not encoded by the corresponding implementation.
+/// Therefore, please do not decode data that was not encoded by the
+/// corresponding encode() implementation.
 ///
-/// In addition, `decode` does not ensure that the bytes representing types will be correctly aligned.
-/// On several platforms unaligned reads are undefined behavior, but on several other platforms they
-/// are only a performance penalty.
+/// ## Alignment
+///
+/// `decode()` assumes that the input bytes follow the alignment requirements of
+/// abomonated data of type T, which you can check with `T::alignment()`.
+/// Failure to meet this requirement will result in undefined behavior.
 ///
 /// # Examples
 /// ```
@@ -129,17 +140,9 @@ pub unsafe fn encode<T: Entomb, W: Write>(typed: &T, mut write: W) -> IOResult<()> {
 pub unsafe fn decode<'bytes, T>(bytes: &'bytes mut [u8]) -> Option<(&'bytes T, &'bytes mut [u8])>
     where T: Exhume<'bytes>
 {
-    if bytes.len() < mem::size_of::<T>() { None }
-    else {
-        let (split1, split2) = bytes.split_at_mut(mem::size_of::<T>());
-        let result: NonNull<T> = mem::transmute(split1.get_unchecked_mut(0));
-        if let Some(remaining) = T::exhume(result, split2) {
-            Some((&*result.as_ptr(), remaining))
-        }
-        else {
-            None
-        }
-    }
+    let mut reader = AlignedReader::new(bytes, T::alignment());
+    let result_ptr = reader.read::<T>()?;
+    Some((T::exhume(result_ptr, &mut reader)?, reader.remaining()))
 }
 
 /// Reports the number of bytes required to encode `self`.
@@ -148,7 +151,9 @@ pub unsafe fn decode<'bytes, T>(bytes: &'bytes mut [u8]) -> Option<(&'bytes T, &'bytes mut [u8])>
 ///
 /// The `measure` method is safe. It neither produces nor consults serialized representations.
 pub fn measure<T: Entomb>(typed: &T) -> usize {
-    mem::size_of::<T>() + T::extent(&typed)
+    let mut aligned_sink = AlignedWriter::new(std::io::sink(), T::alignment());
+    unsafe { encode(typed, &mut aligned_sink).expect("Sink should be infallible"); }
+    aligned_sink.written_so_far()
 }
 
 /// Abomonation provides methods to serialize any heap data the implementor owns.
@@ -206,10 +211,7 @@ pub unsafe trait Entomb {
     ///
     /// Most commonly this is owned data on the other end of pointers in `&self`. The return value
     /// reports any failures in writing to `write`.
-    unsafe fn entomb<W: Write>(&self, _write: &mut W) -> IOResult<()> { Ok(()) }
-
-    /// Reports the number of further bytes required to entomb `self`.
- fn extent(&self) -> usize { 0 } + unsafe fn entomb(&self, _write: &mut AlignedWriter) -> IOResult<()> { Ok(()) } /// Report the alignment of the complete Abomonation-serialized data fn alignment() -> usize @@ -246,7 +248,7 @@ pub unsafe trait Exhume<'de> : Entomb + 'de { /// `exhume` using raw pointer operations as much as feasible. // // FIXME: Replace self_ with self once Rust has arbitrary self types - unsafe fn exhume(_self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { Some(bytes) } + unsafe fn exhume(self_: NonNull, _reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { Some(&mut *self_.as_ptr()) } } /// The `unsafe_abomonate!` macro takes a type name with an optional list of fields, and implements @@ -299,17 +301,12 @@ macro_rules! unsafe_abomonate { }; ($t:ty : $($field:ident),*) => { unsafe impl $crate::Entomb for $t { - unsafe fn entomb(&self, write: &mut W) -> ::std::io::Result<()> { + #[allow(deprecated)] + unsafe fn entomb(&self, write: &mut $crate::AlignedWriter) -> ::std::io::Result<()> { $( $crate::Entomb::entomb(&self.$field, write)?; )* Ok(()) } - fn extent(&self) -> usize { - let mut size = 0; - $( size += $crate::Entomb::extent(&self.$field); )* - size - } - #[allow(deprecated)] fn alignment() -> usize { // This is ugly, but I can't think about a better way to do this @@ -322,14 +319,15 @@ macro_rules! unsafe_abomonate { } unsafe impl<'de> $crate::Exhume<'de> for $t { - unsafe fn exhume(self_: ::std::ptr::NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + #[allow(deprecated)] + unsafe fn exhume(self_: ::std::ptr::NonNull, reader: &mut $crate::AlignedReader<'de>) -> Option<&'de mut Self> { $( // FIXME: This (briefly) constructs an &mut _ to invalid data, which is UB. // The proposed &raw mut operator would allow avoiding this. let field_ptr: ::std::ptr::NonNull<_> = From::from(&mut (*self_.as_ptr()).$field); - bytes = $crate::Exhume::exhume(field_ptr, bytes)?; + $crate::Exhume::exhume(field_ptr, reader)?; )* - Some(bytes) + Some(&mut *self_.as_ptr()) } } }; @@ -340,20 +338,12 @@ macro_rules! tuple_abomonate { ( $($ty:ident)+) => ( unsafe impl<$($ty: Entomb),*> Entomb for ($($ty,)*) { #[allow(non_snake_case)] - unsafe fn entomb(&self, write: &mut WRITE) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { let ($(ref $ty,)*) = *self; $( $ty::entomb($ty, write)?; )* Ok(()) } - #[allow(non_snake_case)] - fn extent(&self) -> usize { - let mut size = 0; - let ($(ref $ty,)*) = *self; - $( size += $ty::extent($ty); )* - size - } - #[allow(non_snake_case)] fn alignment() -> usize { let mut align = mem::align_of::(); @@ -364,16 +354,16 @@ macro_rules! tuple_abomonate { unsafe impl<'de, $($ty: Exhume<'de>),*> Exhume<'de> for ($($ty,)*) { #[allow(non_snake_case)] - unsafe fn exhume(self_: NonNull, mut bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I think avoiding this would require a cleaner way to iterate over tuple fields. // One possibility would be a C++11-style combination of variadic generics and recursion. 
let ($(ref mut $ty,)*) = *self_.as_ptr(); $( let field_ptr : NonNull<$ty> = From::from($ty); - bytes = $ty::exhume(field_ptr, bytes)?; + $ty::exhume(field_ptr, reader)?; )* - Some(bytes) + Some(&mut *self_.as_ptr()) } } ); @@ -453,66 +443,56 @@ unsafe impl Entomb for PhantomData { fn alignment() -> usize { mem::align_ unsafe impl<'de, T: 'de> Exhume<'de> for PhantomData {} unsafe impl Entomb for Option { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { if let &Some(ref inner) = self { T::entomb(inner, write)?; } Ok(()) } - fn extent(&self) -> usize { - self.as_ref().map(T::extent).unwrap_or(0) - } - fn alignment() -> usize { mem::align_of::().max(T::alignment()) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Option { - unsafe fn exhume(self_: NonNull, mut bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. if let Some(ref mut inner) = *self_.as_ptr() { let inner_ptr : NonNull = From::from(inner); - bytes = T::exhume(inner_ptr, bytes)?; + T::exhume(inner_ptr, reader)?; } - Some(bytes) + Some(&mut *self_.as_ptr()) } } unsafe impl Entomb for Result { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { match self { &Ok(ref inner) => T::entomb(inner, write), &Err(ref inner) => E::entomb(inner, write), } } - fn extent(&self) -> usize { - match self { - &Ok(ref inner) => T::extent(inner), - &Err(ref inner) => E::extent(inner), - } - } - fn alignment() -> usize { mem::align_of::().max(T::alignment()).max(E::alignment()) } } unsafe impl<'de, T: Exhume<'de>, E: Exhume<'de>> Exhume<'de> for Result { - unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs a "ref mut" to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on enum implementation details. match *self_.as_ptr() { Ok(ref mut inner) => { let inner_ptr : NonNull = From::from(inner); - T::exhume(inner_ptr, bytes) + T::exhume(inner_ptr, reader)?; } Err(ref mut inner) => { let inner_ptr : NonNull = From::from(inner); - E::exhume(inner_ptr, bytes) + E::exhume(inner_ptr, reader)?; } } + Some(&mut *self_.as_ptr()) } } @@ -550,47 +530,40 @@ tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD tuple_abomonate!(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF); unsafe impl Entomb for [T] { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { for element in self { T::entomb(element, write)?; } Ok(()) } - fn extent(&self) -> usize { - self.iter().map(T::extent).sum() - } - fn alignment() -> usize { <[T; 1]>::alignment() } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T] { - unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This constructs an &[T] to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on slice implementation details. 
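        // (The length read below is carried in the fat pointer `self_` itself,
        // so no invalid memory is accessed; the UB concern above is only about
        // briefly materializing the `&[T]`.)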
let self_len = self_.as_ref().len(); - exhume_slice(self_.as_ptr() as *mut T, self_len, bytes) + exhume_slice(self_.as_ptr() as *mut T, self_len, reader) } } macro_rules! array_abomonate { ($size:expr) => ( unsafe impl Entomb for [T; $size] { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <[T]>::entomb(&self[..], write) } - fn extent(&self) -> usize { - <[T]>::extent(&self[..]) - } - fn alignment() -> usize { mem::align_of::().max(T::alignment()) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for [T; $size] { - unsafe fn exhume(self_: NonNull, bytes: &'de mut[u8]) -> Option<&'de mut [u8]> { - exhume_slice(self_.as_ptr() as *mut T, $size, bytes) + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { + exhume_slice(self_.as_ptr() as *mut T, $size, reader)?; + Some(&mut *self_.as_ptr()) } } ) @@ -631,12 +604,8 @@ array_abomonate!(31); array_abomonate!(32); unsafe impl<'de> Entomb for &'de str { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write.write_all(self.as_bytes()) - } - - fn extent(&self) -> usize { - self.len() + unsafe fn entomb(&self, writer: &mut AlignedWriter) -> IOResult<()> { + writer.write_slice(self.as_bytes()) } fn alignment() -> usize { @@ -645,74 +614,62 @@ unsafe impl<'de> Entomb for &'de str { } unsafe impl<'de> Exhume<'de> for &'de str { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &str to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on &str implementation details. let self_len = self_.as_ref().len(); - let (s, rest) = exhume_str_ref(self_len, bytes)?; + let s = exhume_str_ref(self_len, reader)?; self_.as_ptr().write(s); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl<'de> Entomb for &'de mut str { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&str>::entomb(&self.as_ref(), write) } - fn extent(&self) -> usize { - <&str>::extent(&self.as_ref()) - } - fn alignment() -> usize { <&str>::alignment() } } unsafe impl<'de> Exhume<'de> for &'de mut str { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &mut str to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on &str implementation details. 
let self_len = self_.as_ref().len(); - let (s, rest) = exhume_str_ref(self_len, bytes)?; + let s = exhume_str_ref(self_len, reader)?; self_.as_ptr().write(s); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl Entomb for String { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&str>::entomb(&self.as_ref(), write) } - fn extent(&self) -> usize { - <&str>::extent(&self.as_ref()) - } - fn alignment() -> usize { mem::align_of::().max(<[u8; 1]>::alignment()) } } unsafe impl<'de> Exhume<'de> for String { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &String to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on String implementation details. let self_len = self_.as_ref().len(); - let (s, rest) = exhume_str_ref(self_len, bytes)?; + let s = exhume_str_ref(self_len, reader)?; self_.as_ptr().write(String::from_raw_parts(s.as_mut_ptr(), s.len(), s.len())); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl<'de, T: Entomb + 'de> Entomb for &'de [T] { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write_bytes(write, &self[..])?; - <[T]>::entomb(&self[..], write) - } - - fn extent(&self) -> usize { - mem::size_of::() * self.len() + <[T]>::extent(&self[..]) + unsafe fn entomb(&self, writer: &mut AlignedWriter) -> IOResult<()> { + writer.write_slice::(&self[..])?; + <[T]>::entomb(&self[..], writer) } fn alignment() -> usize { @@ -721,63 +678,55 @@ unsafe impl<'de, T: Entomb + 'de> Entomb for &'de [T] { } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de [T] { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &[T] to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on slice implementation details. let self_len = self_.as_ref().len(); - let (s, rest) = exhume_slice_ref(self_len, bytes)?; + let s = exhume_slice_ref(self_len, bytes)?; self_.as_ptr().write(s); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl<'de, T: Entomb> Entomb for &'de mut [T] { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&[T]>::entomb(&&self[..], write) } - fn extent(&self) -> usize { - <&[T]>::extent(&&self[..]) - } - fn alignment() -> usize { <&[T]>::alignment() } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut [T] { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &mut [T] to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on slice implementation details. 
let self_len = self_.as_ref().len(); - let (s, rest) = exhume_slice_ref(self_len, bytes)?; + let s = exhume_slice_ref(self_len, bytes)?; self_.as_ptr().write(s); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl Entomb for Vec { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&[T]>::entomb(&&self[..], write) } - fn extent(&self) -> usize { - <&[T]>::extent(&&self[..]) - } - fn alignment() -> usize { mem::align_of::().max(<[T; 1]>::alignment()) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { #[inline] - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { + unsafe fn exhume(self_: NonNull, bytes: &mut AlignedReader<'de>) -> Option<&'de mut Self> { // FIXME: This (briefly) constructs an &Vec to invalid data, which is UB. // I'm not sure if this can be fully resolved without relying on Vec implementation details. let self_len = self_.as_ref().len(); - let (s, rest) = exhume_slice_ref(self_len, bytes)?; + let s = exhume_slice_ref(self_len, bytes)?; self_.as_ptr().write(Vec::from_raw_parts(s.as_mut_ptr(), self_len, self_len)); - Some(rest) + Some(&mut *self_.as_ptr()) } } @@ -796,13 +745,9 @@ unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Vec { // into those same bytes. // unsafe impl<'de, T: Entomb + 'de> Entomb for &'de T { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { - write_bytes(write, std::slice::from_ref(&**self))?; - T::entomb(&**self, write) - } - - fn extent(&self) -> usize { - mem::size_of::() + T::extent(&**self) + unsafe fn entomb(&self, writer: &mut AlignedWriter) -> IOResult<()> { + writer.write::(&**self)?; + T::entomb(&**self, writer) } fn alignment() -> usize { @@ -810,124 +755,82 @@ unsafe impl<'de, T: Entomb + 'de> Entomb for &'de T { } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de T { - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { - let (target, rest) = exhume_ref(bytes)?; + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { + let target = exhume_ref(reader)?; self_.as_ptr().write(target); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl<'de, T: Entomb> Entomb for &'de mut T { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&T>::entomb(&&**self, write) } - fn extent(&self) -> usize { - <&T>::extent(&&**self) - } - fn alignment() -> usize { <&T>::alignment() } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for &'de mut T { - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { - let (target, rest) = exhume_ref(bytes)?; + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { + let target = exhume_ref(reader)?; self_.as_ptr().write(target); - Some(rest) + Some(&mut *self_.as_ptr()) } } unsafe impl Entomb for Box { - unsafe fn entomb(&self, write: &mut W) -> IOResult<()> { + unsafe fn entomb(&self, write: &mut AlignedWriter) -> IOResult<()> { <&T>::entomb(&self.as_ref(), write) } - fn extent(&self) -> usize { - <&T>::extent(&self.as_ref()) - } - fn alignment() -> usize { mem::align_of::().max(T::alignment()) } } unsafe impl<'de, T: Exhume<'de>> Exhume<'de> for Box { - unsafe fn exhume(self_: NonNull, bytes: &'de mut [u8]) -> Option<&'de mut [u8]> { - let (target, rest) = exhume_ref(bytes)?; + unsafe fn exhume(self_: NonNull, reader: &mut AlignedReader<'de>) -> Option<&'de mut Self> { + let 
target = exhume_ref(reader)?; self_.as_ptr().write(Box::from_raw(target as *mut _)); - Some(rest) + Some(&mut *self_.as_ptr()) } } -// Copy typed data into a writer, currently UB if type T contains padding bytes. -unsafe fn write_bytes(write: &mut impl Write, slice: &[T]) -> IOResult<()> { - // This is the correct way to reinterpret typed data as bytes, it accounts - // for the fact that T may contain uninitialized padding bytes. - let bytes = std::slice::from_raw_parts( - slice.as_ptr() as *const MaybeUninit, - slice.len() * mem::size_of::() - ); - - // FIXME: Unfortunately, `Write::write_all()` expects initialized bytes. - // This transmute is undefined behavior if T contains padding bytes. - // - // To resolve this UB, we'd need either a "freeze" operation that - // turns uninitialized bytes into arbitrary initialized bytes, or an - // extra `Write` interface that accepts uninitialized bytes. - // - write.write_all(mem::transmute::<&[MaybeUninit], &[u8]>(bytes)) -} - // Common subset of "exhume" for all [T]-like types // (I'd gladly move this to [T]::exhume, but building a NonNull<[T]> is currently too difficult) #[inline] unsafe fn exhume_slice<'de, T: Exhume<'de>>( first_ptr: *mut T, length: usize, - mut bytes: &'de mut [u8] -) -> Option<&'de mut [u8]> { + reader: &mut AlignedReader<'de> +) -> Option<&'de mut [T]> { for i in 0..length { let element_ptr: NonNull = NonNull::new_unchecked(first_ptr.add(i)); - bytes = T::exhume(element_ptr, bytes)?; + T::exhume(element_ptr, reader)?; } - Some(bytes) + Some(std::slice::from_raw_parts_mut(first_ptr, length)) } // Common subset of "exhume" for all &[T]-like types #[inline] unsafe fn exhume_slice_ref<'de, T: Exhume<'de>>( length: usize, - bytes: &'de mut [u8] -) -> Option<(&'de mut [T], &'de mut [u8])> { - let binary_len = length * mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let first_ptr = mine.as_mut_ptr() as *mut T; - rest = exhume_slice(first_ptr, length, rest)?; - Some((std::slice::from_raw_parts_mut(first_ptr, length).into(), rest)) - } + reader: &mut AlignedReader<'de> +) -> Option<&'de mut [T]> { + let first_ptr = reader.read_slice::(length)?.as_ptr(); + exhume_slice(first_ptr, length, reader) } // Common subset of "exhume" for all &mut T-like types -unsafe fn exhume_ref<'de, T: Exhume<'de>>(bytes: &'de mut [u8]) -> Option<(&'de mut T, &'de mut [u8])> { - let binary_len = mem::size_of::(); - if binary_len > bytes.len() { None } - else { - let (mine, mut rest) = bytes.split_at_mut(binary_len); - let target : NonNull = NonNull::new_unchecked(mine.as_mut_ptr() as *mut T); - rest = T::exhume(target, rest)?; - Some((&mut *target.as_ptr(), rest)) - } +unsafe fn exhume_ref<'de, T: Exhume<'de>>(reader: &mut AlignedReader<'de>) -> Option<&'de mut T> { + let target = reader.read::()?; + T::exhume(target, reader) } // Common subset of "exhume" for all &str-like types -unsafe fn exhume_str_ref<'de>(length: usize, bytes: &'de mut [u8]) -> Option<(&'de mut str, &'de mut [u8])> { - if length > bytes.len() { None } - else { - let (mine, rest) = bytes.split_at_mut(length); - Some((std::str::from_utf8_unchecked_mut(mine), rest)) - } +unsafe fn exhume_str_ref<'de>(length: usize, reader: &mut AlignedReader<'de>) -> Option<&'de mut str> { + let bytes = exhume_slice_ref::(length, reader)?; + Some(std::str::from_utf8_unchecked_mut(bytes)) } mod network { From d8abfcffc46bd3c2e7421f5d7e7b246535280360 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Fri, 8 Nov 2019 17:55:37 +0100 
Subject: [PATCH 31/34] Provide abstractions for properly aligning abomonated bytes

---
 src/align/alloc.rs | 259 +++++++++++++++++++++++++++++++++++++++++++++
 src/align/mod.rs   |   3 +
 src/lib.rs         |   4 +
 tests/tests.rs     |  19 +++-
 4 files changed, 281 insertions(+), 4 deletions(-)
 create mode 100644 src/align/alloc.rs

diff --git a/src/align/alloc.rs b/src/align/alloc.rs
new file mode 100644
index 0000000..ec2487b
--- /dev/null
+++ b/src/align/alloc.rs
@@ -0,0 +1,259 @@
+/// Tools for storing abomonated objects with correct alignment
+///
+/// Use of `decode::<T>()` requires that the input bytes are aligned on a
+/// `T::alignment()` boundary, or else undefined behavior will ensue.
+///
+/// This module provides tools for ensuring this alignment constraint on input
+/// bytes of unknown or known-incorrect alignment before calling `decode()`.
+
+use crate::{
+    Entomb,
+    Exhume,
+};
+
+use std::{
+    alloc::{self, Layout},
+    marker::PhantomData,
+    ops::{Deref, DerefMut},
+    ptr::NonNull,
+};
+
+
+/// Overaligned `Box<[u8]>` for abomonated objects of type T
+///
+/// Compared with a regular `Box<[u8]>`, this heap-allocated bag of bytes also
+/// ensures that the heap allocation is aligned on `T::alignment()`, and thus
+/// suitable for use as input to `decode::<T>()`.
+pub struct Coffin<T: Entomb>(NonNull<[u8]>, PhantomData<T>);
+
+impl<T: Entomb> Coffin<T> {
+    /// Copy abomonated bytes into a suitably aligned heap allocation
+    ///
+    /// May abort the computation if memory is exhausted or the system allocator
+    /// is not able to satisfy the size or alignment requirements.
+    pub fn new(bytes: &[u8]) -> Self {
+        // Perform the memory allocation using the system allocator. This is
+        // safe because all safety preconditions are checked by Self::layout().
+        let size = bytes.len();
+        let layout = Self::layout(size);
+        let ptr = unsafe { alloc::alloc(layout) };
+
+        // Abort on memory allocation errors the recommended way. Since the
+        // system allocator may abort, no point in not aborting ourselves...
+        if ptr.is_null() { alloc::handle_alloc_error(layout); }
+
+        // Transfer the input bytes into our new allocation. This is safe as...
+        // - `bytes.as_ptr()` has to be valid for `size` by slice construction
+        // - `ptr` is non-null and must point to a memory region of `size` bytes
+        // - Pointers are always byte-aligned, so alignment is irrelevant.
+        // - Heap allocations may not overlap with existing objects.
+        unsafe { ptr.copy_from_nonoverlapping(bytes.as_ptr(), size); }
+
+        // Produce the output slice. The transmute is safe as...
+        // - We don't care about lifetimes as we want a NonNull in the end
+        // - As discussed above, `ptr` is non-null and well-aligned.
+        // - The bytes of the slice have been initialized above
+        Self(unsafe { std::slice::from_raw_parts_mut(ptr, size) }.into(),
+             PhantomData)
+    }
+
+    /// Compute the proper layout for a coffin allocation, checking the safety
+    /// preconditions of the system memory allocator along the way.
+    ///
+    /// We handle errors via panics because they all emerge from edge cases that
+    /// should only be encountered by users actively trying to break this code.
+    fn layout(size: usize) -> Layout {
+        // Basic sanity check for debug builds
+        debug_assert!(size >= std::mem::size_of::<T>(),
+                      "Requested size is quite obviously not big enough");
+
+        // We're going to use the system allocator, so we cannot accept
+        // zero-sized slices of bytes.
+        assert!(size > 0, "Allocation size must be positive");
+
+        // At this point, the only layout errors that remain are those caused by
+        // a bad Abomonation::alignment implementation (alignment is zero or not
+        // a power of 2) or by a huge input size (close to usize::MAX).
+        Layout::from_size_align(size, T::alignment())
+               .expect("Bad Abomonation::alignment() impl or excessive size")
+    }
+}
+
+impl<T: Entomb> Deref for Coffin<T> {
+    type Target = [u8];
+
+    fn deref(&self) -> &Self::Target {
+        // This is safe as...
+        // - The target allocation is live until the Coffin is dropped.
+        // - Normal borrow-checking rules apply and prevent the user from
+        //   aliasing or retaining the output reference in an invalid way.
+        //
+        // ...but see the Drop documentation for a possible edge case :(
+        unsafe { self.0.as_ref() }
+    }
+}
+
+impl<T: Entomb> DerefMut for Coffin<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        // This is safe for the same reason that Deref is.
+        unsafe { self.0.as_mut() }
+    }
+}
+
+impl<T: Entomb> Drop for Coffin<T> {
+    fn drop(&mut self) {
+        // In principle, this should be safe for the same reason that DerefMut
+        // is; however, there is a wrinkle for all of those...
+        //
+        // If we want any form of Deref to be safe, the Rust compiler must
+        // prevent LLVM from inserting memory reads from the slice after
+        // deallocation, and currently it doesn't.
+        //
+        // There is no clear reason why LLVM would do this, though, and `std`
+        // encounters the same problem everywhere, so we'll take the risk...
+        //
+        // FIXME: Once the Rust team has figured out the right way to handle
+        //        this, use it here if it requires manual action.
+        //
+        // Here's one ongoing discussion of this topic for reference:
+        // https://github.com/rust-lang/rust/issues/55005
+        let slice = unsafe { self.0.as_mut() };
+
+        // This is safe because...
+        // - Every Coffin is always created with its own allocation, only Drop
+        //   can liberate it, and Drop will only be called once.
+        // - Layout is computed in the same way as in `Coffin::new()`, and the
+        //   size of the target slice is the same as that of new's input bytes.
+        unsafe { alloc::dealloc(slice.as_mut_ptr(),
+                                Self::layout(slice.len())); }
+    }
+}
+
+
+/// `Cow`-style abstraction for aligning abomonated bytes before `decode()`
+///
+/// Often, one needs to decode input bytes which are _probably_ well-aligned,
+/// but may not always be. For example, POSIX memory allocations are aligned
+/// on 16-byte boundaries, which is sufficient for most types... as long as
+/// multiple abomonated objects are not stored in a sequence without padding
+/// bytes in between.
+///
+/// In those circumstances, pessimistically using `Coffin<T>` all the time
+/// would cause unnecessarily intensive use of the system memory allocator.
+/// Instead, it is better to check if the input bytes are well-aligned and only
+/// reallocate them if necessary, which is what this abstraction does.
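+///
+/// # Example
+///
+/// A hedged usage sketch, not a doctest from the original patch (assumes a
+/// buffer `bytes` that was previously filled by `encode()` for a `u32`):
+///
+/// ```ignore
+/// let mut aligned = AlignedBytes::<u32>::new(&mut bytes[..]);
+/// let (value, _rest) = unsafe { decode::<u32>(&mut aligned[..]) }.unwrap();
+/// ```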
+pub enum AlignedBytes<'bytes, T: Exhume<'bytes>> {
+    /// The original bytes were sufficiently well-aligned
+    Borrowed(&'bytes mut [u8]),
+
+    /// The abomonated bytes were relocated into a well-aligned heap location
+    Owned(Coffin<T>),
+}
+
+impl<'bytes, T: Exhume<'bytes>> AlignedBytes<'bytes, T> {
+    /// Prepare possibly misaligned bytes for decoding
+    pub fn new(bytes: &'bytes mut [u8]) -> Self {
+        let misalignment = (bytes.as_ptr() as usize) % T::alignment();
+        if misalignment == 0 {
+            Self::Borrowed(bytes)
+        } else {
+            Self::Owned(Coffin::new(bytes))
+        }
+    }
+}
+
+impl<'bytes, T: Exhume<'bytes>> From<&'bytes mut [u8]> for AlignedBytes<'bytes, T> {
+    fn from(bytes: &'bytes mut [u8]) -> Self {
+        Self::new(bytes)
+    }
+}
+
+impl<'bytes, T: Exhume<'bytes>> From<Coffin<T>> for AlignedBytes<'bytes, T> {
+    fn from(coffin: Coffin<T>) -> Self {
+        Self::Owned(coffin)
+    }
+}
+
+impl<'bytes, T: Exhume<'bytes>> Deref for AlignedBytes<'bytes, T> {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        match self {
+            Self::Borrowed(b) => b,
+            Self::Owned(o) => o,
+        }
+    }
+}
+
+impl<'bytes, T: Exhume<'bytes>> DerefMut for AlignedBytes<'bytes, T> {
+    fn deref_mut(&mut self) -> &mut [u8] {
+        match self {
+            Self::Borrowed(b) => b,
+            Self::Owned(o) => o,
+        }
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::{AlignedBytes, Coffin, Entomb, Exhume};
+
+    #[test]
+    fn coffin() {
+        check_coffin::<u8>();
+        check_coffin::<u16>();
+        check_coffin::<u32>();
+        check_coffin::<u64>();
+        check_coffin::<u128>();
+    }
+
+    fn check_coffin<T: Entomb>() {
+        let bytes = make_test_bytes_for::<T>();
+        let coffin = Coffin::<T>::new(&bytes[..]);
+        assert_eq!(&coffin[..], &bytes[..],
+                   "Coffin data is incorrect");
+        assert_eq!(coffin.as_ptr() as usize % T::alignment(), 0,
+                   "Coffin alignment is not strong enough");
+    }
+
+    #[test]
+    fn aligned_bytes() {
+        check_aligned_bytes::<u16>();
+        check_aligned_bytes::<u32>();
+        check_aligned_bytes::<u64>();
+        check_aligned_bytes::<u128>();
+    }
+
+    fn check_aligned_bytes<T>()
+        where for<'a> T: Exhume<'a>
+    {
+        assert!(std::mem::align_of::<T>() > 1,
+                "This test requires generating misaligned data");
+
+        let mut bytes = make_test_bytes_for::<T>();
+        let mut coffin = Coffin::<T>::new(&bytes[..]);
+        let aligned_bytes = AlignedBytes::<T>::new(&mut coffin[..]);
+        match aligned_bytes {
+            AlignedBytes::Borrowed(_) => {}
+            AlignedBytes::Owned(_) => panic!("Should not allocate here"),
+        }
+        assert_eq!(&aligned_bytes[..], &bytes[..]);
+
+        bytes.push(42);
+        let mut coffin = Coffin::<T>::new(&bytes[..]);
+        let aligned_bytes = AlignedBytes::<T>::new(&mut coffin[1..]);
+        match aligned_bytes {
+            AlignedBytes::Borrowed(_) => panic!("Should allocate here"),
+            AlignedBytes::Owned(_) => {},
+        }
+        assert_eq!(&aligned_bytes[..], &bytes[1..]);
+    }
+
+    fn make_test_bytes_for<T>() -> Vec<u8> {
+        let mut i = 0;
+        std::iter::repeat_with(|| { i += 1; i })
+                  .take(std::mem::size_of::<T>())
+                  .collect::<Vec<_>>()
+    }
+}
diff --git a/src/align/mod.rs b/src/align/mod.rs
index 7aa85b4..eff9c4f 100644
--- a/src/align/mod.rs
+++ b/src/align/mod.rs
@@ -1,6 +1,9 @@
 /// Utilities for handling alignment in abomonated data
 
 mod io;
+mod alloc;
 
 #[deprecated(note = "Made pub for internal unsafe_abomonate use only")]
 pub use self::io::{AlignedReader, AlignedWriter};
+
+pub use self::alloc::{AlignedBytes, Coffin};
diff --git a/src/lib.rs b/src/lib.rs
index d77cb60..613b258 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -119,6 +119,10 @@
 /// abomonated data of type T, which you can check with `T::alignment()`.
 /// Failure to meet this requirement will result in undefined behavior.
/// +/// If you are not able to guarantee sufficient alignment from your data source, you may find the +/// `align::AlignedBytes` utility useful. It checks if your data is well-aligned, and moves it +/// into a well-aligned heap allocation otherwise. +/// /// # Examples /// ``` /// use abomonation::{encode, decode}; diff --git a/tests/tests.rs b/tests/tests.rs index 26b43cb..0fec0e4 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,7 @@ extern crate abomonation; use abomonation::*; +use abomonation::align::AlignedBytes; use std::fmt::Debug; // Test struct for the unsafe_abomonate macro @@ -135,10 +136,20 @@ fn test_multiple_encode_decode() { unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); } unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, 0); - let (t, r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, 7); - let (t, r) = unsafe { decode::>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned()); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); + assert_eq!(*t, 0); + + let mut r = AlignedBytes::::new(r); + let (t, r) = unsafe { decode::(&mut r) }.unwrap(); + assert_eq!(*t, 7); + + let mut r = AlignedBytes::>::new(r); + let (t, r) = unsafe { decode::>(&mut r) }.unwrap(); + assert_eq!(*t, vec![1,2,3]); + + let mut r = AlignedBytes::::new(r); + let (t, _r) = unsafe { decode::(&mut r) }.unwrap(); + assert_eq!(*t, "grawwwwrr".to_owned()); } #[test] From a53205981d272f2b57d1bf0cb5ea65b1533cb6cc Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Tue, 12 Nov 2019 15:39:49 +0100 Subject: [PATCH 32/34] Add support for zero-sized types to Coffin --- src/align/alloc.rs | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/src/align/alloc.rs b/src/align/alloc.rs index ec2487b..dcf974c 100644 --- a/src/align/alloc.rs +++ b/src/align/alloc.rs @@ -32,10 +32,24 @@ impl Coffin { /// May abort the computation if memory is exhausted or the system allocator /// is not able to satisfy the size or alignment requirements. pub fn new(bytes: &[u8]) -> Self { - // Perform the memory allocation using the system allocator. This is - // safe because all safety preconditions are checked by Self::layout(). + // Compute the memory layout of the target memory allocation let size = bytes.len(); let layout = Self::layout(size); + + // Handle zero-sized types just like Box does + if layout.size() == 0 { + return Self ( + unsafe { std::slice::from_raw_parts_mut( + NonNull::dangling().as_ptr(), + 0, + ) }.into(), + PhantomData, + ) + } + + // Perform the memory allocation using the system allocator. This is + // safe because all safety preconditions are checked by Self::layout(), + // except for zero-sized allocations which we checked above. let ptr = unsafe { alloc::alloc(layout) }; // Abort on memory allocation errors the recommended way. Since the @@ -57,8 +71,9 @@ impl Coffin { PhantomData) } - /// Compute the proper layout for a coffin allocation, checking the safety - /// preconditions of the system memory allocator along the way. + /// Compute the proper layout for a coffin allocation, checking most safety + /// preconditions of the system memory allocator along the way **except for + /// the "no zero-sized allocation" requirement**. /// /// We handle errors via panics because they all emerge from edge cases that /// should only be encountered by users actively trying to break this code. 
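For reference, here is the zero-size convention used in `Coffin::new()` above,
shown in isolation as a hedged sketch (this is illustrative code, not part of
the patch; it mirrors what `Box<T>` does for zero-sized `T`):

```rust
use std::ptr::NonNull;

// The system allocator rejects zero-sized requests, so a zero-length slice is
// instead backed by a well-aligned dangling pointer, which is valid for
// zero-length reads and writes and must never be passed to dealloc().
fn empty_bytes() -> NonNull<[u8]> {
    let data: NonNull<u8> = NonNull::dangling();
    unsafe { std::slice::from_raw_parts_mut(data.as_ptr(), 0) }.into()
}
```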
@@ -67,10 +82,6 @@ impl<T: Entomb> Coffin<T> {
         debug_assert!(size >= std::mem::size_of::<T>(),
                       "Requested size is quite obviously not big enough");
 
-        // We're going to use the system allocator, so we cannot accept
-        // zero-sized slices of bytes.
-        assert!(size > 0, "Allocation size must be positive");
-
         // At this point, the only layout errors that remain are those caused by
         // a bad Abomonation::alignment implementation (alignment is zero or not
         // a power of 2) or by a huge input size (close to usize::MAX).
@@ -119,7 +130,12 @@ impl<T: Entomb> Drop for Coffin<T> {
         //          https://github.com/rust-lang/rust/issues/55005
         let slice = unsafe { self.0.as_mut() };
 
-        // This is safe because...
+        // There is no allocation to deallocate for zero-sized types.
+        if slice.len() == 0 {
+            return;
+        }
+
+        // Deallocate the memory otherwise. This is safe because...
         // - Every Coffin is always created with its own allocation, only Drop
         //   can liberate it, and Drop will only be called once.
         // - Layout is computed in the same way as in `Coffin::new()`, and the

From e424281e55b03a35a65dff4a472650352c06035d Mon Sep 17 00:00:00 2001
From: Hadrien G
Date: Tue, 12 Nov 2019 16:16:05 +0100
Subject: [PATCH 33/34] Add a default implementation of alignment()

---
 src/lib.rs | 65 +++++++++++++++++++++++++++---------------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/src/lib.rs b/src/lib.rs
index 613b258..e5a6485 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -219,7 +219,8 @@ pub unsafe trait Entomb {
     /// Report the alignment of the complete Abomonation-serialized data
     fn alignment() -> usize
-        where Self: Sized; // TODO: { mem::align_of::<Self>() } (once ecosystem is ready)
+        where Self: Sized
+    { mem::align_of::<Self>() }
 
     /// Version of "alignment" that takes a &self parameter for use in
     /// declarative macros.
@@ -373,77 +374,77 @@ macro_rules!
tuple_abomonate { ); } -unsafe impl Entomb for u8 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for u8 {} unsafe impl Exhume<'_> for u8 {} -unsafe impl Entomb for u16 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for u16 {} unsafe impl Exhume<'_> for u16 {} -unsafe impl Entomb for u32 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for u32 {} unsafe impl Exhume<'_> for u32 {} -unsafe impl Entomb for u64 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for u64 {} unsafe impl Exhume<'_> for u64 {} -unsafe impl Entomb for u128 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for u128 {} unsafe impl Exhume<'_> for u128 {} -unsafe impl Entomb for usize { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for usize {} unsafe impl Exhume<'_> for usize {} -unsafe impl Entomb for i8 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for i8 {} unsafe impl Exhume<'_> for i8 {} -unsafe impl Entomb for i16 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for i16 {} unsafe impl Exhume<'_> for i16 {} -unsafe impl Entomb for i32 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for i32 {} unsafe impl Exhume<'_> for i32 {} -unsafe impl Entomb for i64 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for i64 {} unsafe impl Exhume<'_> for i64 {} -unsafe impl Entomb for i128 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for i128 {} unsafe impl Exhume<'_> for i128 {} -unsafe impl Entomb for isize { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for isize {} unsafe impl Exhume<'_> for isize {} -unsafe impl Entomb for NonZeroU8 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroU8 {} unsafe impl Exhume<'_> for NonZeroU8 {} -unsafe impl Entomb for NonZeroU16 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroU16 {} unsafe impl Exhume<'_> for NonZeroU16 {} -unsafe impl Entomb for NonZeroU32 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroU32 {} unsafe impl Exhume<'_> for NonZeroU32 {} -unsafe impl Entomb for NonZeroU64 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroU64 {} unsafe impl Exhume<'_> for NonZeroU64 {} -unsafe impl Entomb for NonZeroU128 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroU128 {} unsafe impl Exhume<'_> for NonZeroU128 {} -unsafe impl Entomb for NonZeroUsize { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroUsize {} unsafe impl Exhume<'_> for NonZeroUsize {} -unsafe impl Entomb for NonZeroI8 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroI8 {} unsafe impl Exhume<'_> for NonZeroI8 {} -unsafe impl Entomb for NonZeroI16 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroI16 {} unsafe impl Exhume<'_> for NonZeroI16 {} -unsafe impl Entomb for NonZeroI32 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroI32 {} unsafe impl Exhume<'_> for NonZeroI32 {} -unsafe impl Entomb for NonZeroI64 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroI64 {} unsafe impl Exhume<'_> for NonZeroI64 {} -unsafe impl Entomb for NonZeroI128 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroI128 {} unsafe impl Exhume<'_> for 
NonZeroI128 {} -unsafe impl Entomb for NonZeroIsize { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for NonZeroIsize {} unsafe impl Exhume<'_> for NonZeroIsize {} -unsafe impl Entomb for f32 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for f32 {} unsafe impl Exhume<'_> for f32 {} -unsafe impl Entomb for f64 { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for f64 {} unsafe impl Exhume<'_> for f64 {} -unsafe impl Entomb for bool { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for bool {} unsafe impl Exhume<'_> for bool {} -unsafe impl Entomb for () { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for () {} unsafe impl Exhume<'_> for () {} -unsafe impl Entomb for char { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for char {} unsafe impl Exhume<'_> for char {} unsafe impl Entomb for str { fn alignment() -> usize { mem::align_of::<[u8; 1]>() } } unsafe impl Exhume<'_> for str {} -unsafe impl Entomb for ::std::time::Duration { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for ::std::time::Duration {} unsafe impl Exhume<'_> for ::std::time::Duration {} -unsafe impl Entomb for PhantomData { fn alignment() -> usize { mem::align_of::() } } +unsafe impl Entomb for PhantomData {} unsafe impl<'de, T: 'de> Exhume<'de> for PhantomData {} unsafe impl Entomb for Option { From b3787b61d52592c905951b18effa8fcb163c3d09 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 14 Nov 2019 22:07:53 +0100 Subject: [PATCH 34/34] Whoops, wrong cfg trick --- src/align/io.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/align/io.rs b/src/align/io.rs index 1f4fa8a..b729b78 100644 --- a/src/align/io.rs +++ b/src/align/io.rs @@ -64,8 +64,9 @@ impl AlignedWriter { // AlignedReader will skip a number of padding bytes that may not be // in sync with the amount that AlignedWriter has inserted, in a manner // that depends on how the data being read out was _actually_ aligned. + #[cfg(debug_assertions)] debug_assert!( - if cfg!(debug_assertions) { alignment <= self.output_alignment } else { true }, + alignment <= self.output_alignment, "Insufficient output alignment (output alignment is {}, got data of alignment {})", self.output_alignment, alignment ); @@ -187,8 +188,9 @@ impl<'bytes> AlignedReader<'bytes> { // AlignedReader will skip a number of padding bytes that may not be // in sync with the amount that AlignedWriter has inserted, in a manner // that depends on how the data being read out was _actually_ aligned. + #[cfg(debug_assertions)] debug_assert!( - if cfg!(debug_assertions) { alignment <= self.input_alignment } else { true }, + alignment <= self.input_alignment, "Insufficient input alignment (input alignment is {}, asked for data of alignment {})", self.input_alignment, alignment );
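For context on this final fix: `debug_assert!` only skips *evaluating* its
condition in release builds, but the condition is still type-checked there. The
old `if cfg!(debug_assertions) { ... } else { true }` trick therefore still
named the `output_alignment` / `input_alignment` fields, which do not exist in
release builds because they are `#[cfg(debug_assertions)]`-gated. A hedged,
self-contained illustration of the difference (the `Checker` struct is invented
for this example):

```rust
struct Checker {
    // Field only exists in debug builds, like AlignedWriter::output_alignment
    #[cfg(debug_assertions)]
    limit: usize,
}

impl Checker {
    fn check(&self, _value: usize) {
        // Would NOT compile in release builds: `cfg!` is just a boolean, the
        // dead branch is still type-checked, and `self.limit` is absent there.
        //
        //     debug_assert!(
        //         if cfg!(debug_assertions) { _value <= self.limit } else { true }
        //     );

        // Compiles everywhere: `#[cfg]` removes the whole statement from
        // release builds, so `self.limit` is only named when it exists.
        #[cfg(debug_assertions)]
        debug_assert!(_value <= self.limit);
    }
}
```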