Skip to content

Commit

Permalink
v0.1.11: AsRef<str> and optimized single instance Vec<u8>
Browse files Browse the repository at this point in the history
Converting a `Vec`/`String` back from its shared external version is now a zero-copy operation, e.g. `Vec::from(ByteData::from([0u8; 64].to_vec()))`.
  • Loading branch information
TimLuq committed Nov 10, 2024
1 parent 81208b9 commit 3028c46
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 20 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "bytedata"
version = "0.1.10"
version = "0.1.11"
edition = "2021"
rust-version = "1.75"
description = "Representation of a byte slice that is either static, borrowed, or shared."
Expand Down
73 changes: 68 additions & 5 deletions src/bytedata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,17 @@ impl<'a> ByteData<'a> {
#[inline]
#[must_use]
pub const fn from_static(dat: &'static [u8]) -> Self {
Self {
slice: ByteSlice::new(dat, true),
if dat.len() <= crate::byte_chunk::ByteChunk::LEN {
Self {
chunk: WrappedChunk {
kind: KIND_CHUNK_MASK,
data: crate::byte_chunk::ByteChunk::from_slice(dat),
},
}
} else {
Self {
slice: ByteSlice::new(dat, true),
}
}
}

Expand Down Expand Up @@ -286,6 +295,13 @@ impl<'a> ByteData<'a> {
Self {
chunk: empty_chunk(),
}
} else if dat.len() <= crate::byte_chunk::ByteChunk::LEN {
Self {
chunk: WrappedChunk {
kind: KIND_CHUNK_MASK,
data: crate::byte_chunk::ByteChunk::from_slice(dat),
},
}
} else {
Self {
slice: ByteSlice::new(dat, false),
Expand All @@ -305,7 +321,7 @@ impl<'a> ByteData<'a> {
}

#[cfg(feature = "alloc")]
/// Creates a `ByteData` from a `Vec<u8>`.
/// Creates a `ByteData` from a `Vec<u8>` using zero copy.
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[inline]
#[must_use]
Expand All @@ -328,7 +344,7 @@ impl<'a> ByteData<'a> {
/// Creates a `ByteData` from an externally kept byte sequence.
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
#[inline]
pub fn from_external<E: crate::external::ExternalBytes>(dat: E) -> Self {
pub fn from_external<E: crate::external::IntoExternalBytes>(dat: E) -> Self {
crate::external::ExtBytes::create(dat)
}

Expand Down Expand Up @@ -869,6 +885,7 @@ impl<'a> From<&'a [u8]> for ByteData<'a> {
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<SharedBytes> for ByteData<'_> {
#[inline]
fn from(dat: SharedBytes) -> Self {
Expand All @@ -881,14 +898,60 @@ impl From<SharedBytes> for ByteData<'_> {
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<Vec<u8>> for ByteData<'_> {
#[inline]
fn from(dat: Vec<u8>) -> Self {
Self::from_shared(dat.into())
let len = dat.len();
if len <= crate::byte_chunk::ByteChunk::LEN {
Self::from_chunk_slice(&dat)
} else if len < 32 {
Self::from_shared(dat.into())
} else {
Self::from_external(dat)
}
}
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl<'a> From<ByteData<'a>> for Vec<u8> {
    /// Converts the `ByteData` into a `Vec<u8>`.
    ///
    /// Data that is not of the external kind is copied into a fresh vector.
    /// External data is passed to `ExtBytes::take_inner`: when that succeeds the stored
    /// `Vec<u8>` is taken out, cut down to the end of the visible slice, and any bytes in
    /// front of the slice are drained. When it fails the visible bytes are copied instead.
    #[allow(clippy::missing_inline_in_public_items)]
    fn from(dat: ByteData<'a>) -> Self {
        match dat.kind() {
            Kind::External => (),
            _ => return dat.as_slice().to_vec(),
        }
        // SAFETY: External state has been checked.
        let ext = unsafe { core::mem::transmute::<ByteData<'a>, crate::external::ExtBytes>(dat) };

        ext.take_inner::<Self, Self, _>(|taken| {
            // Locate the visible slice inside the stored vector.
            let (start, count) = taken.with_slice_ref(|owner, view| {
                // SAFETY: the slice should be a valid subslice.
                let delta = unsafe { view.as_ptr().byte_offset_from(owner.as_slice().as_ptr()) };
                #[allow(clippy::cast_sign_loss)]
                let start = delta as usize;
                let count = view.len();
                debug_assert!(start <= owner.len(), "ByteData::into_vec: offset out of bounds");
                debug_assert!(start + count <= owner.len(), "ByteData::into_vec: len out of bounds");
                (start, count)
            });
            let storage = taken.into_inner();
            // Drop everything behind the visible slice before moving the vector out.
            storage.truncate(count + start);
            let mut owned = core::mem::take(storage);
            // Remove the bytes in front of the visible slice, if any.
            if start != 0 {
                core::mem::drop(owned.drain(0..start));
            }
            owned
        })
        .unwrap_or_else(|shared| shared.as_slice().to_vec())
    }
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<alloc::string::String> for ByteData<'_> {
#[inline]
fn from(dat: alloc::string::String) -> Self {
Expand Down
114 changes: 100 additions & 14 deletions src/external.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ impl<T: Sized> ExternalOps<T> {
}
}

/// Conversion into a type that can be kept as external byte data.
///
/// Implementors name an [`ExternalBytes`] type that can be built from `Self` through `From`.
pub trait IntoExternalBytes
where
    Self: Sized,
{
    /// The [`ExternalBytes`] type that `Self` converts into.
    type External: From<Self> + ExternalBytes;
}

/// Every [`ExternalBytes`] type is its own external representation.
impl<T> IntoExternalBytes for T
where
    T: ExternalBytes,
{
    type External = Self;
}

/// A `String` uses `Vec<u8>` as its external representation.
impl IntoExternalBytes for alloc::string::String {
    type External = alloc::vec::Vec<u8>;
}

/// A trait for types that can be used as external byte data.
pub trait ExternalBytes: core::any::Any + Sync + Sized + 'static {
/// The operations that can be performed on this type.
Expand All @@ -41,10 +55,6 @@ impl ExternalBytes for alloc::vec::Vec<u8> {
const OPS: ExternalOps<Self> = ExternalOps::new(Self::as_slice);
}

impl ExternalBytes for alloc::string::String {
const OPS: ExternalOps<Self> = ExternalOps::new(Self::as_bytes);
}

impl ExternalBytes for alloc::sync::Arc<str> {
const OPS: ExternalOps<Self> = ExternalOps::new(|x| x.as_bytes());
}
Expand Down Expand Up @@ -80,6 +90,21 @@ impl ExtBytesRef {
}
}

/// View handed to the callback of `ExtBytes::take_inner`.
///
/// Pairs exclusive access to the stored value with the byte slice recorded next to it.
pub(crate) struct TakeExtBytesInner<'a, T> {
    /// Exclusive reference to the stored value.
    data: &'a mut T,
    /// The byte slice supplied together with `data`.
    slice: &'a [u8],
}

impl<'a, T> TakeExtBytesInner<'a, T> {
    /// Calls `fun` with shared references to the stored value and the slice,
    /// and returns whatever `fun` returns.
    #[inline]
    pub(crate) fn with_slice_ref<'b, F, R>(&'b self, fun: F) -> R
    where
        'a: 'b,
        F: FnOnce(&'b T, &'b [u8]) -> R,
    {
        let value: &'b T = &*self.data;
        let bytes: &'b [u8] = self.slice;
        fun(value, bytes)
    }

    /// Consumes the view and returns the exclusive reference to the stored value.
    #[inline]
    pub(crate) fn into_inner(self) -> &'a mut T {
        let Self { data, .. } = self;
        data
    }
}

#[repr(C)]
pub(crate) struct ExtBytes {
magic: [u8; 8],
Expand All @@ -96,8 +121,9 @@ pub(crate) const KIND_EXT_BYTES: u8 = 0b0000_0011;
impl ExtBytes {
const MAGIC: [u8; 8] = [KIND_EXT_BYTES, 0, 0, 0, 0, 0, 0, 0];

pub(crate) fn create<'a, T: ExternalBytes>(ext_bytes: T) -> crate::ByteData<'a> {
let as_slice = T::OPS.as_slice;
pub(crate) fn create<'a, T: IntoExternalBytes>(ext_bytes: T) -> crate::ByteData<'a> {
let ext_bytes = ext_bytes.into();
let as_slice = <T::External>::OPS.as_slice;
{
// Try to use the data as a short chunk
let sl = as_slice(&ext_bytes);
Expand All @@ -107,8 +133,8 @@ impl ExtBytes {
}
}

let align = core::mem::align_of::<T>().max(core::mem::align_of::<ExtBytesWrapper>());
let mut alloc = core::mem::size_of::<T>() + core::mem::size_of::<ExtBytesWrapper>();
let align = core::mem::align_of::<T::External>().max(core::mem::align_of::<ExtBytesWrapper>());
let mut alloc = core::mem::size_of::<T::External>() + core::mem::size_of::<ExtBytesWrapper>();
if alloc % align != 0 {
alloc += align - (alloc % align);
}
Expand All @@ -127,20 +153,20 @@ impl ExtBytes {
}

// SAFETY: `data` is a valid pointer to an allocated area.
let payload = unsafe { data.add(offset).cast::<T>() };
let payload = unsafe { data.add(offset).cast::<T::External>() };
// SAFETY: writing to the location we just calculated is safe.
unsafe {
payload.write(ext_bytes);
};

// SAFETY: `payload` is a valid pointer to `T` as we just wrote to it.
// SAFETY: `payload` is a valid pointer to `T::External` as we just wrote to it.
let sl = as_slice(unsafe { &*payload });
let len = sl.len();
let ptr = sl.as_ptr();

if len <= crate::ByteChunk::LEN {
let aa = crate::ByteData::from_chunk_slice(sl);
if let Some(drop) = T::OPS.drop {
if let Some(drop) = <T::External>::OPS.drop {
// SAFETY: `payload` is a valid pointer to `T` which should be dropped.
unsafe { drop(payload) };
}
Expand All @@ -154,15 +180,15 @@ impl ExtBytes {
let item = ExtBytesWrapper {
// SAFETY: `T::OPS.drop` is an optional function pointer.
drop: unsafe {
core::mem::transmute::<Option<unsafe fn(*mut T)>, Option<unsafe fn(*mut ())>>(
T::OPS.drop,
core::mem::transmute::<Option<unsafe fn(*mut T::External)>, Option<unsafe fn(*mut ())>>(
<T::External>::OPS.drop,
)
},
alloc,
ref_count: core::sync::atomic::AtomicU32::new(1),
#[allow(clippy::cast_possible_truncation)]
align: align as u32,
kind: core::any::TypeId::of::<T>(),
kind: core::any::TypeId::of::<T::External>(),
};
// SAFETY: `header` is a valid pointer to `ExtBytesWrapper`.
unsafe { header.write(item) };
Expand Down Expand Up @@ -306,6 +332,42 @@ impl ExtBytes {
core::slice::from_raw_parts(dd.ptr, dd.len)
}))
}

/// Take the inner value of the `ExtBytes` instance if the type matches and there is only one reference.
///
/// On success `fun` receives a `TakeExtBytesInner` giving exclusive access to the stored `T`
/// together with the byte slice this instance refers to; its result is returned in `Ok` and
/// `self` is dropped after `fun` has returned.
///
/// `Err(self)` is returned, with `self` untouched, when the instance has no wrapper pointer,
/// when the stored type is not `T`, or when the reference count is not exactly one.
pub(crate) fn take_inner<T: core::any::Any, R, F: for<'a> FnOnce(TakeExtBytesInner<'a, T>) -> R>(
    self,
    fun: F,
) -> Result<R, Self> {
    debug_assert_eq!(
        self.magic[0], KIND_EXT_BYTES,
        "invalid magic number in ExtBytes"
    );
    debug_assert!(!self.data.is_null(), "null pointer in ExtBytes");
    // SAFETY: `data` is a valid pointer to `ExtBytesRef`.
    let dd = unsafe { &*self.data };
    if dd.data.is_null() {
        return Err(self);
    }
    // SAFETY: `ExtBytesRef.data` is a valid pointer to `ExtBytesWrapper`.
    let ee = unsafe { &*dd.data };
    if ee.kind != core::any::TypeId::of::<T>() {
        return Err(self);
    }
    // `Acquire` rather than `Relaxed`: observing a count of 1 is what justifies the exclusive
    // `&mut T` created below, so accesses made through handles that were released on other
    // threads must be ordered before it (the same reasoning `Arc::get_mut` applies).
    // NOTE(review): relies on the decrement in `Drop` using `Release` or stronger — confirm.
    if ee.ref_count.load(core::sync::atomic::Ordering::Acquire) != 1 {
        return Err(self);
    }
    // The payload sits behind the header, rounded up to the recorded alignment.
    let mut offset = core::mem::size_of::<ExtBytesWrapper>();
    let align_mod = offset % ee.align as usize;
    if align_mod != 0 {
        offset += ee.align as usize - align_mod;
    }
    // SAFETY: `dd.data` is a valid pointer to the container data located at `offset`.
    let t_val = unsafe { dd.data.cast::<u8>().add(offset) };
    // SAFETY: `t_val` should now be cast to `*const T`.
    let t_val = unsafe { &mut *t_val.cast::<T>().cast_mut() };
    // SAFETY: `dd.ptr` is a valid pointer to the slice start.
    let slic = unsafe { core::slice::from_raw_parts(dd.ptr, dd.len) };
    let dat = fun(TakeExtBytesInner { data: t_val, slice: slic });
    // Release this handle only after the callback is done with the borrowed value.
    core::mem::drop(self);
    Ok(dat)
}
}

impl Drop for ExtBytes {
Expand Down Expand Up @@ -378,3 +440,27 @@ impl Clone for ExtBytes {
}
}
}

#[cfg(test)]
mod test {
    use crate::ByteData;

    /// Check if zero copy works for `Vec<u8>`.
    ///
    /// A 48-byte vector is wrapped as external data, sliced to its first 32 bytes and
    /// converted back; the resulting vector must start at the original address.
    #[test]
    fn test_bytedata_ext_vec() {
        use alloc::vec::Vec;
        let mut data = Vec::<u8>::with_capacity(64);
        data.extend(0..48_u8);
        let data_copy = data.clone();
        let ptr = data.as_slice().as_ptr();
        let mut data = ByteData::from_external(data);
        data.make_sliced(..32);
        assert_eq!(data.len(), 32);
        let data = Vec::<u8>::from(data);
        assert_eq!(data, &data_copy[..32]);
        let check_ptr = data.as_slice().as_ptr();
        // `core::ptr::eq` instead of `core::ptr::addr_eq`: the latter requires Rust 1.76,
        // which is newer than the `rust-version` of 1.75 declared in `Cargo.toml`.
        assert!(core::ptr::eq(ptr, check_ptr), "pointers should be equal");
    }
}
35 changes: 35 additions & 0 deletions src/stringdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,13 @@ impl AsRef<[u8]> for StringData<'_> {
}
}

/// Lets a `StringData` be passed wherever an `AsRef<str>` is accepted.
impl AsRef<str> for StringData<'_> {
    /// Returns the result of `StringData::as_str`.
    #[inline]
    fn as_ref(&self) -> &str {
        Self::as_str(self)
    }
}

impl Deref for StringData<'_> {
type Target = str;
#[inline]
Expand Down Expand Up @@ -445,6 +452,14 @@ impl TryFrom<SharedBytes> for StringData<'_> {
}
}

/// Fallible conversion from `ByteData`, forwarding to `StringData::try_from_bytedata`.
impl<'a> TryFrom<ByteData<'a>> for StringData<'a> {
    /// A `ByteData` is handed back on failure (presumably the unconverted input — see
    /// `try_from_bytedata`).
    type Error = ByteData<'a>;

    #[inline]
    fn try_from(dat: ByteData<'a>) -> Result<Self, Self::Error> {
        <StringData<'a>>::try_from_bytedata(dat)
    }
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<String> for StringData<'_> {
Expand All @@ -454,6 +469,26 @@ impl From<String> for StringData<'_> {
}
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<StringData<'_>> for String {
    /// Converts the underlying `ByteData` into a `Vec<u8>` and wraps it as a `String`
    /// without re-validating the bytes.
    #[inline]
    fn from(dat: StringData<'_>) -> Self {
        let bytes: Vec<u8> = dat.into_bytedata().into();
        // SAFETY: `StringData` is guaranteed to be valid UTF-8, unless the user has used `unsafe` methods.
        unsafe { Self::from_utf8_unchecked(bytes) }
    }
}

#[cfg(feature = "alloc")]
#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
impl From<StringData<'_>> for alloc::vec::Vec<u8> {
    /// Unwraps the `StringData` into its `ByteData` and converts that into a byte vector.
    #[inline]
    fn from(dat: StringData<'_>) -> Self {
        dat.into_bytedata().into()
    }
}

impl<'b> PartialEq<StringData<'b>> for StringData<'_> {
#[inline]
fn eq(&self, other: &StringData<'b>) -> bool {
Expand Down

0 comments on commit 3028c46

Please sign in to comment.