diff --git a/src/inline_size.rs b/src/inline_size.rs
new file mode 100644
index 0000000..487866f
--- /dev/null
+++ b/src/inline_size.rs
@@ -0,0 +1,90 @@
+use core::mem;
+
+/// Length of an inline buffer holding 1 to 8 bytes; only those eight values are representable.
+#[derive(Clone, Copy)]
+#[repr(usize)]
+pub(crate) enum InlineSize8 {
+    _V1 = 1,
+    _V2,
+    _V3,
+    _V4,
+    _V5,
+    _V6,
+    _V7,
+    _V8,
+}
+
+impl InlineSize8 {
+    /// Reinterprets `value` as the variant with that discriminant.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure `value` is in \[1, 8\]; this is only checked by the `debug_assert!`.
+    #[inline(always)]
+    pub(crate) const unsafe fn transmute_from_usize(value: usize) -> Self {
+        debug_assert!(value >= Self::_V1 as usize && value <= Self::_V8 as usize);
+        mem::transmute::<usize, Self>(value)
+    }
+}
+
+/// Length of an inline buffer holding 9 to 16 bytes; only those eight values are representable.
+#[derive(Clone, Copy)]
+#[repr(usize)]
+pub(crate) enum InlineSize16 {
+    _V9 = 9,
+    _V10,
+    _V11,
+    _V12,
+    _V13,
+    _V14,
+    _V15,
+    _V16,
+}
+
+impl InlineSize16 {
+    /// Reinterprets `value` as the variant with that discriminant.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure `value` is in \[9, 16\]; this is only checked by the `debug_assert!`.
+    #[inline(always)]
+    pub(crate) const unsafe fn transmute_from_usize(value: usize) -> Self {
+        debug_assert!(value >= Self::_V9 as usize && value <= Self::_V16 as usize);
+        mem::transmute::<usize, Self>(value)
+    }
+}
+
+/// Length of an inline buffer holding 17 to 32 bytes; only those sixteen values are representable.
+#[derive(Clone, Copy)]
+#[repr(usize)]
+pub(crate) enum InlineSize32 {
+    _V17 = 17,
+    _V18,
+    _V19,
+    _V20,
+    _V21,
+    _V22,
+    _V23,
+    _V24,
+    _V25,
+    _V26,
+    _V27,
+    _V28,
+    _V29,
+    _V30,
+    _V31,
+    _V32,
+}
+
+impl InlineSize32 {
+    /// Reinterprets `value` as the variant with that discriminant.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure `value` is in \[17, 32\]; this is only checked by the `debug_assert!`.
+    #[inline(always)]
+    pub(crate) const unsafe fn transmute_from_usize(value: usize) -> Self {
+        debug_assert!(value >= Self::_V17 as usize && value <= Self::_V32 as usize);
+        mem::transmute::<usize, Self>(value)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 55afbc3..f32e44d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,7 @@ use bytes::{Bytes, BytesMut};
 use core::{
     borrow::Borrow, cmp::Ordering, convert::Infallible, fmt, hash, iter, ops::Deref, str::FromStr,
 };
+use inline_size::*;
 use simdutf8::basic::{from_utf8, Utf8Error};
 
 /// `FastStr` is a string type that try to avoid the cost of clone.
@@ -24,6 +25,8 @@ mod size_asserts {
     static_assertions::assert_eq_size!(super::FastStr, [u8; 40]); // 40 bytes
 }
 
+const INLINE_CAP: usize = InlineSize32::_V32 as usize;
+
 impl FastStr {
     /// Create a new `FastStr` from any type `T` that can be converted to a string slice
     /// (e.g., `String`, `&str`, `Arc<String>`, `Arc<str>`).
@@ -294,7 +297,23 @@ impl FastStr {
             ch.encode_utf8(&mut buf[len..]);
             len += size;
         }
-        Self(Repr::Inline { len, buf })
+
+        match len {
+            0 => Self::empty(),
+            1..=8 => Self(Repr::Inline8 {
+                len: unsafe { InlineSize8::transmute_from_usize(len) },
+                buf: unsafe { *(buf.as_ptr() as *const [u8; 8]) },
+            }),
+            9..=16 => Self(Repr::Inline16 {
+                len: unsafe { InlineSize16::transmute_from_usize(len) },
+                buf: unsafe { *(buf.as_ptr() as *const [u8; 16]) },
+            }),
+            17..=32 => Self(Repr::Inline32 {
+                len: unsafe { InlineSize32::transmute_from_usize(len) },
+                buf: unsafe { *(buf.as_ptr() as *const [u8; 32]) },
+            }),
+            _ => unreachable!(),
+        }
     }
 
     fn can_inline(s: &str) -> bool {
@@ -475,7 +494,22 @@ where
         buf[len..][..size].copy_from_slice(slice.as_bytes());
         len += size;
     }
-    FastStr(Repr::Inline { len, buf })
+    match len {
+        0 => FastStr::empty(),
+        1..=8 => FastStr(Repr::Inline8 {
+            len: unsafe { InlineSize8::transmute_from_usize(len) },
+            buf: unsafe { *(buf.as_ptr() as *const [u8; 8]) },
+        }),
+        9..=16 => FastStr(Repr::Inline16 {
+            len: unsafe { InlineSize16::transmute_from_usize(len) },
+            buf: unsafe { *(buf.as_ptr() as *const [u8; 16]) },
+        }),
+        17..=32 => FastStr(Repr::Inline32 {
+            len: unsafe { InlineSize32::transmute_from_usize(len) },
+            buf: unsafe { *(buf.as_ptr() as *const [u8; 32]) },
+        }),
+        _ => unreachable!(),
+    }
 }
 
 impl iter::FromIterator for FastStr {
@@ -564,8 +598,6 @@ impl From> for FastStr {
     }
 }
 
-const INLINE_CAP: usize = 24;
-
 #[derive(Clone)]
 enum Repr {
     Empty,
@@ -573,7 +605,9 @@ enum Repr {
     ArcStr(Arc<str>),
     ArcString(Arc<String>),
     StaticStr(&'static str),
-    Inline { len: usize, buf: [u8; INLINE_CAP] },
+    Inline8 { len: InlineSize8, buf: [u8; 8] },
+    Inline16 { len: InlineSize16, buf: [u8; 16] },
+    Inline32 { len: InlineSize32, buf: [u8; 32] },
 }
 
 impl Repr {
@@ -609,9 +643,34 @@ impl Repr {
     ///
     /// The length of `s` must be <= `INLINE_CAP`.
     unsafe fn new_inline_impl(s: &str) -> Self {
-        let mut buf = [0u8; INLINE_CAP];
-        core::ptr::copy_nonoverlapping(s.as_ptr(), buf.as_mut_ptr(), s.len());
-        Self::Inline { len: s.len(), buf }
+        match s.len() {
+            0 => Self::Empty,
+            1..=8 => {
+                let mut buf = [0u8; 8];
+                // Copy only `s.len()` bytes; a fixed 8 would read past the end of `s`.
+                core::ptr::copy_nonoverlapping(s.as_ptr(), buf.as_mut_ptr(), s.len());
+                // SAFETY: this arm only matches lengths in [1, 8].
+                let len = InlineSize8::transmute_from_usize(s.len());
+                Self::Inline8 { len, buf }
+            }
+            9..=16 => {
+                let mut buf = [0u8; 16];
+                // Copy only `s.len()` bytes; a fixed 16 would read past the end of `s`.
+                core::ptr::copy_nonoverlapping(s.as_ptr(), buf.as_mut_ptr(), s.len());
+                // SAFETY: this arm only matches lengths in [9, 16].
+                let len = InlineSize16::transmute_from_usize(s.len());
+                Self::Inline16 { len, buf }
+            }
+            17..=32 => {
+                let mut buf = [0u8; 32];
+                // Copy only `s.len()` bytes; a fixed 32 would read past the end of `s`.
+                core::ptr::copy_nonoverlapping(s.as_ptr(), buf.as_mut_ptr(), s.len());
+                // SAFETY: this arm only matches lengths in [17, 32].
+                let len = InlineSize32::transmute_from_usize(s.len());
+                Self::Inline32 { len, buf }
+            }
+            _ => unreachable!(),
+        }
     }
 
     #[inline]
@@ -653,7 +712,9 @@ impl Repr {
             Self::ArcStr(arc_str) => arc_str.len(),
             Self::ArcString(arc_string) => arc_string.len(),
             Self::StaticStr(s) => s.len(),
-            Self::Inline { len, .. } => *len,
+            Self::Inline8 { len, .. } => *len as usize,
+            Self::Inline16 { len, .. } => *len as usize,
+            Self::Inline32 { len, .. } => *len as usize,
         }
     }
 
@@ -665,7 +726,9 @@ impl Repr {
             Self::ArcStr(arc_str) => arc_str.is_empty(),
             Self::ArcString(arc_string) => arc_string.is_empty(),
             Self::StaticStr(s) => s.is_empty(),
-            Self::Inline { len, .. } => *len == 0,
+            Self::Inline8 { .. } => false,
+            Self::Inline16 { .. } => false,
+            Self::Inline32 { .. } => false,
         }
     }
 
@@ -678,7 +741,15 @@ impl Repr {
             Self::ArcStr(arc_str) => arc_str,
             Self::ArcString(arc_string) => arc_string,
             Self::StaticStr(s) => s,
-            Self::Inline { len, buf } => unsafe { core::str::from_utf8_unchecked(&buf[..*len]) },
+            Self::Inline8 { len, buf } => unsafe {
+                core::str::from_utf8_unchecked(&buf[..*len as usize])
+            },
+            Self::Inline16 { len, buf } => unsafe {
+                core::str::from_utf8_unchecked(&buf[..*len as usize])
+            },
+            Self::Inline32 { len, buf } => unsafe {
+                core::str::from_utf8_unchecked(&buf[..*len as usize])
+            },
         }
     }
 
@@ -693,8 +764,14 @@ impl Repr {
                 Arc::try_unwrap(arc_string).unwrap_or_else(|arc| (*arc).clone())
             }
             Self::StaticStr(s) => s.to_string(),
-            Self::Inline { len, buf } => unsafe {
-                String::from_utf8_unchecked(buf[..len].to_vec())
+            Self::Inline8 { len, buf } => unsafe {
+                String::from_utf8_unchecked(buf[..len as usize].to_vec())
+            },
+            Self::Inline16 { len, buf } => unsafe {
+                String::from_utf8_unchecked(buf[..len as usize].to_vec())
+            },
+            Self::Inline32 { len, buf } => unsafe {
+                String::from_utf8_unchecked(buf[..len as usize].to_vec())
             },
         }
     }
@@ -709,7 +786,9 @@ impl Repr {
                 Bytes::from(Arc::try_unwrap(arc_string).unwrap_or_else(|arc| (*arc).clone()))
             }
             Self::StaticStr(s) => Bytes::from_static(s.as_bytes()),
-            Self::Inline { len, buf } => Bytes::from(buf[..len].to_vec()),
+            Self::Inline8 { len, buf } => Bytes::from(buf[..len as usize].to_vec()),
+            Self::Inline16 { len, buf } => Bytes::from(buf[..len as usize].to_vec()),
+            Self::Inline32 { len, buf } => Bytes::from(buf[..len as usize].to_vec()),
         }
     }
 
@@ -722,7 +801,15 @@ impl Repr {
             Self::ArcStr(arc_str) => Self::ArcStr(Arc::clone(arc_str)),
             Self::ArcString(arc_string) => Self::ArcString(Arc::clone(arc_string)),
             Self::StaticStr(s) => Self::StaticStr(s),
-            Self::Inline { len, buf } => Self::Inline {
+            Self::Inline8 { len, buf } => Self::Inline8 {
+                len: *len,
+                buf: *buf,
+            },
+            Self::Inline16 { len, buf } => Self::Inline16 {
+                len: *len,
+                buf: *buf,
+            },
+            Self::Inline32 { len, buf } => Self::Inline32 {
                 len: *len,
                 buf: *buf,
             },
@@ -768,14 +855,30 @@ impl Repr {
             Repr::StaticStr(s) => Self::StaticStr(unsafe {
                 core::str::from_utf8_unchecked(&s.as_bytes()[sub_offset..sub_offset + sub_len])
             }),
-            Repr::Inline { len: _, buf } => Self::Inline {
-                len: sub_len,
-                buf: {
-                    let mut new_buf = [0; INLINE_CAP];
-                    new_buf[..sub_len].copy_from_slice(&buf[sub_offset..sub_offset + sub_len]);
-                    new_buf
-                },
-            },
+            Repr::Inline8 { buf, .. } => {
+                let sub = &buf[sub_offset..sub_offset + sub_len];
+                // Size the result from `sub_len`: keeping the parent's `len` would
+                // report the zero padding after the copied bytes as string content.
+                // SAFETY: `sub` is at most 8 bytes (<= `INLINE_CAP`); it is assumed
+                // to be valid UTF-8, exactly as in the `StaticStr` arm above.
+                unsafe { Self::new_inline_impl(core::str::from_utf8_unchecked(sub)) }
+            }
+            Repr::Inline16 { buf, .. } => {
+                let sub = &buf[sub_offset..sub_offset + sub_len];
+                // Size the result from `sub_len`: a short slice of a 16-byte buffer
+                // must become `Inline8` (or `Empty`), not stay `Inline16`.
+                // SAFETY: `sub` is at most 16 bytes (<= `INLINE_CAP`); it is assumed
+                // to be valid UTF-8, exactly as in the `StaticStr` arm above.
+                unsafe { Self::new_inline_impl(core::str::from_utf8_unchecked(sub)) }
+            }
+            Repr::Inline32 { buf, .. } => {
+                let sub = &buf[sub_offset..sub_offset + sub_len];
+                // Size the result from `sub_len`: a short slice of a 32-byte buffer
+                // must become a smaller inline variant (or `Empty`).
+                // SAFETY: `sub` is at most 32 bytes (<= `INLINE_CAP`); it is assumed
+                // to be valid UTF-8, exactly as in the `StaticStr` arm above.
+                unsafe { Self::new_inline_impl(core::str::from_utf8_unchecked(sub)) }
+            }
         }
     }
 }
@@ -789,9 +892,14 @@ impl AsRef<[u8]> for Repr {
             Self::ArcStr(arc_str) => arc_str.as_bytes(),
             Self::ArcString(arc_string) => arc_string.as_bytes(),
             Self::StaticStr(s) => s.as_bytes(),
-            Self::Inline { len, buf } => &buf[..*len],
+            Self::Inline8 { len, buf, .. } => &buf[..*len as usize],
+            Self::Inline16 { len, buf, .. } => &buf[..*len as usize],
+            Self::Inline32 { len, buf, .. } => &buf[..*len as usize],
         }
     }
 }
+
+mod inline_size;
+
 #[cfg(feature = "redis")]
 mod redis;