From 0b21fd613b39b6fa143a9490fa1f0418e71c1f57 Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Sat, 27 May 2023 12:55:01 -0400
Subject: [PATCH 1/2] Add a test

---
 tests/vs-std-write.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tests/vs-std-write.rs b/tests/vs-std-write.rs
index 91cf783..ca4376c 100644
--- a/tests/vs-std-write.rs
+++ b/tests/vs-std-write.rs
@@ -364,3 +364,13 @@ fn hex() {
 
     // ::fmt(-128)
 }
+
+#[test]
+fn char() {
+    // Miri is slow, so step over the range of valid chars sparsely
+    let step = if cfg!(miri) { 1 << 16 } else { 1 };
+
+    for c in ('\0'..=char::MAX).step_by(step) {
+        cmp!("{}", c);
+    }
+}

From 005fcf2531d5140688053643a4f0ad918b1ef662 Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Sat, 27 May 2023 12:55:15 -0400
Subject: [PATCH 2/2] Use MaybeUninit instead of mem::uninitialized

---
Reviewer revision:
- `encode_utf8_raw` now takes a `char` instead of a raw `u32`, so it is sound as a safe
  function and the redundant `unsafe` block in `write_char` is gone.
- Use `char::len_utf8` instead of a hand-rolled copy, and fold the 4-byte case into the
  wildcard arm so no panicking `unreachable!()` arm is needed.
- The `index` blob ids below are carried over from the original patch and do not match the
  revised content.

 write/src/lib.rs | 62 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/write/src/lib.rs b/write/src/lib.rs
index cede77d..27834ee 100644
--- a/write/src/lib.rs
+++ b/write/src/lib.rs
@@ -9,10 +9,7 @@
 #[cfg(feature = "std")]
 use core::convert::Infallible;
 
-#[allow(deprecated)]
-unsafe fn uninitialized<T>() -> T {
-    core::mem::uninitialized()
-}
+use core::mem::MaybeUninit;
 
 /// A collection of methods that are required / used to format a message into a stream.
 #[allow(non_camel_case_types)]
@@ -32,8 +29,9 @@ pub trait uWrite {
     /// entire byte sequence was successfully written, and this method will not return until all
     /// data has been written or an error occurs.
     fn write_char(&mut self, c: char) -> Result<(), Self::Error> {
-        let mut buf: [u8; 4] = unsafe { uninitialized() };
-        self.write_str(c.encode_utf8(&mut buf))
+        let mut buf: MaybeUninit<[u8; 4]> = MaybeUninit::uninit();
+        let encoded = encode_utf8_raw(c, &mut buf);
+        self.write_str(encoded)
     }
 }
 
@@ -46,3 +44,55 @@ impl uWrite for String {
         Ok(())
     }
 }
+
+/// Encodes `ch` as UTF-8 into `dst` and returns the encoded bytes as a string slice.
+///
+/// Only the first `ch.len_utf8()` bytes of `dst` are written; the remaining bytes stay
+/// uninitialized and are never read. Taking a `char` rather than a raw `u32` guarantees the
+/// input is a Unicode scalar value, which is what allows this function to be safe.
+#[inline]
+fn encode_utf8_raw(ch: char, dst: &mut MaybeUninit<[u8; 4]>) -> &str {
+    let code = ch as u32;
+    let len = ch.len_utf8();
+
+    // SAFETY: `dst` is 4 bytes long, so `a`, `b`, `c` and `d` are all in bounds. The match
+    // initializes exactly the first `len` bytes before they are read back, and because `ch`
+    // is a valid `char` those bytes are well-formed UTF-8.
+    unsafe {
+        let a = dst.as_mut_ptr() as *mut u8;
+        let b = a.add(1);
+        let c = a.add(2);
+        let d = a.add(3);
+        match len {
+            1 => {
+                *a = code as u8;
+            }
+            2 => {
+                *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
+                *b = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            3 => {
+                *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
+                *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *c = (code & 0x3F) as u8 | TAG_CONT;
+            }
+            // `char::len_utf8` only returns 1..=4, so the wildcard arm is the 4-byte case;
+            // this removes the need for a panicking `unreachable!()` arm.
+            _ => {
+                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+                *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
+                *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
+                *d = (code & 0x3F) as u8 | TAG_CONT;
+            }
+        }
+
+        let bytes = core::slice::from_raw_parts(a as *const u8, len);
+        core::str::from_utf8_unchecked(bytes)
+    }
+}
+
+// UTF-8 tag bits for encoding characters
+const TAG_CONT: u8 = 0b1000_0000;
+const TAG_TWO_B: u8 = 0b1100_0000;
+const TAG_THREE_B: u8 = 0b1110_0000;
+const TAG_FOUR_B: u8 = 0b1111_0000;