Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test char formatting, replace mem::uninitialized with MaybeUninit #57

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tests/vs-std-write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,13 @@ fn hex() {

// <i8 as std::fmt::Display>::fmt(-128)
}

#[test]
fn char() {
    // Both sides of every UTF-8 length boundary, plus the edges of the surrogate gap. These are
    // always checked explicitly: the sparse walk used under Miri starts at '\0' and its first
    // step already lands beyond U+FFFF, so on its own it would only ever exercise the 1-byte and
    // 4-byte encodings.
    let boundaries = [
        '\0',
        '\u{7F}',
        '\u{80}',
        '\u{7FF}',
        '\u{800}',
        '\u{D7FF}',
        '\u{E000}',
        '\u{FFFF}',
        '\u{10000}',
        char::MAX,
    ];
    for &c in boundaries.iter() {
        cmp!("{}", c);
    }

    // Miri is slow, so step over the range of valid chars sparsely
    let step = if cfg!(miri) { 1 << 16 } else { 1 };

    for c in ('\0'..=char::MAX).step_by(step) {
        cmp!("{}", c);
    }
}
69 changes: 63 additions & 6 deletions write/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
#[cfg(feature = "std")]
use core::convert::Infallible;

/// Returns a value of type `T` obtained from `core::mem::uninitialized`.
///
/// # Safety
///
/// This forwards directly to `core::mem::uninitialized`; whatever requirements that function
/// places on its caller apply here unchanged.
// The `allow` silences the deprecation lint raised by the call to `core::mem::uninitialized`.
#[allow(deprecated)]
unsafe fn uninitialized<T>() -> T {
core::mem::uninitialized()
}
use core::mem::MaybeUninit;

/// A collection of methods that are required / used to format a message into a stream.
#[allow(non_camel_case_types)]
Expand All @@ -32,8 +29,9 @@ pub trait uWrite {
/// entire byte sequence was successfully written, and this method will not return until all
/// data has been written or an error occurs.
fn write_char(&mut self, c: char) -> Result<(), Self::Error> {
    // Scratch space for the UTF-8 form of `c` (at most 4 bytes). It is deliberately left
    // uninitialized: `encode_utf8_raw` writes the bytes it needs and hands back a `&str` that
    // covers only those bytes.
    let mut buf: MaybeUninit<[u8; 4]> = MaybeUninit::uninit();
    // `encode_utf8_raw` is a safe function, so the call needs no `unsafe` block.
    let encoded = encode_utf8_raw(c as u32, &mut buf);
    self.write_str(encoded)
}
}

Expand All @@ -46,3 +44,62 @@ impl uWrite for String {
Ok(())
}
}

/// Encodes the code point `code` as UTF-8 into `dst` and returns the encoded text.
///
/// Only the leading bytes of `dst` that the encoding needs (1 to 4) are written. The returned
/// `&str` borrows exactly those bytes, so the uninitialized remainder of the buffer is never
/// read.
///
/// `code` is expected to be a Unicode scalar value, e.g. `c as u32` for some `char`. Because this
/// function is safe to call, any other value (a surrogate, or anything above `0x10FFFF`) is
/// encoded as U+FFFD REPLACEMENT CHARACTER rather than yielding a `str` that is not valid UTF-8.
#[inline]
fn encode_utf8_raw(code: u32, dst: &mut MaybeUninit<[u8; 4]>) -> &str {
    // U+FFFD REPLACEMENT CHARACTER, substituted for anything that is not a scalar value.
    const REPLACEMENT: u32 = 0xFFFD;

    let code = if core::char::from_u32(code).is_some() {
        code
    } else {
        REPLACEMENT
    };

    // View the 4-byte buffer as a pointer to its first byte.
    let first = dst.as_mut_ptr() as *mut u8;

    // SAFETY: `first` points at the start of `dst`, which is 4 bytes long and exclusively
    // borrowed for as long as the returned `&str` lives. Every arm writes bytes `0..len` and
    // evaluates to that same `len`, which is at most 4, so all writes are in bounds and the slice
    // built afterwards covers only bytes that were just initialized. `code` is a Unicode scalar
    // value at this point and `len_utf8` selects the arm matching its magnitude, so the bytes
    // written form its well-formed UTF-8 encoding.
    unsafe {
        let len = match len_utf8(code) {
            1 => {
                first.write(code as u8);
                1
            }
            2 => {
                first.write(((code >> 6) & 0x1F) as u8 | TAG_TWO_B);
                first.add(1).write((code & 0x3F) as u8 | TAG_CONT);
                2
            }
            3 => {
                first.write(((code >> 12) & 0x0F) as u8 | TAG_THREE_B);
                first.add(1).write(((code >> 6) & 0x3F) as u8 | TAG_CONT);
                first.add(2).write((code & 0x3F) as u8 | TAG_CONT);
                3
            }
            // `len_utf8` only returns 1..=4; treating the remainder as the 4-byte case keeps
            // this function free of a panic path.
            _ => {
                first.write(((code >> 18) & 0x07) as u8 | TAG_FOUR_B);
                first.add(1).write(((code >> 12) & 0x3F) as u8 | TAG_CONT);
                first.add(2).write(((code >> 6) & 0x3F) as u8 | TAG_CONT);
                first.add(3).write((code & 0x3F) as u8 | TAG_CONT);
                4
            }
        };

        core::str::from_utf8_unchecked(core::slice::from_raw_parts(first as *const u8, len))
    }
}

/// Returns how many bytes (1 to 4) the UTF-8 encoding of the code point `code` occupies.
///
/// The value is compared against the exclusive upper limits `MAX_ONE_B`, `MAX_TWO_B` and
/// `MAX_THREE_B` in turn; anything at or above the last limit is reported as 4 bytes.
#[inline]
fn len_utf8(code: u32) -> usize {
    match code {
        cp if cp < MAX_ONE_B => 1,
        cp if cp < MAX_TWO_B => 2,
        cp if cp < MAX_THREE_B => 3,
        _ => 4,
    }
}

// Marker bits and code-point limits used when encoding characters as UTF-8

/// Marker bits `10xx_xxxx` OR-ed into every continuation byte.
const TAG_CONT: u8 = 0x80;
/// Marker bits `110x_xxxx` OR-ed into the first byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0xC0;
/// Marker bits `1110_xxxx` OR-ed into the first byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0xE0;
/// Marker bits `1111_0xxx` OR-ed into the first byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0xF0;
/// Exclusive upper limit of the code points encoded in one byte.
const MAX_ONE_B: u32 = 1 << 7;
/// Exclusive upper limit of the code points encoded in two bytes.
const MAX_TWO_B: u32 = 1 << 11;
/// Exclusive upper limit of the code points encoded in three bytes.
const MAX_THREE_B: u32 = 1 << 16;