-
-
Notifications
You must be signed in to change notification settings - Fork 132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Store pointer bytes in pointers for SB/provenance #272
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,18 +70,95 @@ use crate::alloc::alloc::{alloc, dealloc, Layout}; | |
#[cfg(no_from_ne_bytes)] | ||
use crate::backport::FromNeBytes; | ||
use core::mem; | ||
use core::num::{NonZeroU64, NonZeroUsize}; | ||
use core::num::NonZeroUsize; | ||
use core::ptr; | ||
use core::ptr::NonNull; | ||
use core::slice; | ||
use core::str; | ||
|
||
#[repr(transparent)] | ||
pub(crate) struct Identifier { | ||
repr: NonZeroU64, | ||
repr: Repr, | ||
} | ||
|
||
use crate::identifier::repr::Repr; | ||
|
||
mod repr { | ||
use core::ptr::NonNull; | ||
|
||
#[cfg(target_pointer_width = "64")] | ||
#[derive(PartialEq, Eq, Clone, Copy)] | ||
#[repr(C)] | ||
pub struct Repr { | ||
ptr: NonNull<u8>, | ||
} | ||
|
||
#[cfg(target_pointer_width = "32")] | ||
#[derive(PartialEq, Eq, Clone, Copy)] | ||
#[repr(C)] | ||
pub struct Repr { | ||
ptr: NonNull<u8>, | ||
remainder: usize, | ||
} | ||
|
||
impl Repr { | ||
#[cfg(target_pointer_width = "32")] | ||
pub fn as_u64(self) -> u64 { | ||
self.ptr.as_ptr() as u64 + ((self.remainder as u64) << 32) | ||
} | ||
|
||
#[cfg(target_pointer_width = "64")] | ||
pub fn as_u64(self) -> u64 { | ||
self.ptr.as_ptr() as u64 | ||
} | ||
|
||
pub fn as_ptr(self) -> *const u8 { | ||
self.ptr.as_ptr() | ||
} | ||
|
||
#[cfg(target_pointer_width = "32")] | ||
pub const fn from_ptr(ptr: NonNull<u8>) -> Self { | ||
Repr { | ||
ptr: ptr, | ||
remainder: 0, | ||
} | ||
} | ||
|
||
#[cfg(target_pointer_width = "32")] | ||
pub unsafe fn from_inline(bytes: u64) -> Repr { | ||
Repr { | ||
ptr: unsafe { NonNull::new_unchecked(bytes as usize as *mut u8) }, | ||
remainder: (bytes >> 32) as usize, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Due to the lack of as_u64, I don't see anywhere remainder is ever read. Is it actually a 64 bit value or is it just a isize? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is read both in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, that was exceedingly unclear. Maybe that cast chain could be moved to an impl on Repr? |
||
} | ||
} | ||
|
||
#[cfg(target_pointer_width = "32")] | ||
pub fn is_inline(self) -> bool { | ||
self.ptr.as_ptr() as usize >> 31 == 0 | ||
} | ||
|
||
#[cfg(target_pointer_width = "64")] | ||
pub const fn from_ptr(ptr: NonNull<u8>) -> Self { | ||
Repr { ptr } | ||
} | ||
|
||
#[cfg(target_pointer_width = "64")] | ||
pub unsafe fn from_inline(bytes: u64) -> Repr { | ||
Repr { | ||
ptr: unsafe { NonNull::new_unchecked(bytes as usize as *mut u8) }, | ||
} | ||
} | ||
|
||
#[cfg(target_pointer_width = "64")] | ||
pub fn is_inline(self) -> bool { | ||
// `test rdi, rdi` -- basically: `self as i64 >= 0` | ||
self.ptr.as_ptr() as usize >> 63 == 0 | ||
} | ||
} | ||
} | ||
|
||
impl Identifier { | ||
const EMPTY: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(!0) }; | ||
const EMPTY: Repr = Repr::from_ptr(unsafe { NonNull::new_unchecked(!0usize as *mut u8) }); | ||
|
||
pub(crate) const fn empty() -> Self { | ||
// `mov rax, -1` | ||
|
@@ -100,7 +177,7 @@ impl Identifier { | |
unsafe { ptr::copy_nonoverlapping(string.as_ptr(), bytes.as_mut_ptr(), len) }; | ||
// SAFETY: it's nonzero because the input string was at least 1 | ||
// byte of ASCII and did not contain \0. | ||
unsafe { NonZeroU64::new_unchecked(u64::from_ne_bytes(bytes)) } | ||
unsafe { Repr::from_inline(u64::from_ne_bytes(bytes)) } | ||
} | ||
9..=0xff_ffff_ffff_ffff => { | ||
// SAFETY: len is in a range that does not contain 0. | ||
|
@@ -141,8 +218,7 @@ impl Identifier { | |
} | ||
|
||
fn is_inline(&self) -> bool { | ||
// `test rdi, rdi` -- basically: `repr as i64 >= 0` | ||
self.repr.get() >> 63 == 0 | ||
self.repr.is_inline() | ||
} | ||
|
||
fn is_empty_or_inline(&self) -> bool { | ||
|
@@ -228,38 +304,42 @@ impl PartialEq for Identifier { | |
// insignificant 0 in the least significant bit. We take advantage of that | ||
// unneeded bit to rotate a 1 into the most significant bit to make the repr | ||
// distinguishable from ASCII bytes. | ||
fn ptr_to_repr(ptr: *mut u8) -> NonZeroU64 { | ||
fn ptr_to_repr(ptr: *mut u8) -> Repr { | ||
// `mov eax, 1` | ||
// `shld rax, rdi, 63` | ||
let repr = (ptr as u64 | 1).rotate_right(1); | ||
let prev = ptr as usize; | ||
let new = (prev | 1).rotate_right(1); | ||
|
||
// SAFETY: the most significant bit of repr is known to be set, so the value | ||
// is not zero. | ||
unsafe { NonZeroU64::new_unchecked(repr) } | ||
let diff = new.wrapping_sub(prev); | ||
Repr::from_ptr(unsafe { NonNull::new_unchecked(ptr.wrapping_add(diff)) }) | ||
} | ||
|
||
// Shift out the 1 previously placed into the most significant bit of the least | ||
// significant byte. Shift in a low 0 bit to reconstruct the original 2-byte | ||
// aligned pointer. | ||
fn repr_to_ptr(repr: NonZeroU64) -> *const u8 { | ||
fn repr_to_ptr(repr: Repr) -> *const u8 { | ||
// `lea rax, [rdi + rdi]` | ||
(repr.get() << 1) as *const u8 | ||
let prev = repr.as_ptr() as usize; | ||
let new = prev << 1; | ||
let diff = new.wrapping_sub(prev); | ||
repr.as_ptr().wrapping_add(diff) | ||
} | ||
|
||
fn repr_to_ptr_mut(repr: NonZeroU64) -> *mut u8 { | ||
fn repr_to_ptr_mut(repr: Repr) -> *mut u8 { | ||
repr_to_ptr(repr) as *mut u8 | ||
} | ||
|
||
// Compute the length of the inline string, assuming the argument is in short | ||
// string representation. Short strings are stored as 1 to 8 nonzero ASCII | ||
// bytes, followed by \0 padding for the remaining bytes. | ||
fn inline_len(repr: NonZeroU64) -> NonZeroUsize { | ||
fn inline_len(repr: Repr) -> NonZeroUsize { | ||
// Rustc >=1.53 has intrinsics for counting zeros on a non-zeroable integer. | ||
// On many architectures these are more efficient than counting on ordinary | ||
// zeroable integers (bsf vs cttz). On rustc <1.53 without those intrinsics, | ||
// we count zeros in the u64 rather than the NonZeroU64. | ||
#[cfg(no_nonzero_bitscan)] | ||
let repr = repr.get(); | ||
let repr = repr.as_u64(); | ||
|
||
#[cfg(target_endian = "little")] | ||
let zero_bits_on_string_end = repr.leading_zeros(); | ||
|
@@ -275,8 +355,8 @@ fn inline_len(repr: NonZeroU64) -> NonZeroUsize { | |
|
||
// SAFETY: repr must be in the inline representation, i.e. at least 1 and at | ||
// most 8 nonzero ASCII bytes padded on the end with \0 bytes. | ||
unsafe fn inline_as_str(repr: &NonZeroU64) -> &str { | ||
let ptr = repr as *const NonZeroU64 as *const u8; | ||
unsafe fn inline_as_str(repr: &Repr) -> &str { | ||
let ptr = repr as *const Repr as *const u8; | ||
let len = inline_len(*repr).get(); | ||
// SAFETY: we are viewing the nonzero ASCII prefix of the inline repr's | ||
// contents as a slice of bytes. Input/output lifetimes are correctly | ||
|
@@ -335,7 +415,7 @@ unsafe fn decode_len(ptr: *const u8) -> NonZeroUsize { | |
|
||
// SAFETY: repr must be in the heap allocated representation, with varint header | ||
// and string contents already written. | ||
unsafe fn ptr_as_str(repr: &NonZeroU64) -> &str { | ||
unsafe fn ptr_as_str(repr: &Repr) -> &str { | ||
let ptr = repr_to_ptr(*repr); | ||
let len = unsafe { decode_len(ptr) }; | ||
let header = bytes_for_varint(len); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this assumes the target is little endian. On big endian, the bytes at the lower address in memory are more significant, so the less significant half of
bytes
would be holding the end of the inline string, which would all be 0 bytes for strings length 4 or shorter -- see the call site here:semver/src/identifier.rs
Lines 174 to 180 in 0b38170
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
--target mips64-unknown-linux-gnuabi64
works pretty well for little-endian testing.