diff --git a/proc_macro/src/bridge/arena.rs b/proc_macro/src/bridge/arena.rs new file mode 100644 index 000000000..fa72d2816 --- /dev/null +++ b/proc_macro/src/bridge/arena.rs @@ -0,0 +1,113 @@ +//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +//! +//! This is unfortunately a minimal re-implementation rather than a dependency +//! as it is difficult to depend on crates from within `proc_macro`, due to it +//! being built at the same time as `std`. + +use std::cell::{Cell, RefCell}; +use std::cmp; +use std::mem::MaybeUninit; +use std::ops::Range; +use std::ptr; +use std::slice; +use std::str; + +// The arenas start with PAGE-sized chunks, and then each new chunk is twice as +// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon +// we stop growing. This scales well, from arenas that are barely used up to +// arenas that are used for 100s of MiBs. Note also that the chosen sizes match +// the usual sizes of pages and huge pages on Linux. +const PAGE: usize = 4096; +const HUGE_PAGE: usize = 2 * 1024 * 1024; + +/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`. +/// +/// This is unfortunately a complete re-implementation rather than a dependency +/// as it is difficult to depend on crates from within `proc_macro`, due to it +/// being built at the same time as `std`. +/// +/// This arena doesn't have support for allocating anything other than byte +/// slices, as that is all that is necessary. +pub(crate) struct Arena { + start: Cell<*mut MaybeUninit>, + end: Cell<*mut MaybeUninit>, + chunks: RefCell]>>>, +} + +impl Arena { + pub(crate) fn new() -> Self { + Arena { + start: Cell::new(ptr::null_mut()), + end: Cell::new(ptr::null_mut()), + chunks: RefCell::new(Vec::new()), + } + } + + /// Add a new chunk with at least `additional` free bytes. + #[inline(never)] + #[cold] + fn grow(&self, additional: usize) { + let mut chunks = self.chunks.borrow_mut(); + let mut new_cap; + if let Some(last_chunk) = chunks.last_mut() { + // If the previous chunk's len is less than HUGE_PAGE + // bytes, then this chunk will be least double the previous + // chunk's size. + new_cap = last_chunk.len().min(HUGE_PAGE / 2); + new_cap *= 2; + } else { + new_cap = PAGE; + } + // Also ensure that this chunk can fit `additional`. + new_cap = cmp::max(additional, new_cap); + + let mut chunk = Box::new_uninit_slice(new_cap); + let Range { start, end } = chunk.as_mut_ptr_range(); + self.start.set(start); + self.end.set(end); + chunks.push(chunk); + } + + /// Allocates a byte slice with specified size from the current memory + /// chunk. Returns `None` if there is no free space left to satisfy the + /// request. + fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit]> { + let start = self.start.get().addr(); + let old_end = self.end.get(); + let end = old_end.addr(); + + let new_end = end.checked_sub(bytes)?; + if start <= new_end { + let new_end = old_end.with_addr(new_end); + self.end.set(new_end); + // SAFETY: `bytes` bytes starting at `new_end` were just reserved. + Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) }) + } else { + None + } + } + + fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit] { + if bytes == 0 { + return &mut []; + } + + loop { + if let Some(a) = self.alloc_raw_without_grow(bytes) { + break a; + } + // No free space left. Allocate a new chunk to satisfy the request. + // On failure the grow will panic or abort. + self.grow(bytes); + } + } + + pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str { + let alloc = self.alloc_raw(string.len()); + let bytes = MaybeUninit::write_slice(alloc, string.as_bytes()); + + // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena, + // and immediately convert the clone back to `&str`. + unsafe { str::from_utf8_unchecked_mut(bytes) } + } +} diff --git a/proc_macro/src/bridge/client.rs b/proc_macro/src/bridge/client.rs index 8254bd6e5..1516f084a 100644 --- a/proc_macro/src/bridge/client.rs +++ b/proc_macro/src/bridge/client.rs @@ -175,13 +175,11 @@ define_handles! { 'owned: FreeFunctions, TokenStream, - Literal, SourceFile, MultiSpan, Diagnostic, 'interned: - Ident, Span, } @@ -197,25 +195,6 @@ impl Clone for TokenStream { } } -impl Clone for Literal { - fn clone(&self) -> Self { - self.clone() - } -} - -impl fmt::Debug for Literal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Literal") - // format the kind without quotes, as in `kind: Float` - .field("kind", &format_args!("{}", &self.debug_kind())) - .field("symbol", &self.symbol()) - // format `Some("...")` on one line even in {:#?} mode - .field("suffix", &format_args!("{:?}", &self.suffix())) - .field("span", &self.span()) - .finish() - } -} - impl Clone for SourceFile { fn clone(&self) -> Self { self.clone() @@ -242,6 +221,8 @@ impl fmt::Debug for Span { } } +pub(crate) use super::symbol::Symbol; + macro_rules! define_client_side { ($($name:ident { $(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)* @@ -405,6 +386,9 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( panic::catch_unwind(panic::AssertUnwindSafe(|| { maybe_install_panic_hook(force_show_panics); + // Make sure the symbol store is empty before decoding inputs. + Symbol::invalidate_all(); + let reader = &mut &buf[..]; let (globals, input) = <(ExpnGlobals, A)>::decode(reader, &mut ()); @@ -438,6 +422,10 @@ fn run_client DecodeMut<'a, 's, ()>, R: Encode<()>>( buf.clear(); Err::<(), _>(e).encode(&mut buf, &mut ()); }); + + // Now that a response has been serialized, invalidate all symbols + // registered with the interner. + Symbol::invalidate_all(); buf } diff --git a/proc_macro/src/bridge/fxhash.rs b/proc_macro/src/bridge/fxhash.rs new file mode 100644 index 000000000..4b1e412e2 --- /dev/null +++ b/proc_macro/src/bridge/fxhash.rs @@ -0,0 +1,117 @@ +//! This is a copy of the `rustc_hash` crate, adapted to work as a module. +//! +//! If in the future it becomes more reasonable to add dependencies to +//! `proc_macro`, this module should be removed and replaced with a dependency +//! on the `rustc_hash` crate. + +use std::collections::HashMap; +use std::convert::TryInto; +use std::default::Default; +use std::hash::BuildHasherDefault; +use std::hash::Hasher; +use std::mem::size_of; +use std::ops::BitXor; + +/// Type alias for a hashmap using the `fx` hash algorithm. +pub type FxHashMap = HashMap>; + +/// A speedy hash algorithm for use within rustc. The hashmap in liballoc +/// by default uses SipHash which isn't quite as speedy as we want. In the +/// compiler we're not really worried about DOS attempts, so we use a fast +/// non-cryptographic hash. +/// +/// This is the same as the algorithm used by Firefox -- which is a homespun +/// one not based on any widely-known algorithm -- though modified to produce +/// 64-bit hash values instead of 32-bit hash values. It consistently +/// out-performs an FNV-based hash within rustc itself -- the collision rate is +/// similar or slightly worse than FNV, but the speed of the hash function +/// itself is much higher because it works on up to 8 bytes at a time. +pub struct FxHasher { + hash: usize, +} + +#[cfg(target_pointer_width = "32")] +const K: usize = 0x9e3779b9; +#[cfg(target_pointer_width = "64")] +const K: usize = 0x517cc1b727220a95; + +impl Default for FxHasher { + #[inline] + fn default() -> FxHasher { + FxHasher { hash: 0 } + } +} + +impl FxHasher { + #[inline] + fn add_to_hash(&mut self, i: usize) { + self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K); + } +} + +impl Hasher for FxHasher { + #[inline] + fn write(&mut self, mut bytes: &[u8]) { + #[cfg(target_pointer_width = "32")] + let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap()); + #[cfg(target_pointer_width = "64")] + let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap()); + + let mut hash = FxHasher { hash: self.hash }; + assert!(size_of::() <= 8); + while bytes.len() >= size_of::() { + hash.add_to_hash(read_usize(bytes) as usize); + bytes = &bytes[size_of::()..]; + } + if (size_of::() > 4) && (bytes.len() >= 4) { + hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize); + bytes = &bytes[4..]; + } + if (size_of::() > 2) && bytes.len() >= 2 { + hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize); + bytes = &bytes[2..]; + } + if (size_of::() > 1) && bytes.len() >= 1 { + hash.add_to_hash(bytes[0] as usize); + } + self.hash = hash.hash; + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.add_to_hash(i as usize); + } + + #[cfg(target_pointer_width = "32")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + self.add_to_hash((i >> 32) as usize); + } + + #[cfg(target_pointer_width = "64")] + #[inline] + fn write_u64(&mut self, i: u64) { + self.add_to_hash(i as usize); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.add_to_hash(i); + } + + #[inline] + fn finish(&self) -> u64 { + self.hash as u64 + } +} diff --git a/proc_macro/src/bridge/handle.rs b/proc_macro/src/bridge/handle.rs index c219a9465..00954107b 100644 --- a/proc_macro/src/bridge/handle.rs +++ b/proc_macro/src/bridge/handle.rs @@ -1,11 +1,13 @@ //! Server-side handles and storage for per-handle data. -use std::collections::{BTreeMap, HashMap}; -use std::hash::{BuildHasher, Hash}; +use std::collections::BTreeMap; +use std::hash::Hash; use std::num::NonZeroU32; use std::ops::{Index, IndexMut}; use std::sync::atomic::{AtomicUsize, Ordering}; +use super::fxhash::FxHashMap; + pub(super) type Handle = NonZeroU32; /// A store that associates values of type `T` with numeric handles. A value can @@ -51,31 +53,15 @@ impl IndexMut for OwnedStore { } } -// HACK(eddyb) deterministic `std::collections::hash_map::RandomState` replacement -// that doesn't require adding any dependencies to `proc_macro` (like `rustc-hash`). -#[derive(Clone)] -struct NonRandomState; - -impl BuildHasher for NonRandomState { - type Hasher = std::collections::hash_map::DefaultHasher; - #[inline] - fn build_hasher(&self) -> Self::Hasher { - Self::Hasher::new() - } -} - /// Like `OwnedStore`, but avoids storing any value more than once. pub(super) struct InternedStore { owned: OwnedStore, - interner: HashMap, + interner: FxHashMap, } impl InternedStore { pub(super) fn new(counter: &'static AtomicUsize) -> Self { - InternedStore { - owned: OwnedStore::new(counter), - interner: HashMap::with_hasher(NonRandomState), - } + InternedStore { owned: OwnedStore::new(counter), interner: FxHashMap::default() } } pub(super) fn alloc(&mut self, x: T) -> Handle { diff --git a/proc_macro/src/bridge/mod.rs b/proc_macro/src/bridge/mod.rs index 048ba3a8f..5cde966bf 100644 --- a/proc_macro/src/bridge/mod.rs +++ b/proc_macro/src/bridge/mod.rs @@ -56,6 +56,7 @@ macro_rules! with_api { fn drop($self: $S::FreeFunctions); fn track_env_var(var: &str, value: Option<&str>); fn track_path(path: &str); + fn literal_from_str(s: &str) -> Result, ()>; }, TokenStream { fn drop($self: $S::TokenStream); @@ -65,11 +66,11 @@ macro_rules! with_api { fn from_str(src: &str) -> $S::TokenStream; fn to_string($self: &$S::TokenStream) -> String; fn from_token_tree( - tree: TokenTree<$S::TokenStream, $S::Span, $S::Ident, $S::Literal>, + tree: TokenTree<$S::TokenStream, $S::Span, $S::Symbol>, ) -> $S::TokenStream; fn concat_trees( base: Option<$S::TokenStream>, - trees: Vec>, + trees: Vec>, ) -> $S::TokenStream; fn concat_streams( base: Option<$S::TokenStream>, @@ -77,36 +78,7 @@ macro_rules! with_api { ) -> $S::TokenStream; fn into_trees( $self: $S::TokenStream - ) -> Vec>; - }, - Ident { - fn new(string: &str, span: $S::Span, is_raw: bool) -> $S::Ident; - fn span($self: $S::Ident) -> $S::Span; - fn with_span($self: $S::Ident, span: $S::Span) -> $S::Ident; - }, - Literal { - fn drop($self: $S::Literal); - fn clone($self: &$S::Literal) -> $S::Literal; - fn from_str(s: &str) -> Result<$S::Literal, ()>; - fn to_string($self: &$S::Literal) -> String; - fn debug_kind($self: &$S::Literal) -> String; - fn symbol($self: &$S::Literal) -> String; - fn suffix($self: &$S::Literal) -> Option; - fn integer(n: &str) -> $S::Literal; - fn typed_integer(n: &str, kind: &str) -> $S::Literal; - fn float(n: &str) -> $S::Literal; - fn f32(n: &str) -> $S::Literal; - fn f64(n: &str) -> $S::Literal; - fn string(string: &str) -> $S::Literal; - fn character(ch: char) -> $S::Literal; - fn byte_string(bytes: &[u8]) -> $S::Literal; - fn span($self: &$S::Literal) -> $S::Span; - fn set_span($self: &mut $S::Literal, span: $S::Span); - fn subspan( - $self: &$S::Literal, - start: Bound, - end: Bound, - ) -> Option<$S::Span>; + ) -> Vec>; }, SourceFile { fn drop($self: $S::SourceFile); @@ -141,11 +113,15 @@ macro_rules! with_api { fn before($self: $S::Span) -> $S::Span; fn after($self: $S::Span) -> $S::Span; fn join($self: $S::Span, other: $S::Span) -> Option<$S::Span>; + fn subspan($self: $S::Span, start: Bound, end: Bound) -> Option<$S::Span>; fn resolved_at($self: $S::Span, at: $S::Span) -> $S::Span; fn source_text($self: $S::Span) -> Option; fn save_span($self: $S::Span) -> usize; fn recover_proc_macro_span(id: usize) -> $S::Span; }, + Symbol { + fn normalize_and_validate_ident(string: &str) -> Result<$S::Symbol, ()>; + }, } }; } @@ -170,6 +146,8 @@ macro_rules! reverse_decode { } } +#[allow(unsafe_code)] +mod arena; #[allow(unsafe_code)] mod buffer; #[forbid(unsafe_code)] @@ -177,6 +155,8 @@ pub mod client; #[allow(unsafe_code)] mod closure; #[forbid(unsafe_code)] +mod fxhash; +#[forbid(unsafe_code)] mod handle; #[macro_use] #[forbid(unsafe_code)] @@ -187,6 +167,8 @@ mod scoped_cell; mod selfless_reify; #[forbid(unsafe_code)] pub mod server; +#[allow(unsafe_code)] +mod symbol; use buffer::Buffer; pub use rpc::PanicMessage; @@ -328,6 +310,7 @@ mark_noop! { u8, usize, Delimiter, + LitKind, Level, LineColumn, Spacing, @@ -357,6 +340,33 @@ rpc_encode_decode!( } ); +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(u8), + ByteStr, + ByteStrRaw(u8), + Err, +} + +rpc_encode_decode!( + enum LitKind { + Byte, + Char, + Integer, + Float, + Str, + StrRaw(n), + ByteStr, + ByteStrRaw(n), + Err, + } +); + macro_rules! mark_compound { (struct $name:ident <$($T:ident),+> { $($field:ident),* $(,)? }) => { impl<$($T: Mark),+> Mark for $name <$($T),+> { @@ -464,16 +474,35 @@ pub struct Punct { compound_traits!(struct Punct { ch, joint, span }); +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct Ident { + pub sym: Symbol, + pub is_raw: bool, + pub span: Span, +} + +compound_traits!(struct Ident { sym, is_raw, span }); + +#[derive(Clone, Eq, PartialEq)] +pub struct Literal { + pub kind: LitKind, + pub symbol: Symbol, + pub suffix: Option, + pub span: Span, +} + +compound_traits!(struct Literal { kind, symbol, suffix, span }); + #[derive(Clone)] -pub enum TokenTree { +pub enum TokenTree { Group(Group), Punct(Punct), - Ident(Ident), - Literal(Literal), + Ident(Ident), + Literal(Literal), } compound_traits!( - enum TokenTree { + enum TokenTree { Group(tt), Punct(tt), Ident(tt), diff --git a/proc_macro/src/bridge/server.rs b/proc_macro/src/bridge/server.rs index ea8b833b4..d46e32595 100644 --- a/proc_macro/src/bridge/server.rs +++ b/proc_macro/src/bridge/server.rs @@ -8,12 +8,11 @@ use super::client::HandleStore; pub trait Types { type FreeFunctions: 'static; type TokenStream: 'static + Clone; - type Ident: 'static + Copy + Eq + Hash; - type Literal: 'static + Clone; type SourceFile: 'static + Clone; type MultiSpan: 'static; type Diagnostic: 'static; type Span: 'static + Copy + Eq + Hash; + type Symbol: 'static; } /// Declare an associated fn of one of the traits below, adding necessary @@ -38,6 +37,12 @@ macro_rules! declare_server_traits { pub trait Server: Types $(+ $name)* { fn globals(&mut self) -> ExpnGlobals; + + /// Intern a symbol received from RPC + fn intern_symbol(ident: &str) -> Self::Symbol; + + /// Recover the string value of a symbol, and invoke a callback with it. + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)); } } } @@ -49,6 +54,12 @@ impl Server for MarkedTypes { fn globals(&mut self) -> ExpnGlobals { <_>::mark(Server::globals(&mut self.0)) } + fn intern_symbol(ident: &str) -> Self::Symbol { + <_>::mark(S::intern_symbol(ident)) + } + fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) { + S::with_symbol_string(symbol.unmark(), f) + } } macro_rules! define_mark_types_impls { @@ -81,11 +92,13 @@ macro_rules! define_dispatcher_impl { pub trait DispatcherTrait { // HACK(eddyb) these are here to allow `Self::$name` to work below. $(type $name;)* + fn dispatch(&mut self, buf: Buffer) -> Buffer; } impl DispatcherTrait for Dispatcher> { $(type $name = as Types>::$name;)* + fn dispatch(&mut self, mut buf: Buffer) -> Buffer { let Dispatcher { handle_store, server } = self; diff --git a/proc_macro/src/bridge/symbol.rs b/proc_macro/src/bridge/symbol.rs new file mode 100644 index 000000000..930c11145 --- /dev/null +++ b/proc_macro/src/bridge/symbol.rs @@ -0,0 +1,205 @@ +//! Client-side interner used for symbols. +//! +//! This is roughly based on the symbol interner from `rustc_span` and the +//! DroplessArena from `rustc_arena`. It is unfortunately a complete +//! copy/re-implementation rather than a dependency as it is difficult to depend +//! on crates from within `proc_macro`, due to it being built at the same time +//! as `std`. +//! +//! If at some point in the future it becomes easier to add dependencies to +//! proc_macro, this module should probably be removed or simplified. + +use std::cell::RefCell; +use std::num::NonZeroU32; +use std::str; + +use super::*; + +/// Handle for a symbol string stored within the Interner. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Symbol(NonZeroU32); + +impl !Send for Symbol {} +impl !Sync for Symbol {} + +impl Symbol { + /// Intern a new `Symbol` + pub(crate) fn new(string: &str) -> Self { + INTERNER.with_borrow_mut(|i| i.intern(string)) + } + + /// Create a new `Symbol` for an identifier. + /// + /// Validates and normalizes before converting it to a symbol. + pub(crate) fn new_ident(string: &str, is_raw: bool) -> Self { + // Fast-path: check if this is a valid ASCII identifier + if Self::is_valid_ascii_ident(string.as_bytes()) { + if is_raw && !Self::can_be_raw(string) { + panic!("`{}` cannot be a raw identifier", string); + } + return Self::new(string); + } + + // Slow-path: If the string is already ASCII we're done, otherwise ask + // our server to do this for us over RPC. + // We don't need to check for identifiers which can't be raw here, + // because all of them are ASCII. + if string.is_ascii() { + Err(()) + } else { + client::Symbol::normalize_and_validate_ident(string) + } + .unwrap_or_else(|_| panic!("`{:?}` is not a valid identifier", string)) + } + + /// Run a callback with the symbol's string value. + pub(crate) fn with(self, f: impl FnOnce(&str) -> R) -> R { + INTERNER.with_borrow(|i| f(i.get(self))) + } + + /// Clear out the thread-local symbol interner, making all previously + /// created symbols invalid such that `with` will panic when called on them. + pub(crate) fn invalidate_all() { + INTERNER.with_borrow_mut(|i| i.clear()); + } + + /// Check if the ident is a valid ASCII identifier. + /// + /// This is a short-circuit which is cheap to implement within the + /// proc-macro client to avoid RPC when creating simple idents, but may + /// return `false` for a valid identifier if it contains non-ASCII + /// characters. + fn is_valid_ascii_ident(bytes: &[u8]) -> bool { + matches!(bytes.first(), Some(b'_' | b'a'..=b'z' | b'A'..=b'Z')) + && bytes[1..] + .iter() + .all(|b| matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')) + } + + // Mimics the behaviour of `Symbol::can_be_raw` from `rustc_span` + fn can_be_raw(string: &str) -> bool { + match string { + "_" | "super" | "self" | "Self" | "crate" => false, + _ => true, + } + } +} + +impl fmt::Debug for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Debug::fmt(s, f)) + } +} + +impl ToString for Symbol { + fn to_string(&self) -> String { + self.with(|s| s.to_owned()) + } +} + +impl fmt::Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.with(|s| fmt::Display::fmt(s, f)) + } +} + +impl Encode for Symbol { + fn encode(self, w: &mut Writer, s: &mut S) { + self.with(|sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, client::HandleStore>> + for Marked +{ + fn decode(r: &mut Reader<'_>, s: &mut client::HandleStore>) -> Self { + Mark::mark(S::intern_symbol(<&str>::decode(r, s))) + } +} + +impl Encode>> + for Marked +{ + fn encode(self, w: &mut Writer, s: &mut client::HandleStore>) { + S::with_symbol_string(&self.unmark(), |sym| sym.encode(w, s)) + } +} + +impl DecodeMut<'_, '_, S> for Symbol { + fn decode(r: &mut Reader<'_>, s: &mut S) -> Self { + Symbol::new(<&str>::decode(r, s)) + } +} + +thread_local! { + static INTERNER: RefCell = RefCell::new(Interner { + arena: arena::Arena::new(), + names: fxhash::FxHashMap::default(), + strings: Vec::new(), + // Start with a base of 1 to make sure that `NonZeroU32` works. + sym_base: NonZeroU32::new(1).unwrap(), + }); +} + +/// Basic interner for a `Symbol`, inspired by the one in `rustc_span`. +struct Interner { + arena: arena::Arena, + // SAFETY: These `'static` lifetimes are actually references to data owned + // by the Arena. This is safe, as we never return them as static references + // from `Interner`. + names: fxhash::FxHashMap<&'static str, Symbol>, + strings: Vec<&'static str>, + // The offset to apply to symbol names stored in the interner. This is used + // to ensure that symbol names are not re-used after the interner is + // cleared. + sym_base: NonZeroU32, +} + +impl Interner { + fn intern(&mut self, string: &str) -> Symbol { + if let Some(&name) = self.names.get(string) { + return name; + } + + let name = Symbol( + self.sym_base + .checked_add(self.strings.len() as u32) + .expect("`proc_macro` symbol name overflow"), + ); + + let string: &str = self.arena.alloc_str(string); + + // SAFETY: we can extend the arena allocation to `'static` because we + // only access these while the arena is still alive. + let string: &'static str = unsafe { &*(string as *const str) }; + self.strings.push(string); + self.names.insert(string, name); + name + } + + /// Read a symbol's value from the store while it is held. + fn get(&self, symbol: Symbol) -> &str { + // NOTE: Subtract out the offset which was added to make the symbol + // nonzero and prevent symbol name re-use. + let name = symbol + .0 + .get() + .checked_sub(self.sym_base.get()) + .expect("use-after-free of `proc_macro` symbol"); + self.strings[name as usize] + } + + /// Clear all symbols from the store, invalidating them such that `get` will + /// panic if they are accessed in the future. + fn clear(&mut self) { + // NOTE: Be careful not to panic here, as we may be called on the client + // when a `catch_unwind` isn't installed. + self.sym_base = self.sym_base.saturating_add(self.strings.len() as u32); + self.names.clear(); + self.strings.clear(); + + // SAFETY: This is cleared after the names and strings tables are + // cleared out, so no references into the arena should remain. + self.arena = arena::Arena::new(); + } +} diff --git a/proc_macro/src/lib.rs b/proc_macro/src/lib.rs index 9ab5061c6..5cf16bdd0 100644 --- a/proc_macro/src/lib.rs +++ b/proc_macro/src/lib.rs @@ -24,10 +24,14 @@ #![feature(staged_api)] #![feature(allow_internal_unstable)] #![feature(decl_macro)] +#![feature(local_key_cell_methods)] +#![feature(maybe_uninit_write_slice)] #![feature(negative_impls)] +#![feature(new_uninit)] #![feature(restricted_std)] #![feature(rustc_attrs)] #![feature(min_specialization)] +#![feature(strict_provenance)] #![recursion_limit = "256"] #[unstable(feature = "proc_macro_internals", issue = "27812")] @@ -211,12 +215,7 @@ pub use quote::{quote, quote_span}; fn tree_to_bridge_tree( tree: TokenTree, -) -> bridge::TokenTree< - bridge::client::TokenStream, - bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, -> { +) -> bridge::TokenTree { match tree { TokenTree::Group(tt) => bridge::TokenTree::Group(tt.0), TokenTree::Punct(tt) => bridge::TokenTree::Punct(tt.0), @@ -240,8 +239,7 @@ struct ConcatTreesHelper { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, + bridge::client::Symbol, >, >, } @@ -367,8 +365,7 @@ pub mod token_stream { bridge::TokenTree< bridge::client::TokenStream, bridge::client::Span, - bridge::client::Ident, - bridge::client::Literal, + bridge::client::Symbol, >, >, ); @@ -1004,6 +1001,13 @@ impl Punct { } } +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] +impl ToString for Punct { + fn to_string(&self) -> String { + self.as_char().to_string() + } +} + /// Prints the punctuation character as a string that should be losslessly convertible /// back into the same character. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] @@ -1041,7 +1045,7 @@ impl PartialEq for char { /// An identifier (`ident`). #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Ident(bridge::client::Ident); +pub struct Ident(bridge::Ident); impl Ident { /// Creates a new `Ident` with the given `string` as well as the specified @@ -1065,7 +1069,11 @@ impl Ident { /// tokens, requires a `Span` to be specified at construction. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn new(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, false)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, false), + is_raw: false, + span: span.0, + }) } /// Same as `Ident::new`, but creates a raw identifier (`r#ident`). @@ -1074,38 +1082,45 @@ impl Ident { /// (e.g. `self`, `super`) are not supported, and will cause a panic. #[stable(feature = "proc_macro_raw_ident", since = "1.47.0")] pub fn new_raw(string: &str, span: Span) -> Ident { - Ident(bridge::client::Ident::new(string, span.0, true)) + Ident(bridge::Ident { + sym: bridge::client::Symbol::new_ident(string, true), + is_raw: true, + span: span.0, + }) } /// Returns the span of this `Ident`, encompassing the entire string returned - /// by [`to_string`](Self::to_string). + /// by [`to_string`](ToString::to_string). #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span of this `Ident`, possibly changing its hygiene context. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0 = self.0.with_span(span.0); + self.0.span = span.0; } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). -#[stable(feature = "proc_macro_lib", since = "1.15.0")] +/// Converts the identifier to a string that should be losslessly convertible +/// back into the same identifier. +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Ident { fn to_string(&self) -> String { - TokenStream::from(TokenTree::from(self.clone())).to_string() + self.0.sym.with(|sym| if self.0.is_raw { ["r#", sym].concat() } else { sym.to_owned() }) } } -/// Prints the identifier as a string that should be losslessly convertible -/// back into the same identifier. +/// Prints the identifier as a string that should be losslessly convertible back +/// into the same identifier. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + if self.0.is_raw { + f.write_str("r#")?; + } + fmt::Display::fmt(&self.0.sym, f) } } @@ -1125,7 +1140,7 @@ impl fmt::Debug for Ident { /// Boolean literals like `true` and `false` do not belong here, they are `Ident`s. #[derive(Clone)] #[stable(feature = "proc_macro_lib2", since = "1.29.0")] -pub struct Literal(bridge::client::Literal); +pub struct Literal(bridge::Literal); macro_rules! suffixed_int_literals { ($($name:ident => $kind:ident,)*) => ($( @@ -1142,7 +1157,12 @@ macro_rules! suffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::typed_integer(&n.to_string(), stringify!($kind))) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: Some(bridge::client::Symbol::new(stringify!($kind))), + span: Span::call_site().0, + }) } )*) } @@ -1164,12 +1184,26 @@ macro_rules! unsuffixed_int_literals { /// below. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn $name(n: $kind) -> Literal { - Literal(bridge::client::Literal::integer(&n.to_string())) + Literal(bridge::Literal { + kind: bridge::LitKind::Integer, + symbol: bridge::client::Symbol::new(&n.to_string()), + suffix: None, + span: Span::call_site().0, + }) } )*) } impl Literal { + fn new(kind: bridge::LitKind, value: &str, suffix: Option<&str>) -> Self { + Literal(bridge::Literal { + kind, + symbol: bridge::client::Symbol::new(value), + suffix: suffix.map(bridge::client::Symbol::new), + span: Span::call_site().0, + }) + } + suffixed_int_literals! { u8_suffixed => u8, u16_suffixed => u16, @@ -1221,7 +1255,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. @@ -1242,7 +1276,7 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f32(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f32")) } /// Creates a new unsuffixed floating-point literal. @@ -1266,7 +1300,7 @@ impl Literal { if !repr.contains('.') { repr.push_str(".0"); } - Literal(bridge::client::Literal::float(&repr)) + Literal::new(bridge::LitKind::Float, &repr, None) } /// Creates a new suffixed floating-point literal. @@ -1287,37 +1321,49 @@ impl Literal { if !n.is_finite() { panic!("Invalid float literal {n}"); } - Literal(bridge::client::Literal::f64(&n.to_string())) + Literal::new(bridge::LitKind::Float, &n.to_string(), Some("f64")) } /// String literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn string(string: &str) -> Literal { - Literal(bridge::client::Literal::string(string)) + let quoted = format!("{:?}", string); + assert!(quoted.starts_with('"') && quoted.ends_with('"')); + let symbol = "ed[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Str, symbol, None) } /// Character literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn character(ch: char) -> Literal { - Literal(bridge::client::Literal::character(ch)) + let quoted = format!("{:?}", ch); + assert!(quoted.starts_with('\'') && quoted.ends_with('\'')); + let symbol = "ed[1..quoted.len() - 1]; + Literal::new(bridge::LitKind::Char, symbol, None) } /// Byte string literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn byte_string(bytes: &[u8]) -> Literal { - Literal(bridge::client::Literal::byte_string(bytes)) + let string = bytes + .iter() + .cloned() + .flat_map(std::ascii::escape_default) + .map(Into::::into) + .collect::(); + Literal::new(bridge::LitKind::ByteStr, &string, None) } /// Returns the span encompassing this literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn span(&self) -> Span { - Span(self.0.span()) + Span(self.0.span) } /// Configures the span associated for this literal. #[stable(feature = "proc_macro_lib2", since = "1.29.0")] pub fn set_span(&mut self, span: Span) { - self.0.set_span(span.0); + self.0.span = span.0; } /// Returns a `Span` that is a subset of `self.span()` containing only the @@ -1333,7 +1379,50 @@ impl Literal { // was 'c' or whether it was '\u{63}'. #[unstable(feature = "proc_macro_span", issue = "54725")] pub fn subspan>(&self, range: R) -> Option { - self.0.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) + self.0.span.subspan(range.start_bound().cloned(), range.end_bound().cloned()).map(Span) + } + + fn with_symbol_and_suffix(&self, f: impl FnOnce(&str, &str) -> R) -> R { + self.0.symbol.with(|symbol| match self.0.suffix { + Some(suffix) => suffix.with(|suffix| f(symbol, suffix)), + None => f(symbol, ""), + }) + } + + /// Invokes the callback with a `&[&str]` consisting of each part of the + /// literal's representation. This is done to allow the `ToString` and + /// `Display` implementations to borrow references to symbol values, and + /// both be optimized to reduce overhead. + fn with_stringify_parts(&self, f: impl FnOnce(&[&str]) -> R) -> R { + /// Returns a string containing exactly `num` '#' characters. + /// Uses a 256-character source string literal which is always safe to + /// index with a `u8` index. + fn get_hashes_str(num: u8) -> &'static str { + const HASHES: &str = "\ + ################################################################\ + ################################################################\ + ################################################################\ + ################################################################\ + "; + const _: () = assert!(HASHES.len() == 256); + &HASHES[..num as usize] + } + + self.with_symbol_and_suffix(|symbol, suffix| match self.0.kind { + bridge::LitKind::Byte => f(&["b'", symbol, "'", suffix]), + bridge::LitKind::Char => f(&["'", symbol, "'", suffix]), + bridge::LitKind::Str => f(&["\"", symbol, "\"", suffix]), + bridge::LitKind::StrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["r", hashes, "\"", symbol, "\"", hashes, suffix]) + } + bridge::LitKind::ByteStr => f(&["b\"", symbol, "\"", suffix]), + bridge::LitKind::ByteStrRaw(n) => { + let hashes = get_hashes_str(n); + f(&["br", hashes, "\"", symbol, "\"", hashes, suffix]) + } + _ => f(&[symbol, suffix]), + }) } } @@ -1352,19 +1441,17 @@ impl FromStr for Literal { type Err = LexError; fn from_str(src: &str) -> Result { - match bridge::client::Literal::from_str(src) { + match bridge::client::FreeFunctions::literal_from_str(src) { Ok(literal) => Ok(Literal(literal)), Err(()) => Err(LexError), } } } -// N.B., the bridge only provides `to_string`, implement `fmt::Display` -// based on it (the reverse of the usual relationship between the two). -#[stable(feature = "proc_macro_lib", since = "1.15.0")] +#[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl ToString for Literal { fn to_string(&self) -> String { - self.0.to_string() + self.with_stringify_parts(|parts| parts.concat()) } } @@ -1373,14 +1460,26 @@ impl ToString for Literal { #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.to_string()) + self.with_stringify_parts(|parts| { + for part in parts { + fmt::Display::fmt(part, f)?; + } + Ok(()) + }) } } #[stable(feature = "proc_macro_lib2", since = "1.29.0")] impl fmt::Debug for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) + f.debug_struct("Literal") + // format the kind on one line even in {:#?} mode + .field("kind", &format_args!("{:?}", &self.0.kind)) + .field("symbol", &self.0.symbol) + // format `Some("...")` on one line even in {:#?} mode + .field("suffix", &format_args!("{:?}", &self.0.suffix)) + .field("span", &self.0.span) + .finish() } }