diff --git a/compiler/rustc_codegen_gcc/src/builder.rs b/compiler/rustc_codegen_gcc/src/builder.rs index 41f88f119e2a2..225804a765757 100644 --- a/compiler/rustc_codegen_gcc/src/builder.rs +++ b/compiler/rustc_codegen_gcc/src/builder.rs @@ -783,11 +783,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { // TODO(antoyo) } - fn type_metadata(&mut self, _function: RValue<'gcc>, _typeid: String) { + fn llvm_cfi_type_metadata(&mut self, _function: RValue<'gcc>, _typeid: String) { // Unsupported. } - fn typeid_metadata(&mut self, _typeid: String) -> RValue<'gcc> { + fn llvm_cfi_typeid_metadata(&mut self, _typeid: String) -> RValue<'gcc> { // Unsupported. self.context.new_rvalue_from_int(self.int_type, 0) } diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 88b87951ecd35..8858e7f900b4a 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -621,8 +621,9 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } } - fn type_metadata(&mut self, function: &'ll Value, typeid: String) { - let typeid_metadata = self.typeid_metadata(typeid); + // FIXME(eddyb) this does not belong in `Builder`, it's global. + fn llvm_cfi_type_metadata(&mut self, function: &'ll Value, typeid: String) { + let typeid_metadata = self.llvm_cfi_typeid_metadata(typeid); let v = [self.const_usize(0), typeid_metadata]; unsafe { llvm::LLVMGlobalSetMetadata( @@ -637,7 +638,8 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> { } } - fn typeid_metadata(&mut self, typeid: String) -> Self::Value { + // FIXME(eddyb) this does not belong in `Builder`, it's global. 
+ fn llvm_cfi_typeid_metadata(&mut self, typeid: String) -> Self::Value { unsafe { llvm::LLVMMDStringInContext( self.cx.llcx, diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index b7d760bfbabe1..78bcecbc609fc 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -20,7 +20,7 @@ use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths}; use rustc_middle::ty::{self, Instance, Ty, TypeFoldable}; use rustc_span::source_map::Span; use rustc_span::{sym, Symbol}; -use rustc_symbol_mangling::typeid_for_fnabi; +use rustc_symbol_mangling::llvm_cfi_typeid_for_fn_abi; use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode}; use rustc_target::abi::{self, HasDataLayout, WrappingRange}; use rustc_target::spec::abi::Abi; @@ -908,8 +908,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { // Emit type metadata and checks. // FIXME(rcvalle): Add support for generalized identifiers. // FIXME(rcvalle): Create distinct unnamed MDNodes for internal identifiers. - let typeid = typeid_for_fnabi(bx.tcx(), fn_abi); - let typeid_metadata = bx.typeid_metadata(typeid); + let typeid = llvm_cfi_typeid_for_fn_abi(bx.tcx(), fn_abi); + let typeid_metadata = bx.llvm_cfi_typeid_metadata(typeid); // Test whether the function pointer is associated with the type identifier. 
let cond = bx.type_test(fn_ptr, typeid_metadata); diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs index 6c139df0a8555..d0ca44d3472a5 100644 --- a/compiler/rustc_codegen_ssa/src/mir/mod.rs +++ b/compiler/rustc_codegen_ssa/src/mir/mod.rs @@ -3,7 +3,7 @@ use rustc_middle::mir; use rustc_middle::mir::interpret::ErrorHandled; use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, TyAndLayout}; use rustc_middle::ty::{self, Instance, Ty, TypeFoldable}; -use rustc_symbol_mangling::typeid_for_fnabi; +use rustc_symbol_mangling::llvm_cfi_typeid_for_fn_abi; use rustc_target::abi::call::{FnAbi, PassMode}; use std::iter; @@ -252,8 +252,8 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>( // For backends that support CFI using type membership (i.e., testing whether a given pointer // is associated with a type identifier). if cx.tcx().sess.is_sanitizer_cfi_enabled() { - let typeid = typeid_for_fnabi(cx.tcx(), fn_abi); - bx.type_metadata(llfn, typeid); + let typeid = llvm_cfi_typeid_for_fn_abi(cx.tcx(), fn_abi); + bx.llvm_cfi_type_metadata(llfn, typeid); } } diff --git a/compiler/rustc_codegen_ssa/src/traits/builder.rs b/compiler/rustc_codegen_ssa/src/traits/builder.rs index 37f2bfd3c4fbd..5739dde26fefd 100644 --- a/compiler/rustc_codegen_ssa/src/traits/builder.rs +++ b/compiler/rustc_codegen_ssa/src/traits/builder.rs @@ -160,8 +160,10 @@ pub trait BuilderMethods<'a, 'tcx>: fn range_metadata(&mut self, load: Self::Value, range: WrappingRange); fn nonnull_metadata(&mut self, load: Self::Value); - fn type_metadata(&mut self, function: Self::Function, typeid: String); - fn typeid_metadata(&mut self, typeid: String) -> Self::Value; + + // FIXME(eddyb) these do not belong in `Builder`, they're global. 
+ fn llvm_cfi_type_metadata(&mut self, function: Self::Function, typeid: String); + fn llvm_cfi_typeid_metadata(&mut self, typeid: String) -> Self::Value; fn store(&mut self, val: Self::Value, ptr: Self::Value, align: Align) -> Self::Value; fn store_with_flags( diff --git a/compiler/rustc_const_eval/src/const_eval/mod.rs b/compiler/rustc_const_eval/src/const_eval/mod.rs index 6fd7f707e7e5d..85cc559fe8e36 100644 --- a/compiler/rustc_const_eval/src/const_eval/mod.rs +++ b/compiler/rustc_const_eval/src/const_eval/mod.rs @@ -2,16 +2,16 @@ use std::convert::TryFrom; +use rustc_hir::lang_items::LangItem; use rustc_hir::Mutability; -use rustc_middle::ty::{self, TyCtxt}; -use rustc_middle::{ - mir::{self, interpret::ConstAlloc}, - ty::ScalarInt, -}; +use rustc_middle::mir::{self, interpret::ConstAlloc}; +use rustc_middle::ty::layout::LayoutOf; +use rustc_middle::ty::{self, ScalarInt, Ty, TyCtxt}; use rustc_span::{source_map::DUMMY_SP, symbol::Symbol}; +use rustc_target::abi::Size; use crate::interpret::{ - intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy, + self, intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, InterpResult, MPlaceTy, MemPlaceMeta, Scalar, }; @@ -39,6 +39,87 @@ pub(crate) fn const_caller_location( ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx)) } +pub(crate) fn const_type_id<'tcx>( + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ty: Ty<'tcx>, +) -> ConstValue<'tcx> { + trace!("const_type_id: {}", ty); + + // Compute (logical) `TypeId` field values, before trying to encode them. + let hash = tcx.type_id_hash(ty); + let mangling = tcx.type_id_mangling(param_env.and(ty)).name; + + let mut ecx = mk_eval_cx(tcx, DUMMY_SP, param_env, false); + + let type_id_ty = tcx.type_of(tcx.require_lang_item(LangItem::TypeId, None)); + let type_id_layout = ecx.layout_of(type_id_ty).unwrap(); + + // Encode `TypeId` field values, before putting together the allocation. 
+ let hash_val = Scalar::from_u64(hash); + let mangling_val = { + let mangling_len = u64::try_from(mangling.len()).unwrap(); + let mangling_len_val = Scalar::from_machine_usize(mangling_len, &ecx); + + // The field is `mangling: &TypeManglingStr`, get `TypeManglingStr` from it. + let mangling_field_ty = type_id_layout.field(&ecx, 1).ty; + let type_mangling_str_ty = mangling_field_ty.builtin_deref(true).unwrap().ty; + + // Allocate memory for `TypeManglingStr` struct. + let type_mangling_str_layout = ecx.layout_of(type_mangling_str_ty).unwrap(); + let type_mangling_str_place = { + // NOTE(eddyb) this is similar to the `ecx.allocate(...)` used below + // for `type_id_place`, except with an additional size for the + // string bytes (`mangling`) being added to the `TypeManglingStr` + // (which is unsized, using an `extern { type }` tail). + let layout = type_mangling_str_layout; + let size = layout.size + Size::from_bytes(mangling_len); + let ptr = ecx + .allocate_ptr(size, layout.align.abi, interpret::MemoryKind::IntrinsicGlobal) + .unwrap(); + MPlaceTy::from_aligned_ptr(ptr.into(), layout) + }; + + // Initialize `TypeManglingStr` fields. + ecx.write_scalar( + mangling_len_val, + &ecx.mplace_field(&type_mangling_str_place, 0).unwrap().into(), + ) + .unwrap(); + ecx.write_bytes_ptr( + ecx.mplace_field(&type_mangling_str_place, 1).unwrap().ptr, + mangling.bytes(), + ) + .unwrap(); + + // `&TypeManglingStr` has no metadata, thanks to the length being stored + // behind the reference (in the first field of `TypeManglingStr`). + type_mangling_str_place.to_ref(&ecx).to_scalar().unwrap() + }; + + // FIXME(eddyb) everything below would be unnecessary if `ConstValue` could + // hold a pair of `Scalar`s, or if we moved to valtrees. + + // Allocate memory for `TypeId` struct. + let type_id_place = + ecx.allocate(type_id_layout, interpret::MemoryKind::IntrinsicGlobal).unwrap(); + + // Initialize `TypeId` fields. 
+ ecx.write_scalar(hash_val, &ecx.mplace_field(&type_id_place, 0).unwrap().into()).unwrap(); + ecx.write_scalar(mangling_val, &ecx.mplace_field(&type_id_place, 1).unwrap().into()).unwrap(); + + // Convert the `TypeId` allocation from being in `ecx`, to a global `ConstValue`. + if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &type_id_place).is_err() { + bug!("intern_const_alloc_recursive should not error in this case") + } + let (type_id_alloc_id, type_id_offset) = + type_id_place.ptr.into_pointer_or_addr().unwrap().into_parts(); + ConstValue::ByRef { + alloc: tcx.global_alloc(type_id_alloc_id).unwrap_memory(), + offset: type_id_offset, + } +} + /// Convert an evaluated constant to a type level constant pub(crate) fn const_to_valtree<'tcx>( tcx: TyCtxt<'tcx>, diff --git a/compiler/rustc_const_eval/src/interpret/intern.rs b/compiler/rustc_const_eval/src/interpret/intern.rs index f1acb9e41c4ce..5684cf9d96a86 100644 --- a/compiler/rustc_const_eval/src/interpret/intern.rs +++ b/compiler/rustc_const_eval/src/interpret/intern.rs @@ -106,7 +106,7 @@ fn intern_shallow<'rt, 'mir, 'tcx, M: CompileTimeMachine<'mir, 'tcx, const_eval: match kind { MemoryKind::Stack | MemoryKind::Machine(const_eval::MemoryKind::Heap) - | MemoryKind::CallerLocation => {} + | MemoryKind::IntrinsicGlobal => {} } // Set allocation mutability as appropriate. 
This is used by LLVM to put things into // read-only memory, and also by Miri when evaluating other globals that diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs index 3cc237faf695c..5192363c09344 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics.rs @@ -5,6 +5,7 @@ use std::convert::TryFrom; use rustc_hir::def_id::DefId; +use rustc_hir::lang_items::LangItem; use rustc_middle::mir::{ self, interpret::{ConstValue, GlobalId, InterpResult, Scalar}, @@ -69,7 +70,7 @@ crate fn eval_nullary_intrinsic<'tcx>( } sym::type_id => { ensure_monomorphic_enough(tcx, tp_ty)?; - ConstValue::from_u64(tcx.type_id_hash(tp_ty)) + crate::const_eval::const_type_id(tcx, param_env, tp_ty) } sym::variant_count => match tp_ty.kind() { // Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough. @@ -166,7 +167,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { let ty = match intrinsic_name { sym::pref_align_of | sym::variant_count => self.tcx.types.usize, sym::needs_drop => self.tcx.types.bool, - sym::type_id => self.tcx.types.u64, + sym::type_id => self + .tcx + .type_of(self.tcx.require_lang_item(LangItem::TypeId, Some(self.tcx.span))), sym::type_name => self.tcx.mk_static_str(), _ => bug!("already checked for nullary intrinsics"), }; diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs b/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs index 058903dcdee55..853da09a2a277 100644 --- a/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs +++ b/compiler/rustc_const_eval/src/interpret/intrinsics/caller_location.rs @@ -82,17 +82,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { ) -> MPlaceTy<'tcx, M::PointerTag> { let loc_details = &self.tcx.sess.opts.debugging_opts.location_detail; let file = if 
loc_details.file { - self.allocate_str(filename.as_str(), MemoryKind::CallerLocation, Mutability::Not) + self.allocate_str(filename.as_str(), MemoryKind::IntrinsicGlobal, Mutability::Not) } else { // FIXME: This creates a new allocation each time. It might be preferable to // perform this allocation only once, and re-use the `MPlaceTy`. // See https://github.com/rust-lang/rust/pull/89920#discussion_r730012398 - self.allocate_str("", MemoryKind::CallerLocation, Mutability::Not) + self.allocate_str("", MemoryKind::IntrinsicGlobal, Mutability::Not) }; let line = if loc_details.line { Scalar::from_u32(line) } else { Scalar::from_u32(0) }; let col = if loc_details.column { Scalar::from_u32(col) } else { Scalar::from_u32(0) }; - // Allocate memory for `CallerLocation` struct. + // Allocate memory for `panic::Location` struct. let loc_ty = self .tcx .type_of(self.tcx.require_lang_item(LangItem::PanicLocation, None)) @@ -100,7 +100,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> { let loc_layout = self.layout_of(loc_ty).unwrap(); // This can fail if rustc runs out of memory right here. Trying to emit an error would be // pointless, since that would require allocating more memory than a Location. - let location = self.allocate(loc_layout, MemoryKind::CallerLocation).unwrap(); + let location = self.allocate(loc_layout, MemoryKind::IntrinsicGlobal).unwrap(); // Initialize fields. self.write_immediate(file.to_ref(self), &self.mplace_field(&location, 0).unwrap().into()) diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index a02115a110b7b..de1af4cf98d2e 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -29,8 +29,8 @@ use super::{ pub enum MemoryKind { /// Stack memory. Error if deallocated except during a stack pop. Stack, - /// Memory allocated by `caller_location` intrinsic. Error if ever deallocated. 
- CallerLocation, + /// Global memory allocated by an intrinsic. Error if ever deallocated. + IntrinsicGlobal, /// Additional memory kinds a machine wishes to distinguish from the builtin ones. Machine(T), } @@ -40,7 +40,7 @@ impl MayLeak for MemoryKind { fn may_leak(self) -> bool { match self { MemoryKind::Stack => false, - MemoryKind::CallerLocation => true, + MemoryKind::IntrinsicGlobal => true, MemoryKind::Machine(k) => k.may_leak(), } } @@ -50,7 +50,7 @@ impl fmt::Display for MemoryKind { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { MemoryKind::Stack => write!(f, "stack variable"), - MemoryKind::CallerLocation => write!(f, "caller location"), + MemoryKind::IntrinsicGlobal => write!(f, "global memory (from intrinsic)"), MemoryKind::Machine(m) => write!(f, "{}", m), } } diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index b8df163214435..f0c68591dfcbb 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -217,7 +217,7 @@ language_item_table! { IndexMut(Op), sym::index_mut, index_mut_trait, Target::Trait, GenericRequirement::Exact(1); UnsafeCell, sym::unsafe_cell, unsafe_cell_type, Target::Struct, GenericRequirement::None; - VaList, sym::va_list, va_list, Target::Struct, GenericRequirement::None; + VaList, sym::va_list, va_list_type, Target::Struct, GenericRequirement::None; Deref, sym::deref, deref_trait, Target::Trait, GenericRequirement::Exact(0); DerefMut, sym::deref_mut, deref_mut_trait, Target::Trait, GenericRequirement::Exact(0); @@ -326,6 +326,8 @@ language_item_table! 
{ Range, sym::Range, range_struct, Target::Struct, GenericRequirement::None; RangeToInclusive, sym::RangeToInclusive, range_to_inclusive_struct, Target::Struct, GenericRequirement::None; RangeTo, sym::RangeTo, range_to_struct, Target::Struct, GenericRequirement::None; + + TypeId, sym::TypeId, type_id_struct, Target::Struct, GenericRequirement::Exact(0); } pub enum GenericRequirement { diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index 89761bf4e27a0..e5f88843be879 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -995,14 +995,58 @@ rustc_queries! { desc { |tcx| "generating MIR shim for `{}`", tcx.def_path_str(key.def_id()) } } - /// The `symbol_name` query provides the symbol name for calling a - /// given instance from the local crate. In particular, it will also - /// look up the correct symbol name of instances from upstream crates. + /// The `symbol_name` query provides the symbol name for the given instance. + /// + /// Both `static` and `fn` instances have symbol names, whether definitions + /// (on the Rust side, either from the local crate or an upstream one), or + /// imports in a "foreign block" (`extern {...}`). + /// + /// This symbol name is the canonical one for that instance, and must be + /// used for both linker-level exports (definitions) and imports (uses), + /// of that instance (i.e. it's the sole connection the linker sees). + /// + /// By default, Rust definitions have mangled symbols, to avoid conflicts, + /// and to allow for many instances ("monomorphizations") of generic `fn`s. + /// The exact choice of mangling can vary, and not all type information from + /// the instance may always be present in a form that allows demangling back + /// to a human-readable form. See also the `symbol_mangling_version` query + /// and the `rustc_symbol_mangling` crate. 
+ /// + /// Note however that `fn` lifetime parameters are erased (and so they never + /// participate in monomorphization), meaning mangled Rust symbol names will + /// never contain information about such lifetimes (mangled lifetimes only + /// occur for higher-ranked types, e.g. `foo::<for<'a> fn(&'a X)>`). query symbol_name(key: ty::Instance<'tcx>) -> ty::SymbolName<'tcx> { desc { "computing the symbol for `{}`", key } cache_on_disk_if { true } } + /// The `type_id_mangling` query provides the Rust mangling of the given type, + /// for use in `TypeId`, as a guard against `type_id_hash` collisions. + /// + /// Unlike the `symbol_name` query, the mangling used for types doesn't vary + /// between crates, and encodes all the type information "structurally" + /// (i.e. lossy encodings such as hashing aren't allowed, as that would + /// effectively defeat the purpose of guarding against hash collisions). + /// + /// If this is used outside of `TypeId`, some additional caveats apply: + /// * it's not a full symbol, so it could collide with unrelated exports, + /// if used directly as a linker symbol without a prefix and/or suffix + /// * mangling features such as compression (e.g. `v0` backrefs) mean that + /// it cannot be trivially embedded in a larger mangled Rust symbol - for + /// that usecase, prefer using `symbol_name` with an instance of either + /// a custom `InstanceDef`, or at least a generic lang item (`fn`, though + /// associated `const` may work better for a type-dependent `static`) + /// * every Rust mangling erases most lifetimes, with the only exception + /// being those found in higher-ranked types (e.g. `for<'a> fn(&'a X)`) + // + // FIXME(eddyb) this shouldn't be using `ty::SymbolName`, but `&'tcx str`, + // or `ty::SymbolName` should be renamed to "tcx-interned string". 
+ query type_id_mangling(key: ty::ParamEnvAnd<'tcx, Ty<'tcx>>) -> ty::SymbolName<'tcx> { + desc { "computing the type mangling of `{}`", key.value } + cache_on_disk_if { true } + } + query opt_def_kind(def_id: DefId) -> Option { desc { |tcx| "looking up definition kind of `{}`", tcx.def_path_str(def_id) } separate_provide_extern diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index dc4d10f699c75..3f949926e5328 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -266,6 +266,7 @@ symbols! { Ty, TyCtxt, TyKind, + TypeId, Unknown, UnsafeArg, Vec, diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs index a5503b04ff629..2e16857f4ce13 100644 --- a/compiler/rustc_symbol_mangling/src/lib.rs +++ b/compiler/rustc_symbol_mangling/src/lib.rs @@ -125,13 +125,10 @@ pub fn symbol_name_for_instance_in_crate<'tcx>( } pub fn provide(providers: &mut Providers) { - *providers = Providers { symbol_name: symbol_name_provider, ..*providers }; + *providers = Providers { symbol_name, type_id_mangling, ..*providers }; } -// The `symbol_name` query provides the symbol name for calling a given -// instance from the local crate. In particular, it will also look up the -// correct symbol name of instances from upstream crates. -fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> { +fn symbol_name<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName<'tcx> { let symbol_name = compute_symbol_name(tcx, instance, || { // This closure determines the instantiating crate for instances that // need an instantiating-crate-suffix for their symbol name, in order @@ -150,9 +147,48 @@ fn symbol_name_provider<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty ty::SymbolName::new(tcx, &symbol_name) } -/// This function computes the typeid for the given function ABI. 
-pub fn typeid_for_fnabi<'tcx>(tcx: TyCtxt<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> String { - v0::mangle_typeid_for_fnabi(tcx, fn_abi) +fn type_id_mangling<'tcx>( + tcx: TyCtxt<'tcx>, + query: ty::ParamEnvAnd<'tcx, Ty<'tcx>>, +) -> ty::SymbolName<'tcx> { + let (param_env, ty) = query.into_parts(); + ty::SymbolName::new(tcx, &v0::mangle_type(tcx, param_env, ty)) +} + +/// This function computes the LLVM CFI typeid for the given `FnAbi`. +pub fn llvm_cfi_typeid_for_fn_abi<'tcx>( + _tcx: TyCtxt<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, +) -> String { + // LLVM uses type metadata to allow IR modules to aggregate pointers by their types.[1] This + // type metadata is used by LLVM Control Flow Integrity to test whether a given pointer is + // associated with a type identifier (i.e., test type membership). + // + // Clang uses the Itanium C++ ABI's[2] virtual tables and RTTI typeinfo structure name[3] as + // type metadata identifiers for function pointers. The typeinfo name encoding is a + // two-character code (i.e., “TS”) prefixed to the type encoding for the function. + // + // For cross-language LLVM CFI support, a compatible encoding must be used by either + // + // a. Using a superset of types that encompasses types used by Clang (i.e., Itanium C++ ABI's + // type encodings[4]), or at least types used at the FFI boundary. + // b. Reducing the types to the least common denominator between types used by Clang (or at + // least types used at the FFI boundary) and Rust compilers (if even possible). + // c. Creating a new ABI for cross-language CFI and using it for Clang and Rust compilers (and + // possibly other compilers). + // + // Option (b) may weaken the protection for Rust-compiled only code, so it should be provided + // as an alternative to a Rust-specific encoding for when mixing Rust and C and C++ -compiled + // code. Option (c) would require changes to Clang to use the new ABI. 
+ // + // [1] https://llvm.org/docs/TypeMetadata.html + // [2] https://itanium-cxx-abi.github.io/cxx-abi/abi.html + // [3] https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-special-vtables + // [4] https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-type + // + // FIXME(rcvalle): See comment above. + let arg_count = fn_abi.args.len() + fn_abi.ret.is_indirect() as usize; + format!("typeid{}", arg_count) } /// Computes the symbol name for the given instance. This function will call diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs index c8fdf363f053e..6727032cd0deb 100644 --- a/compiler/rustc_symbol_mangling/src/v0.rs +++ b/compiler/rustc_symbol_mangling/src/v0.rs @@ -11,7 +11,6 @@ use rustc_middle::ty::print::{Print, Printer}; use rustc_middle::ty::subst::{GenericArg, GenericArgKind, Subst}; use rustc_middle::ty::{self, FloatTy, Instance, IntTy, Ty, TyCtxt, TypeFoldable, UintTy}; use rustc_span::symbol::kw; -use rustc_target::abi::call::FnAbi; use rustc_target::abi::Integer; use rustc_target::spec::abi::Abi; @@ -28,16 +27,11 @@ pub(super) fn mangle<'tcx>( // FIXME(eddyb) this should ideally not be needed. let substs = tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), instance.substs); - let prefix = "_R"; - let mut cx = &mut SymbolMangler { - tcx, - start_offset: prefix.len(), - paths: FxHashMap::default(), - types: FxHashMap::default(), - consts: FxHashMap::default(), - binders: vec![], - out: String::from(prefix), - }; + let mut cx = &mut SymbolMangler::new(tcx); + + // The `_R` prefix indicates a Rust mangled symbol. + cx.push("_R"); + cx.start_offset = cx.out.len(); // Append `::{shim:...#0}` to shims that can coexist with a non-shim instance. 
let shim_kind = match instance.def { @@ -58,39 +52,17 @@ pub(super) fn mangle<'tcx>( std::mem::take(&mut cx.out) } -pub(super) fn mangle_typeid_for_fnabi<'tcx>( - _tcx: TyCtxt<'tcx>, - fn_abi: &FnAbi<'tcx, Ty<'tcx>>, +pub(super) fn mangle_type<'tcx>( + tcx: TyCtxt<'tcx>, + param_env: ty::ParamEnv<'tcx>, + ty: Ty<'tcx>, ) -> String { - // LLVM uses type metadata to allow IR modules to aggregate pointers by their types.[1] This - // type metadata is used by LLVM Control Flow Integrity to test whether a given pointer is - // associated with a type identifier (i.e., test type membership). - // - // Clang uses the Itanium C++ ABI's[2] virtual tables and RTTI typeinfo structure name[3] as - // type metadata identifiers for function pointers. The typeinfo name encoding is a - // two-character code (i.e., “TS”) prefixed to the type encoding for the function. - // - // For cross-language LLVM CFI support, a compatible encoding must be used by either - // - // a. Using a superset of types that encompasses types used by Clang (i.e., Itanium C++ ABI's - // type encodings[4]), or at least types used at the FFI boundary. - // b. Reducing the types to the least common denominator between types used by Clang (or at - // least types used at the FFI boundary) and Rust compilers (if even possible). - // c. Creating a new ABI for cross-language CFI and using it for Clang and Rust compilers (and - // possibly other compilers). - // - // Option (b) may weaken the protection for Rust-compiled only code, so it should be provided - // as an alternative to a Rust-specific encoding for when mixing Rust and C and C++ -compiled - // code. Option (c) would require changes to Clang to use the new ABI. 
- // - // [1] https://llvm.org/docs/TypeMetadata.html - // [2] https://itanium-cxx-abi.github.io/cxx-abi/abi.html - // [3] https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-special-vtables - // [4] https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-type - // - // FIXME(rcvalle): See comment above. - let arg_count = fn_abi.args.len() + fn_abi.ret.is_indirect() as usize; - format!("typeid{}", arg_count) + let param_env = param_env.with_reveal_all_normalized(tcx); + let ty = tcx.normalize_erasing_regions(param_env, ty); + + let mut cx = SymbolMangler::new(tcx); + cx.print_type(ty).unwrap(); + cx.out } struct BinderLevel { @@ -121,6 +93,18 @@ struct SymbolMangler<'tcx> { } impl<'tcx> SymbolMangler<'tcx> { + fn new(tcx: TyCtxt<'tcx>) -> Self { + Self { + tcx, + start_offset: 0, + paths: FxHashMap::default(), + types: FxHashMap::default(), + consts: FxHashMap::default(), + binders: vec![], + out: String::new(), + } + } + fn push(&mut self, s: &str) { self.out.push_str(s); } diff --git a/compiler/rustc_typeck/src/check/intrinsic.rs b/compiler/rustc_typeck/src/check/intrinsic.rs index cd6b1115ed806..1d8c847ef6126 100644 --- a/compiler/rustc_typeck/src/check/intrinsic.rs +++ b/compiler/rustc_typeck/src/check/intrinsic.rs @@ -9,6 +9,7 @@ use crate::require_same_types; use rustc_errors::struct_span_err; use rustc_hir as hir; +use rustc_hir::lang_items::LangItem; use rustc_middle::traits::{ObligationCause, ObligationCauseCode}; use rustc_middle::ty::subst::Subst; use rustc_middle::ty::{self, TyCtxt}; @@ -120,18 +121,17 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { .copied(), ); let mk_va_list_ty = |mutbl| { - tcx.lang_items().va_list().map(|did| { - let region = tcx.mk_region(ty::ReLateBound( - ty::INNERMOST, - ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) }, - )); - let env_region = tcx.mk_region(ty::ReLateBound( - ty::INNERMOST, - ty::BoundRegion { var: ty::BoundVar::from_u32(1), kind: ty::BrEnv }, - 
)); - let va_list_ty = tcx.type_of(did).subst(tcx, &[region.into()]); - (tcx.mk_ref(env_region, ty::TypeAndMut { ty: va_list_ty, mutbl }), va_list_ty) - }) + let did = tcx.require_lang_item(LangItem::VaList, Some(it.span)); + let region = tcx.mk_region(ty::ReLateBound( + ty::INNERMOST, + ty::BoundRegion { var: ty::BoundVar::from_u32(0), kind: ty::BrAnon(0) }, + )); + let env_region = tcx.mk_region(ty::ReLateBound( + ty::INNERMOST, + ty::BoundRegion { var: ty::BoundVar::from_u32(1), kind: ty::BrEnv }, + )); + let va_list_ty = tcx.type_of(did).subst(tcx, &[region.into()]); + (tcx.mk_ref(env_region, ty::TypeAndMut { ty: va_list_ty, mutbl }), va_list_ty) }; let (n_tps, n_lts, inputs, output, unsafety) = if name_str.starts_with("atomic_") { @@ -191,7 +191,9 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { sym::needs_drop => (1, Vec::new(), tcx.types.bool), sym::type_name => (1, Vec::new(), tcx.mk_static_str()), - sym::type_id => (1, Vec::new(), tcx.types.u64), + sym::type_id => { + (1, Vec::new(), tcx.type_of(tcx.require_lang_item(LangItem::TypeId, Some(it.span)))) + } sym::offset | sym::arith_offset => ( 1, vec![ @@ -366,23 +368,21 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { ) } - sym::va_start | sym::va_end => match mk_va_list_ty(hir::Mutability::Mut) { - Some((va_list_ref_ty, _)) => (0, vec![va_list_ref_ty], tcx.mk_unit()), - None => bug!("`va_list` language item needed for C-variadic intrinsics"), - }, + sym::va_start | sym::va_end => { + let (va_list_ref_ty, _) = mk_va_list_ty(hir::Mutability::Mut); + (0, vec![va_list_ref_ty], tcx.mk_unit()) + } - sym::va_copy => match mk_va_list_ty(hir::Mutability::Not) { - Some((va_list_ref_ty, va_list_ty)) => { - let va_list_ptr_ty = tcx.mk_mut_ptr(va_list_ty); - (0, vec![va_list_ptr_ty, va_list_ref_ty], tcx.mk_unit()) - } - None => bug!("`va_list` language item needed for C-variadic intrinsics"), - }, + sym::va_copy => { + let (va_list_ref_ty, va_list_ty) = 
mk_va_list_ty(hir::Mutability::Not); + let va_list_ptr_ty = tcx.mk_mut_ptr(va_list_ty); + (0, vec![va_list_ptr_ty, va_list_ref_ty], tcx.mk_unit()) + } - sym::va_arg => match mk_va_list_ty(hir::Mutability::Mut) { - Some((va_list_ref_ty, _)) => (1, vec![va_list_ref_ty], param(0)), - None => bug!("`va_list` language item needed for C-variadic intrinsics"), - }, + sym::va_arg => { + let (va_list_ref_ty, _) = mk_va_list_ty(hir::Mutability::Mut); + (1, vec![va_list_ref_ty], param(0)) + } sym::nontemporal_store => (1, vec![tcx.mk_mut_ptr(param(0)), param(0)], tcx.mk_unit()), diff --git a/library/core/src/any.rs b/library/core/src/any.rs index 3b15ab1e6895b..087e9635b6b27 100644 --- a/library/core/src/any.rs +++ b/library/core/src/any.rs @@ -84,8 +84,9 @@ #![stable(feature = "rust1", since = "1.0.0")] -use crate::fmt; -use crate::intrinsics; +use crate::cmp::Ordering; +use crate::hash::{Hash, Hasher}; +use crate::{fmt, intrinsics, ptr, slice, str}; /////////////////////////////////////////////////////////////////////////////// // Any trait @@ -582,6 +583,74 @@ impl dyn Any + Send + Sync { // TypeID and its methods /////////////////////////////////////////////////////////////////////////////// +extern "C" { + type OpaqueStrBytes; +} + +// NOTE(eddyb) these are needed because `extern { type }`s don't implement any +// auto traits, and their absence ends up propagating to `TypeId` otherwise. +unsafe impl Send for OpaqueStrBytes {} +unsafe impl Sync for OpaqueStrBytes {} +impl crate::panic::RefUnwindSafe for OpaqueStrBytes {} + +/// Length-prefixed string containing the Rust mangling of a type, for `TypeId`. +/// +/// Equality checks have a fast-path for the "equal address" case, to reduce the +/// cost of `TypeId` equality checking. Note that comparing unequal `TypeId`s will +/// already almost never check the mangling, as the `hash` field is compared first. 
+struct TypeManglingStr { + len: usize, + bytes: OpaqueStrBytes, +} + +impl TypeManglingStr { + #[inline] + fn as_str(&self) -> &str { + // SAFETY: `&TypeManglingStr` was allocated by the `type_id` intrinsic, + // with valid UTF-8 starting at `&self.bytes` and `self.len` bytes long. + unsafe { + str::from_utf8_unchecked(slice::from_raw_parts( + ptr::addr_of!(self.bytes).cast::<u8>(), + self.len, + )) + } + } +} + +impl PartialEq for TypeManglingStr { + #[inline] + fn eq(&self, other: &Self) -> bool { + // FIXME(eddyb) should `likely` be used around the `ptr::eq`? + if ptr::eq(self, other) { + // Avoid comparing the bytes, if the address is the same. + true + } else { + self.as_str() == other.as_str() + } + } +} + +impl Eq for TypeManglingStr {} + +impl PartialOrd for TypeManglingStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for TypeManglingStr { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + if ptr::eq(self, other) { + // Avoid comparing the bytes, if the address is the same. + Ordering::Equal + } else { + self.as_str().cmp(other.as_str()) + } + } +} + /// A `TypeId` represents a globally unique identifier for a type. /// /// Each `TypeId` is an opaque object which does not allow inspection of what's @@ -594,10 +663,36 @@ impl dyn Any + Send + Sync { /// While `TypeId` implements `Hash`, `PartialOrd`, and `Ord`, it is worth /// noting that the hashes and ordering will vary between Rust releases. Beware /// of relying on them inside of your code! -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +#[cfg_attr(not(bootstrap), lang = "TypeId")] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] #[stable(feature = "rust1", since = "1.0.0")] pub struct TypeId { - t: u64, + hash: u64, + + // NOTE(eddyb) this follows `hash`, so that the auto-derived `PartialEq` + // doesn't waste time comparing `mangling`, when `hash` differs. 
+ #[cfg(not(bootstrap))] + mangling: &'static TypeManglingStr, +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for TypeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // FIXME(eddyb) perhaps attempt demangling `mangling`. + f.debug_struct("TypeId").finish_non_exhaustive() + } +} + +// NOTE(eddyb) this avoids hashing `mangling` for performance reasons, as the +// `hash` field is sufficient for a lossy hashing operation, but there is a +// second reason for doing it: code in the wild expects a single `write_u64` +// call, and hashing the entirety of `mangling.as_str()` would break that. +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for TypeId { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { + state.write_u64(self.hash); + } } impl TypeId { @@ -620,7 +715,14 @@ impl TypeId { #[stable(feature = "rust1", since = "1.0.0")] #[rustc_const_unstable(feature = "const_type_id", issue = "77125")] pub const fn of<T: ?Sized + 'static>() -> TypeId { - TypeId { t: intrinsics::type_id::<T>() } + #[cfg(bootstrap)] + { + TypeId { hash: intrinsics::type_id::<T>() } + } + #[cfg(not(bootstrap))] + { + intrinsics::type_id::<T>() + } } } diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index 8e02ca8431744..bf7ca7045f2ec 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -848,6 +848,11 @@ extern "rust-intrinsic" { /// /// The stabilized version of this intrinsic is [`core::any::TypeId::of`]. 
#[rustc_const_unstable(feature = "const_type_id", issue = "77125")] + #[cfg(not(bootstrap))] + pub fn type_id<T: ?Sized + 'static>() -> crate::any::TypeId; + + #[rustc_const_unstable(feature = "const_type_id", issue = "77125")] + #[cfg(bootstrap)] pub fn type_id<T: ?Sized + 'static>() -> u64; /// A guard for unsafe functions that cannot ever be executed if `T` is uninhabited: diff --git a/src/test/ui/const-generics/issues/issue-90318.stderr b/src/test/ui/const-generics/issues/issue-90318.stderr index c8690ecd0da7e..602896c56e6b8 100644 --- a/src/test/ui/const-generics/issues/issue-90318.stderr +++ b/src/test/ui/const-generics/issues/issue-90318.stderr @@ -18,7 +18,7 @@ LL | If<{ TypeId::of::<T>() != TypeId::of::<()>() }>: True, note: impl defined here, but it is not `const` --> $SRC_DIR/core/src/any.rs:LL:COL | -LL | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +LL | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ^^^^^^^^^ = note: calls in constants are limited to constant functions, tuple structs and tuple variants = note: this error originates in the derive macro `PartialEq` (in Nightly builds, run with -Z macro-backtrace for more info) @@ -43,7 +43,7 @@ LL | If<{ TypeId::of::<T>() != TypeId::of::<()>() }>: True, note: impl defined here, but it is not `const` --> $SRC_DIR/core/src/any.rs:LL:COL | -LL | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +LL | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ^^^^^^^^^ = note: calls in constants are limited to constant functions, tuple structs and tuple variants = note: this error originates in the derive macro `PartialEq` (in Nightly builds, run with -Z macro-backtrace for more info) diff --git a/src/test/ui/consts/issue-73976-monomorphic.rs b/src/test/ui/consts/issue-73976-monomorphic.rs index 7706a97f23b48..28f95d67d97cf 100644 --- a/src/test/ui/consts/issue-73976-monomorphic.rs +++ b/src/test/ui/consts/issue-73976-monomorphic.rs @@ -1,9 +1,10 @@ -// check-pass -// -// This test is complement to the 
test in issue-73976-polymorphic.rs. -// In that test we ensure that polymorphic use of type_id and type_name in patterns -// will be properly rejected. This test will ensure that monomorphic use of these -// would not be wrongly rejected in patterns. +// NOTE(eddyb) this was the original comment before this test started erroring: +// > // check-pass +// > // +// > // This test is complement to the test in issue-73976-polymorphic.rs. +// > // In that test we ensure that polymorphic use of type_id and type_name in patterns +// > // will be properly rejected. This test will ensure that monomorphic use of these +// > // would not be wrongly rejected in patterns. #![feature(const_type_id)] #![feature(const_type_name)] @@ -18,6 +19,7 @@ impl<T: 'static> GetTypeId<T> { const fn check_type_id<T: 'static>() -> bool { matches!(GetTypeId::<T>::VALUE, GetTypeId::<usize>::VALUE) + //~^ ERROR to use a constant of type } pub struct GetTypeNameLen<T>(T); diff --git a/src/test/ui/consts/issue-73976-monomorphic.stderr b/src/test/ui/consts/issue-73976-monomorphic.stderr new file mode 100644 index 0000000000000..22b534b1175c9 --- /dev/null +++ b/src/test/ui/consts/issue-73976-monomorphic.stderr @@ -0,0 +1,8 @@ +error: to use a constant of type `any::TypeManglingStr` in a pattern, `any::TypeManglingStr` must be annotated with `#[derive(PartialEq, Eq)]` + --> $DIR/issue-73976-monomorphic.rs:21:37 + | +LL | matches!(GetTypeId::<T>::VALUE, GetTypeId::<usize>::VALUE) + | ^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to previous error + diff --git a/src/test/ui/extern/extern-types-not-sync-send.stderr b/src/test/ui/extern/extern-types-not-sync-send.stderr index 7865ddeda34f5..1714f1152d2eb 100644 --- a/src/test/ui/extern/extern-types-not-sync-send.stderr +++ b/src/test/ui/extern/extern-types-not-sync-send.stderr @@ -5,6 +5,7 @@ LL | assert_sync::<A>(); | ^ `A` cannot be shared between threads safely | = help: the trait `Sync` is not implemented for `A` + = help: the trait `Sync` is implemented for `any::OpaqueStrBytes` note: required by a 
bound in `assert_sync` --> $DIR/extern-types-not-sync-send.rs:9:28 | @@ -18,6 +19,7 @@ LL | assert_send::<A>(); | ^ `A` cannot be sent between threads safely | = help: the trait `Send` is not implemented for `A` + = help: the trait `Send` is implemented for `any::OpaqueStrBytes` note: required by a bound in `assert_send` --> $DIR/extern-types-not-sync-send.rs:10:28 | diff --git a/src/test/ui/fmt/send-sync.stderr b/src/test/ui/fmt/send-sync.stderr index 7c7a3c8846b9b..fe5c903860e2c 100644 --- a/src/test/ui/fmt/send-sync.stderr +++ b/src/test/ui/fmt/send-sync.stderr @@ -5,6 +5,7 @@ LL | send(format_args!("{:?}", c)); | ^^^^ `core::fmt::Opaque` cannot be shared between threads safely | = help: within `[ArgumentV1<'_>]`, the trait `Sync` is not implemented for `core::fmt::Opaque` + = help: the trait `Sync` is implemented for `any::OpaqueStrBytes` = note: required because it appears within the type `&core::fmt::Opaque` = note: required because it appears within the type `ArgumentV1<'_>` = note: required because it appears within the type `[ArgumentV1<'_>]` @@ -23,6 +24,7 @@ LL | sync(format_args!("{:?}", c)); | ^^^^ `core::fmt::Opaque` cannot be shared between threads safely | = help: within `Arguments<'_>`, the trait `Sync` is not implemented for `core::fmt::Opaque` + = help: the trait `Sync` is implemented for `any::OpaqueStrBytes` = note: required because it appears within the type `&core::fmt::Opaque` = note: required because it appears within the type `ArgumentV1<'_>` = note: required because it appears within the type `[ArgumentV1<'_>]`