Skip to content

Commit

Permalink
Encode strings as Symbol in value tree representation
Browse files Browse the repository at this point in the history
  • Loading branch information
oli-obk committed Apr 1, 2021
1 parent bdfe567 commit fc2244a
Show file tree
Hide file tree
Showing 28 changed files with 120 additions and 113 deletions.
23 changes: 14 additions & 9 deletions compiler/rustc_codegen_cranelift/src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,8 @@ pub(crate) fn codegen_valtree<'tcx>(
) -> CValue<'tcx> {
let layout = fx.layout_of(ty);
let tcx = fx.tcx;
let mut encode_slice = |valtree: ty::ValTree<'_>| {
let s: Vec<u8> = valtree
.unwrap_branch()
.iter()
.map(|b| u8::try_from(b.unwrap_leaf()).unwrap())
.collect();
let alloc_id = fx.tcx.allocate_bytes(&s);
let mut encode_slice = |s| {
let alloc_id = fx.tcx.allocate_bytes(s);

let ptr = pointer_for_alloc_id(fx, alloc_id, Mutability::Not).get_addr(fx);
let len = fx.bcx.ins().iconst(fx.pointer_type, i64::try_from(s.len()).unwrap());
Expand All @@ -169,8 +164,18 @@ pub(crate) fn codegen_valtree<'tcx>(

match *ty.kind() {
ty::Ref(_, pointee, _) => match *pointee.kind() {
ty::Str => encode_slice(valtree),
ty::Slice(elem_ty) if elem_ty == tcx.types.u8 => encode_slice(valtree),
ty::Str => {
let s = valtree.unwrap_str().as_str();
encode_slice(s.as_bytes())
}
ty::Slice(elem_ty) if elem_ty == tcx.types.u8 => {
let s: Vec<u8> = valtree
.unwrap_branch()
.iter()
.map(|b| u8::try_from(b.unwrap_leaf()).unwrap())
.collect();
encode_slice(&s)
}
ty::Array(elem_ty, _) if elem_ty == tcx.types.u8 => {
let s: Vec<u8> = valtree
.unwrap_branch()
Expand Down
23 changes: 14 additions & 9 deletions compiler/rustc_codegen_ssa/src/mir/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,8 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
return OperandRef::new_zst(bx, layout);
}

let encode_slice = |valtree: ValTree<'_>| {
let s: Vec<u8> = valtree
.unwrap_branch()
.iter()
.map(|b| u8::try_from(b.unwrap_leaf()).unwrap())
.collect();
let alloc_id = bx.tcx().allocate_bytes(&s);
let encode_slice = |s| {
let alloc_id = bx.tcx().allocate_bytes(s);

let a_scalar = match layout.abi {
Abi::ScalarPair(ref a, _) => a,
Expand All @@ -104,8 +99,18 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
let val = match val {
Err(valtree) => match ty.kind() {
ty::Ref(_, pointee, _) => match *pointee.kind() {
ty::Str => encode_slice(valtree),
ty::Slice(elem_ty) if elem_ty == bx.tcx().types.u8 => encode_slice(valtree),
ty::Str => {
let s = valtree.unwrap_str().as_str();
encode_slice(s.as_bytes())
}
ty::Slice(elem_ty) if elem_ty == bx.tcx().types.u8 => {
let s: Vec<u8> = valtree
.unwrap_branch()
.iter()
.map(|b| u8::try_from(b.unwrap_leaf()).unwrap())
.collect();
encode_slice(&s)
}
ty::Array(elem_ty, _) if elem_ty == bx.tcx().types.u8 => {
let s: Vec<u8> = valtree
.unwrap_branch()
Expand Down
18 changes: 14 additions & 4 deletions compiler/rustc_middle/src/ty/consts/valtree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::ty::TyCtxt;

use super::ScalarInt;
use rustc_macros::HashStable;
use rustc_span::Symbol;

#[derive(Copy, Clone, Debug, Hash, TyEncodable, TyDecodable, Eq, PartialEq, Ord, PartialOrd)]
#[derive(HashStable)]
Expand Down Expand Up @@ -30,33 +31,42 @@ pub enum ValTree<'tcx> {
/// Enums are represented by storing their discriminant as a field, followed by all
/// the fields of the variant.
///
/// `&str` and `&[T]` are encoded as if they were `&[T;N]`. So there is no wide pointer
/// `&[T]` is encoded as if it were `&[T; N]`. So there is no wide pointer
/// or metadata encoded, instead the length is taken directly from the number of elements
/// in the branch.
Branch(&'tcx [ValTree<'tcx>]),
/// `&str` could be encoded as a `Branch`, but the back and forth between valtree
/// representations and other representations of `str` is expensive.
Str(Symbol),
}

impl ValTree<'tcx> {
/// Returns an empty `Branch`, i.e. a branch with no children.
///
/// Going by the name, this is presumably the representation of zero-sized
/// values — confirm against callers.
pub fn zst() -> Self {
Self::Branch(&[])
}
/// Returns the `Symbol` stored in a `Str` valtree.
///
/// Any other variant is reported through `bug!`, with the offending valtree
/// included in the message via its `Debug` formatting.
pub fn unwrap_str(self) -> Symbol {
match self {
Self::Str(s) => s,
_ => bug!("expected str, got {:?}", self),
}
}
/// Returns the `ScalarInt` stored in a `Leaf` valtree.
///
/// Every other variant ends in a `bug!` with an "expected leaf" message.
pub fn unwrap_leaf(self) -> ScalarInt {
match self {
Self::Leaf(s) => s,
// NOTE(review): this explicit `Branch` arm looks like the pre-change side of
// the diff hunk, superseded by the catch-all arm below — confirm against the
// actual file before relying on it.
Self::Branch(branch) => bug!("expected leaf, got {:?}", branch),
// Catch-all: covers every variant not matched above (including `Str`).
_ => bug!("expected leaf, got {:?}", self),
}
}
/// Returns the child slice stored in a `Branch` valtree.
///
/// Every other variant ends in a `bug!` with an "expected branch" message.
pub fn unwrap_branch(self) -> &'tcx [Self] {
match self {
// NOTE(review): this explicit `Leaf` arm looks like the pre-change side of
// the diff hunk, superseded by the catch-all arm below — confirm against the
// actual file before relying on it.
Self::Leaf(s) => bug!("expected branch, got {:?}", s),
Self::Branch(branch) => branch,
// Catch-all: covers every variant not matched above (including `Str`).
_ => bug!("expected branch, got {:?}", self),
}
}
/// Non-panicking accessor for the scalar: returns `Some` with the `ScalarInt`
/// for a `Leaf`, and `None` for the `Str` and `Branch` variants.
#[inline]
pub fn try_to_scalar_int(self) -> Option<ScalarInt> {
match self {
Self::Leaf(s) => Some(s),
// NOTE(review): this `Branch(_)` arm looks like the pre-change side of the
// diff hunk; the arm below already covers `Branch` together with `Str` —
// confirm against the actual file.
Self::Branch(_) => None,
Self::Str(_) | Self::Branch(_) => None,
}
}

Expand Down
9 changes: 2 additions & 7 deletions compiler/rustc_middle/src/ty/print/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1143,13 +1143,7 @@ pub trait PrettyPrinter<'tcx>:
match ty.kind() {
ty::Ref(_, pointee, _) => match *pointee.kind() {
ty::Str => {
let s: Vec<u8> = ct
.unwrap_branch()
.iter()
.map(|b| u8::try_from(b.unwrap_leaf()).unwrap())
.collect();
let s = String::from_utf8(s).unwrap();
p!(write("{:?}", s));
p!(write("{:?}", ct.unwrap_str().as_str()));
Ok(self)
}
// Special case byte strings
Expand All @@ -1174,6 +1168,7 @@ pub trait PrettyPrinter<'tcx>:
_ => match ct {
ty::ValTree::Leaf(int) => self.pretty_print_const_scalar_int(int, ty, print_ty),
ty::ValTree::Branch(branches) => bug!("{}: {:?}", ty, branches),
ty::ValTree::Str(s) => bug!("{}: {}", ty, s),
},
}
}
Expand Down
20 changes: 17 additions & 3 deletions compiler/rustc_mir/src/const_eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use rustc_middle::{
ty::{self, Ty, TyCtxt},
};
use rustc_span::{source_map::DUMMY_SP, symbol::Symbol};
use rustc_target::abi::{LayoutOf, VariantIdx};
use rustc_target::abi::{LayoutOf, Size, VariantIdx};

use crate::interpret::{
intern_const_alloc_recursive, ConstValue, InternKind, InterpCx, MPlaceTy, MemPlaceMeta, Scalar,
Expand Down Expand Up @@ -123,8 +123,22 @@ fn const_to_valtree<'tcx>(
branches(ecx, n.try_into().unwrap(), None, &mplace)
};
match mplace.layout.ty.kind() {
// str slices are encoded as a `u8` array.
ty::Str => array(ecx.tcx.types.u8),
ty::Str => {
let n = scalar.to_machine_usize(ecx).unwrap();
if n > 0 {
let ptr = mplace.ptr.assert_ptr();
let s = ecx.memory.get_raw(ptr.alloc_id).unwrap().get_bytes(
ecx,
ptr,
Size::from_bytes(n),
).unwrap();
let s = std::str::from_utf8(s).unwrap();
let s = Symbol::intern(s);
Ok(Some(ty::ValTree::Str(s)))
} else {
Ok(Some(ty::ValTree::Str(Symbol::intern(""))))
}
},
// Slices are encoded as an array
ty::Slice(elem_ty) => array(elem_ty),
// No other unsized types are structural match.
Expand Down
14 changes: 5 additions & 9 deletions compiler/rustc_mir/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -593,18 +593,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
Ok(OpTy { op, layout })
}
ty::Str => {
let bytes: Vec<u8> = val
.unwrap_branch()
.iter()
.map(|vt| u8::try_from(vt.unwrap_leaf()).unwrap())
.collect();
let alloc_id = self.tcx.allocate_bytes(&bytes);
let s = val.unwrap_str().as_str();
let alloc_id = self.tcx.allocate_bytes(s.as_bytes());
let ptr = self.global_base_pointer(alloc_id.into())?;
let layout =
from_known_layout(self.tcx, self.param_env, layout, || self.layout_of(ty))?;
let op = Operand::Immediate(Immediate::new_slice(
ptr.into(),
u64::try_from(bytes.len()).unwrap(),
u64::try_from(s.len()).unwrap(),
self,
));
Ok(OpTy { op, layout })
Expand All @@ -620,11 +616,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ValTree::Leaf(int) => {
self.const_val_to_op(ConstValue::Scalar(int.into()), ty, layout)
}
ValTree::Branch(branches) => span_bug!(
ValTree::Str(_) | ValTree::Branch(_) => span_bug!(
self.cur_span(),
"complex valtrees of type {} are unimplemented: {:?}",
ty,
branches
val
),
},
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_mir_build/src/thir/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ crate fn lit_to_const<'tcx>(
};

let val = match (lit, &ty.kind()) {
(ast::LitKind::Str(s, _), ty::Ref(..)) => byte_array(s.as_str().as_bytes()),
(ast::LitKind::Str(s, _), ty::Ref(..)) => ValTree::Str(*s),
(ast::LitKind::ByteStr(data), ty::Ref(..)) => byte_array(data),
(ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => {
ValTree::Leaf(ScalarInt::from_uint(*n, Size::from_bytes(1)))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn main() -> () {
_5 = foo(move _6) -> bb1; // scope 4 at $DIR/array-index-is-temporary.rs:16:21: 16:27
// mir::Constant
// + span: $DIR/array-index-is-temporary.rs:16:21: 16:24
// + literal: Const { ty: unsafe fn(*mut usize) -> u32 {foo}, val: Value(Scalar(<ZST>)) }
// + literal: Const { ty: unsafe fn(*mut usize) -> u32 {foo}, val: Value(Leaf(<ZST>)) }
}

bb1: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ fn main() -> () {
StorageLive(_1); // scope 0 at $DIR/const_allocation.rs:8:5: 8:8
StorageLive(_2); // scope 0 at $DIR/const_allocation.rs:8:5: 8:8
_2 = const {alloc0: &&[(Option<i32>, &[&str])]}; // scope 0 at $DIR/const_allocation.rs:8:5: 8:8
// ty::Const
// + ty: &&[(std::option::Option<i32>, &[&str])]
// + val: Value(Scalar(alloc0))
// mir::Constant
// + span: $DIR/const_allocation.rs:8:5: 8:8
// + literal: Const { ty: &&[(std::option::Option<i32>, &[&str])], val: Value(Scalar(alloc0)) }
// + literal: Const { ty: &&[(Option<i32>, &[&str])], val: Value(Scalar(alloc0)) }
_1 = (*_2); // scope 0 at $DIR/const_allocation.rs:8:5: 8:8
StorageDead(_2); // scope 0 at $DIR/const_allocation.rs:8:8: 8:9
StorageDead(_1); // scope 0 at $DIR/const_allocation.rs:8:8: 8:9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,9 @@ fn main() -> () {
StorageLive(_1); // scope 0 at $DIR/const_allocation2.rs:5:5: 5:8
StorageLive(_2); // scope 0 at $DIR/const_allocation2.rs:5:5: 5:8
_2 = const {alloc0: &&[(Option<i32>, &[&u8])]}; // scope 0 at $DIR/const_allocation2.rs:5:5: 5:8
// ty::Const
// + ty: &&[(std::option::Option<i32>, &[&u8])]
// + val: Value(Scalar(alloc0))
// mir::Constant
// + span: $DIR/const_allocation2.rs:5:5: 5:8
// + literal: Const { ty: &&[(std::option::Option<i32>, &[&u8])], val: Value(Scalar(alloc0)) }
// + literal: Const { ty: &&[(Option<i32>, &[&u8])], val: Value(Scalar(alloc0)) }
_1 = (*_2); // scope 0 at $DIR/const_allocation2.rs:5:5: 5:8
StorageDead(_2); // scope 0 at $DIR/const_allocation2.rs:5:8: 5:9
StorageDead(_1); // scope 0 at $DIR/const_allocation2.rs:5:8: 5:9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ fn main() -> () {
StorageLive(_1); // scope 0 at $DIR/const_allocation3.rs:5:5: 5:8
StorageLive(_2); // scope 0 at $DIR/const_allocation3.rs:5:5: 5:8
_2 = const {alloc0: &&Packed}; // scope 0 at $DIR/const_allocation3.rs:5:5: 5:8
// ty::Const
// + ty: &&Packed
// + val: Value(Scalar(alloc0))
// mir::Constant
// + span: $DIR/const_allocation3.rs:5:5: 5:8
// + literal: Const { ty: &&Packed, val: Value(Scalar(alloc0)) }
Expand Down
4 changes: 2 additions & 2 deletions src/test/mir-opt/const_debuginfo.main.ConstDebugInfo.diff
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@
_9 = const "hello, world!"; // scope 4 at $DIR/const_debuginfo.rs:14:13: 14:28
// ty::Const
// + ty: &str
// + val: Value(Branch([Leaf(0x68), Leaf(0x65), Leaf(0x6c), Leaf(0x6c), Leaf(0x6f), Leaf(0x2c), Leaf(0x20), Leaf(0x77), Leaf(0x6f), Leaf(0x72), Leaf(0x6c), Leaf(0x64), Leaf(0x21)]))
// + val: Value(Str("hello, world!"))
// mir::Constant
// + span: $DIR/const_debuginfo.rs:14:13: 14:28
// + literal: Const { ty: &str, val: Value(Branch([Leaf(0x68), Leaf(0x65), Leaf(0x6c), Leaf(0x6c), Leaf(0x6f), Leaf(0x2c), Leaf(0x20), Leaf(0x77), Leaf(0x6f), Leaf(0x72), Leaf(0x6c), Leaf(0x64), Leaf(0x21)])) }
// + literal: Const { ty: &str, val: Value(Str("hello, world!")) }
StorageLive(_10); // scope 5 at $DIR/const_debuginfo.rs:16:9: 16:10
(_10.0: bool) = const true; // scope 5 at $DIR/const_debuginfo.rs:16:13: 16:34
(_10.1: bool) = const false; // scope 5 at $DIR/const_debuginfo.rs:16:13: 16:34
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
// + literal: Const { ty: fn(&str) -> ! {std::rt::begin_panic::<&str>}, val: Value(Leaf(<ZST>)) }
// ty::Const
// + ty: &str
// + val: Value(Branch([Leaf(0x65), Leaf(0x78), Leaf(0x70), Leaf(0x6c), Leaf(0x69), Leaf(0x63), Leaf(0x69), Leaf(0x74), Leaf(0x20), Leaf(0x70), Leaf(0x61), Leaf(0x6e), Leaf(0x69), Leaf(0x63)]))
// + val: Value(Str("explicit panic"))
// mir::Constant
// + span: $SRC_DIR/std/src/panic.rs:LL:COL
// + literal: Const { ty: &str, val: Value(Branch([Leaf(0x65), Leaf(0x78), Leaf(0x70), Leaf(0x6c), Leaf(0x69), Leaf(0x63), Leaf(0x69), Leaf(0x74), Leaf(0x20), Leaf(0x70), Leaf(0x61), Leaf(0x6e), Leaf(0x69), Leaf(0x63)])) }
// + literal: Const { ty: &str, val: Value(Str("explicit panic")) }
}

bb2: {
Expand Down
4 changes: 2 additions & 2 deletions src/test/mir-opt/inline/inline_diverging.g.Inline.diff
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@
+ // + literal: Const { ty: fn(&str) -> ! {std::rt::begin_panic::<&str>}, val: Value(Leaf(<ZST>)) }
+ // ty::Const
+ // + ty: &str
+ // + val: Value(Branch([Leaf(0x65), Leaf(0x78), Leaf(0x70), Leaf(0x6c), Leaf(0x69), Leaf(0x63), Leaf(0x69), Leaf(0x74), Leaf(0x20), Leaf(0x70), Leaf(0x61), Leaf(0x6e), Leaf(0x69), Leaf(0x63)]))
+ // + val: Value(Str("explicit panic"))
+ // mir::Constant
+ // + span: $DIR/inline-diverging.rs:16:9: 16:16
+ // + literal: Const { ty: &str, val: Value(Branch([Leaf(0x65), Leaf(0x78), Leaf(0x70), Leaf(0x6c), Leaf(0x69), Leaf(0x63), Leaf(0x69), Leaf(0x74), Leaf(0x20), Leaf(0x70), Leaf(0x61), Leaf(0x6e), Leaf(0x69), Leaf(0x63)])) }
+ // + literal: Const { ty: &str, val: Value(Str("explicit panic")) }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
// mir::Constant
- // + span: $DIR/inline-into-box-place.rs:8:33: 8:41
- // + user_ty: UserType(1)
- // + literal: Const { ty: fn() -> std::vec::Vec<u32> {std::vec::Vec::<u32>::new}, val: Value(Scalar(<ZST>)) }
- // + literal: Const { ty: fn() -> std::vec::Vec<u32> {std::vec::Vec::<u32>::new}, val: Value(Leaf(<ZST>)) }
- }
-
- bb1: {
Expand Down Expand Up @@ -55,7 +55,7 @@
- _3 = alloc::alloc::box_free::<Vec<u32>, std::alloc::Global>(move (_2.0: std::ptr::Unique<std::vec::Vec<u32>>), move (_2.1: std::alloc::Global)) -> bb3; // scope 0 at $DIR/inline-into-box-place.rs:8:42: 8:43
- // mir::Constant
- // + span: $DIR/inline-into-box-place.rs:8:42: 8:43
- // + literal: Const { ty: unsafe fn(std::ptr::Unique<std::vec::Vec<u32>>, std::alloc::Global) {alloc::alloc::box_free::<std::vec::Vec<u32>, std::alloc::Global>}, val: Value(Scalar(<ZST>)) }
- // + literal: Const { ty: unsafe fn(std::ptr::Unique<std::vec::Vec<u32>>, std::alloc::Global) {alloc::alloc::box_free::<std::vec::Vec<u32>, std::alloc::Global>}, val: Value(Leaf(<ZST>)) }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
_2 = bar() -> [return: bb2, unwind: bb5]; // scope 0 at /the/src/instrument_coverage.rs:12:12: 12:17
// mir::Constant
// + span: /the/src/instrument_coverage.rs:12:12: 12:15
// + literal: Const { ty: fn() -> bool {bar}, val: Value(Scalar(<ZST>)) }
// + literal: Const { ty: fn() -> bool {bar}, val: Value(Leaf(<ZST>)) }
}

bb2: {
Expand Down
2 changes: 1 addition & 1 deletion src/test/mir-opt/issue_72181.main.mir_map.0.32bit.mir
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ fn main() -> () {
_1 = std::mem::size_of::<Foo>() -> [return: bb1, unwind: bb3]; // scope 0 at $DIR/issue-72181.rs:24:13: 24:34
// mir::Constant
// + span: $DIR/issue-72181.rs:24:13: 24:32
// + literal: Const { ty: fn() -> usize {std::mem::size_of::<Foo>}, val: Value(Scalar(<ZST>)) }
// + literal: Const { ty: fn() -> usize {std::mem::size_of::<Foo>}, val: Value(Leaf(<ZST>)) }
}

bb1: {
Expand Down
Loading

0 comments on commit fc2244a

Please sign in to comment.