From ef3b6bcff67c0c69ad6c566f84ffb5bad766398a Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Fri, 22 Dec 2023 08:36:57 -0800 Subject: [PATCH] Add support for MOVSX* instructions Per https://www.ietf.org/archive/id/draft-ietf-bpf-isa-00.html#section-3.1 Signed-off-by: Dave Thaler --- src/asm_cfg.cpp | 10 +- src/asm_marshal.cpp | 21 ++- src/asm_ostream.cpp | 3 + src/asm_parse.cpp | 3 +- src/asm_syntax.hpp | 3 + src/asm_unmarshal.cpp | 9 +- src/assertions.cpp | 8 + src/crab/ebpf_domain.cpp | 56 +++++++ src/crab/ebpf_domain.hpp | 1 + src/test/test_yaml.cpp | 1 + test-data/movsx.yaml | 330 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 438 insertions(+), 7 deletions(-) create mode 100644 test-data/movsx.yaml diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index 0d4724435..3787507fc 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -181,9 +181,13 @@ static std::string instype(Instruction ins) { } else if (std::holds_alternative(ins)) { return "packet_access"; } else if (std::holds_alternative(ins)) { - if (std::get(ins).op == Bin::Op::MOV) - return "assign"; - return "arith"; + switch (std::get(ins).op) { + case Bin::Op::MOV: + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: return "assign"; + default: return "arith"; + } } else if (std::holds_alternative(ins)) { return "arith"; } else if (std::holds_alternative(ins)) { diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 36bbe783e..fd7a74902 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -36,20 +36,37 @@ static uint8_t op(Bin::Op op) { case Op::ADD: return 0x0; case Op::SUB: return 0x1; case Op::MUL: return 0x2; + case Op::SDIV: case Op::UDIV: return 0x3; case Op::OR: return 0x4; case Op::AND: return 0x5; case Op::LSH: return 0x6; case Op::RSH: return 0x7; + case Op::SMOD: case Op::UMOD: return 0x9; case Op::XOR: return 0xa; - case Op::MOV: return 0xb; + case Op::MOV: + case Op::MOVSX8: + case Op::MOVSX16: + case Op::MOVSX32: return 0xb; case Op::ARSH: return 0xc; } assert(false); return {}; } +static int16_t offset(Bin::Op op) { + using Op = Bin::Op; + switch (op) { + case Op::SDIV: + case Op::SMOD: return 1; + case Op::MOVSX8: return 8; + case Op::MOVSX16: return 16; + case Op::MOVSX32: return 32; + } + return 0; +} + static uint8_t imm(Un::Op op) { using Op = Un::Op; switch (op) { @@ -96,7 +113,7 @@ struct MarshalVisitor { ebpf_inst res{.opcode = static_cast((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)), .dst = b.dst.v, .src = 0, - .offset = 0, + .offset = offset(b.op), .imm = 0}; std::visit(overloaded{[&](Reg right) { res.opcode |= INST_SRC_REG; diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 43ff6491e..9f8c12254 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -56,6 +56,9 @@ std::ostream& operator<<(std::ostream& os, Bin::Op op) { using Op = Bin::Op; switch (op) { case Op::MOV: return os; + case Op::MOVSX8: return os << "s8"; + case Op::MOVSX16: return os << "s16"; + case Op::MOVSX32: return os << "s32"; case Op::ADD: return os << "+"; case Op::SUB: return os << "-"; case Op::MUL: return os << "*"; diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 7ce6428ed..9f4e42c1c 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -58,7 +58,8 @@ static const std::map str_to_binop = { {"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL}, {"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND}, {"<<", Bin::Op::LSH}, {">>", Bin::Op::RSH}, {"s>>", Bin::Op::ARSH}, {"^", Bin::Op::XOR}, - {"s/", Bin::Op::SDIV}, {"s%", Bin::Op::SMOD}, + {"s/", Bin::Op::SDIV}, {"s%", Bin::Op::SMOD}, {"s8", Bin::Op::MOVSX8}, {"s16", Bin::Op::MOVSX16}, + {"s32", Bin::Op::MOVSX32}, }; static const std::map str_to_unop = { diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 4121bc9e0..5490bb933 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -87,6 +87,9 @@ struct Bin { XOR, SDIV, SMOD, + MOVSX8, + MOVSX16, + MOVSX32, }; Op op; diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index 5cd3fdc99..4ae3617e0 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -109,7 +109,14 @@ struct Unmarshaller { default: throw InvalidInstruction{pc, "invalid ALU op 0x90"}; } case INST_ALU_OP_XOR: return Bin::Op::XOR; - case INST_ALU_OP_MOV: return Bin::Op::MOV; + case INST_ALU_OP_MOV: + switch (inst.offset) { + case 0: return Bin::Op::MOV; + case 8: return Bin::Op::MOVSX8; + case 16: return Bin::Op::MOVSX16; + case 32: return Bin::Op::MOVSX32; + default: throw InvalidInstruction{pc, "invalid ALU op 0xb0"}; + } case INST_ALU_OP_ARSH: if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU) note("arsh32 is not allowed"); diff --git a/src/assertions.cpp b/src/assertions.cpp index 4dd791470..f99a67d6f 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -180,6 +180,14 @@ class AssertExtractor { vector operator()(Bin ins) const { switch (ins.op) { case Bin::Op::MOV: return {}; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: + if (std::holds_alternative(ins.v)) { + auto src = reg(ins.v); + return {Assert{TypeConstraint{src, TypeGroup::number}}}; + } + return {}; case Bin::Op::ADD: if (std::holds_alternative(ins.v)) { auto src = reg(ins.v); diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 62667c88e..b511de0a7 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -2342,6 +2342,46 @@ void ebpf_domain_t::lshr(const Reg& dst_reg, int imm, int finite_width) { havoc_offsets(dst_reg); } +void ebpf_domain_t::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, + Bin::Op op) { + using namespace crab; + + int bits; + switch (op) { + case Bin::Op::MOVSX8: bits = 8; break; + case Bin::Op::MOVSX16: bits = 16; break; + case Bin::Op::MOVSX32: bits = 32; break; + default: throw std::exception(); + } + + reg_pack_t dst = reg_pack(dst_reg); + interval_t right_interval = m_inv.eval_interval(right_svalue); + type_inv.assign_type(m_inv, dst_reg, T_NUM); + havoc_offsets(dst_reg); + int64_t span = 1ULL << bits; + if (right_interval.ub() - right_interval.lb() >= number_t{span}) { + // Interval covers the full space. + havoc(dst.svalue); + return; + } + int64_t mask = 1ULL << (bits - 1); + + // Sign extend each bound. + int64_t lb = right_interval.lb().number().value().cast_to_sint64(); + lb &= (span - 1); + lb = (lb ^ mask) - mask; + int64_t ub = right_interval.ub().number().value().cast_to_sint64(); + ub &= (span - 1); + ub = (ub ^ mask) - mask; + m_inv.set(dst.svalue, crab::interval_t{number_t{lb}, number_t{ub}}); + + if (finite_width) { + m_inv.assign(dst.uvalue, dst.svalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + } +} + void ebpf_domain_t::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { using namespace crab; @@ -2413,6 +2453,11 @@ void ebpf_domain_t::operator()(const Bin& bin) { type_inv.assign_type(m_inv, bin.dst, T_NUM); havoc_offsets(bin.dst); break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: + sign_extend(bin.dst, number_t{(int32_t)imm}, finite_width, bin.op); + break; case Bin::Op::ADD: if (imm == 0) return; @@ -2655,6 +2700,17 @@ void ebpf_domain_t::operator()(const Bin& bin) { bitwise_xor(dst.svalue, dst.uvalue, src.uvalue, finite_width); havoc_offsets(bin.dst); break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: + if (m_inv.entail(type_is_number(src_reg))) { + sign_extend(bin.dst, src.svalue, finite_width, bin.op); + break; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; case Bin::Op::MOV: assign(dst.svalue, src.svalue); assign(dst.uvalue, src.uvalue); diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index c8bba9197..0fbb1992a 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -139,6 +139,7 @@ class ebpf_domain_t final { void shl_overflow(variable_t lhss, variable_t lhsu, const number_t& op2); void lshr(const Reg& reg, int imm, int finite_width); void ashr(const Reg& reg, const linear_expression_t& right_svalue, int finite_width); + void sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, Bin::Op op); void assume(const linear_constraint_t& cst); diff --git a/src/test/test_yaml.cpp b/src/test/test_yaml.cpp index 44c89156f..2456bfe3f 100644 --- a/src/test/test_yaml.cpp +++ b/src/test/test_yaml.cpp @@ -28,6 +28,7 @@ YAML_CASE("test-data/sdivmod.yaml") YAML_CASE("test-data/full64.yaml") YAML_CASE("test-data/jump.yaml") YAML_CASE("test-data/loop.yaml") +YAML_CASE("test-data/movsx.yaml") YAML_CASE("test-data/packet.yaml") YAML_CASE("test-data/parse.yaml") YAML_CASE("test-data/shift.yaml") diff --git a/test-data/movsx.yaml b/test-data/movsx.yaml new file mode 100644 index 000000000..7aa3d0f54 --- /dev/null +++ b/test-data/movsx.yaml @@ -0,0 +1,330 @@ +# Copyright (c) Prevail Verifier contributors. +# SPDX-License-Identifier: MIT +--- +test-case: movsx8 immediate to 32 bits + +pre: [] + +code: + : | + w1 s8= 384 ; 0x180 -> 0xFFFFFF80 + +post: + - r1.type=number + - r1.svalue=4294967168 + - r1.uvalue=4294967168 +--- +test-case: movsx16 immediate to 32 bits + +pre: [] + +code: + : | + w1 s16= 98304 ; 0x18000 -> 0xFFFF8000 + +post: + - r1.type=number + - r1.svalue=4294934528 + - r1.uvalue=4294934528 +--- +test-case: movsx8 immediate to 64 bits + +pre: [] + +code: + : | + r1 s8= 384 ; 0x180 -> 0xFFFFFFFFFFFFFF80 + +post: + - r1.type=number + - r1.svalue=-128 + - r1.uvalue=18446744073709551488 +--- +test-case: movsx16 immediate to 64 bits + +pre: [] + +code: + : | + r1 s16= 98304 ; 0x18000 -> 0xFFFFFFFFFFFF8000 + +post: + - r1.type=number + - r1.svalue=-32768 + - r1.uvalue=18446744073709518848 +--- +test-case: movsx32 immediate to 64 bits + +pre: [] + +code: + : | + r1 s32= 2147483648 ; 0x80000000 -> 0xFFFFFFFF80000000 + +post: + - r1.type=number + - r1.svalue=-2147483648 + - r1.uvalue=18446744071562067968 +--- +test-case: movsx8 register to 32 bits + +pre: ["r1.svalue=384", "r1.uvalue=384", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s8= r1 + +post: + - r1.type=number + - r1.svalue=384 + - r1.uvalue=384 + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=4294967168 + - r2.uvalue=4294967168 +--- +test-case: movsx16 register to 32 bits + +pre: ["r1.svalue=98304", "r1.uvalue=98304", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s16= r1 + +post: + - r1.type=number + - r1.svalue=98304 + - r1.uvalue=98304 + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=4294934528 + - r2.uvalue=4294934528 +--- +test-case: movsx8 register to 64 bits + +pre: ["r1.svalue=384", "r1.uvalue=384", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s8= r1 + +post: + - r1.type=number + - r1.svalue=384 + - r1.uvalue=384 + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=-128 + - r2.uvalue=18446744073709551488 +--- +test-case: movsx16 register to 64 bits + +pre: ["r1.svalue=98304", "r1.uvalue=98304", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s16= r1 + +post: + - r1.type=number + - r1.svalue=98304 + - r1.uvalue=98304 + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=-32768 + - r2.uvalue=18446744073709518848 +--- +test-case: movsx32 register to 64 bits + +pre: ["r1.svalue=2147483648", "r1.uvalue=2147483648", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s32= r1 + +post: + - r1.type=number + - r1.svalue=2147483648 + - r1.uvalue=2147483648 + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=-2147483648 + - r2.uvalue=18446744071562067968 +--- +test-case: movsx32 register with a non-number + +pre: [] + +code: + : | + r2 s32= r1 + +post: [] + +messages: + - "0: (r1.type == number)" +--- +test-case: movsx8 register range to 32 bits without wrap + +pre: ["r1.svalue=[128, 130]", "r1.uvalue=[128, 130]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s8= r1 + +post: + - r1.type=number + - r1.svalue=[128, 130] + - r1.uvalue=[128, 130] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[4294967168, 4294967170] + - r2.uvalue=[4294967168, 4294967170] + - r2.svalue=r2.uvalue +--- +test-case: movsx8 register range to 32 bits with wrap + +pre: ["r1.svalue=[255, 257]", "r1.uvalue=[255, 257]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s8= r1 + +post: + - r1.type=number + - r1.svalue=[255, 257] + - r1.uvalue=[255, 257] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[0, 4294967295] + - r2.uvalue=[0, 4294967295] + - r2.svalue=r2.uvalue +--- +test-case: movsx16 register range to 32 bits without wrap + +pre: ["r1.svalue=[32768, 32770]", "r1.uvalue=[32768, 32770]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s16= r1 + +post: + - r1.type=number + - r1.svalue=[32768, 32770] + - r1.uvalue=[32768, 32770] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[4294934528, 4294934530] + - r2.uvalue=[4294934528, 4294934530] + - r2.svalue=r2.uvalue +--- +test-case: movsx16 register range to 32 bits with wrap + +pre: ["r1.svalue=[65535, 65537]", "r1.uvalue=[65535, 65537]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + w2 s16= r1 + +post: + - r1.type=number + - r1.svalue=[65535, 65537] + - r1.uvalue=[65535, 65537] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[0, 4294967295] + - r2.uvalue=[0, 4294967295] + - r2.svalue=r2.uvalue +--- +test-case: movsx8 register range to 64 bits + +pre: ["r1.svalue=[255, 257]", "r1.uvalue=[255, 257]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s8= r1 + +post: + - r1.type=number + - r1.svalue=[255, 257] + - r1.uvalue=[255, 257] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[-1, 1] +--- +test-case: movsx8 register full range to 64 bits + +pre: ["r1.svalue=[255, 511]", "r1.uvalue=[255, 511]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s8= r1 + +post: + - r1.type=number + - r1.svalue=[255, 511] + - r1.uvalue=[255, 511] + - r1.svalue=r1.uvalue + - r2.type=number +--- +test-case: movsx16 register range to 64 bits + +pre: ["r1.svalue=[65535, 65537]", "r1.uvalue=[65535, 65537]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s16= r1 + +post: + - r1.type=number + - r1.svalue=[65535, 65537] + - r1.uvalue=[65535, 65537] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[-1, 1] +--- +test-case: movsx16 register full range to 64 bits + +pre: ["r1.svalue=[65535, 131071]", "r1.uvalue=[65535, 131071]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s16= r1 + +post: + - r1.type=number + - r1.svalue=[65535, 131071] + - r1.uvalue=[65535, 131071] + - r1.svalue=r1.uvalue + - r2.type=number +--- +test-case: movsx32 register range to 64 bits + +pre: ["r1.svalue=[4294967295, 4294967297]", "r1.uvalue=[4294967295, 4294967297]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s32= r1 + +post: + - r1.type=number + - r1.svalue=[4294967295, 4294967297] + - r1.uvalue=[4294967295, 4294967297] + - r1.svalue=r1.uvalue + - r2.type=number + - r2.svalue=[-1, 1] +--- +test-case: movsx32 register full range to 64 bits + +pre: ["r1.svalue=[4294967295, 8589934591]", "r1.uvalue=[4294967295, 8589934591]", "r1.type=number", "r1.svalue=r1.uvalue"] + +code: + : | + r2 s32= r1 + +post: + - r1.type=number + - r1.svalue=[4294967295, 8589934591] + - r1.uvalue=[4294967295, 8589934591] + - r1.svalue=r1.uvalue + - r2.type=number