From 8fe3744b6422451677d9ee9c17cdf7574cd16c8d Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Fri, 22 Sep 2023 15:03:12 +0300 Subject: [PATCH] zero-extension and sign-extension Signed-off-by: Elazar Gershuni --- src/asm_marshal.cpp | 9 +++++ src/asm_ostream.cpp | 2 ++ src/asm_parse.cpp | 20 ++++++++--- src/asm_syntax.hpp | 22 ++++++++---- src/asm_unmarshal.cpp | 37 +++++++++++++++++--- src/assertions.cpp | 1 + src/crab/ebpf_domain.cpp | 14 ++++++++ src/test/test_marshal.cpp | 4 ++- src/test/test_yaml.cpp | 1 + test-data/sext.yaml | 72 +++++++++++++++++++++++++++++++++++++++ 10 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 test-data/sext.yaml diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 82e4bc207..280dc9113 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -60,6 +60,8 @@ static uint8_t imm(Un::Op op) { case Op::LE32: return 32; case Op::BE64: case Op::LE64: return 64; + case Op::ZEXT32: + case Op::SEXT32: return 32; } assert(false); return {}; @@ -138,6 +140,13 @@ struct MarshalVisitor { .offset = 0, .imm = imm(b.op), }}; + case Un::Op::ZEXT32: + case Un::Op::SEXT32: { + auto [lsh, rsh] = splitExt32(b); + auto lshift = this->operator()(lsh); + auto rshift = this->operator()(rsh); + return {lshift[0], rshift[0]}; + } } assert(false); return {}; diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 1abf83fd9..560a13484 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -220,6 +220,8 @@ struct InstructionPrinterVisitor { case Un::Op::LE32: os_ << "le32 "; break; case Un::Op::LE64: os_ << "le64 "; break; case Un::Op::NEG: os_ << "-"; break; + case Un::Op::ZEXT32: os_ << "zext32 "; break; + case Un::Op::SEXT32: os_ << "sext32 "; break; } os_ << b.dst; } diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 50d98f802..3781c5a30 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -33,7 +33,7 @@ using crab::linear_expression_t; #define OPASSIGN R"_(\s*(\S*)=\s*)_" #define ASSIGN R"_(\s*=\s*)_" #define LONGLONG R"_(\s*(ll|)\s*)_" -#define UNOP R"_((-|be16|be32|be64))_" +#define UNOP R"_((-|be16|be32|be64|zext32|sext32))_" #define PLUSMINUS R"_((\s*[+-])\s*)_" #define LPAREN R"_(\s*\(\s*)_" @@ -68,6 +68,8 @@ static const std::map str_to_unop = { {"le32", Un::Op::LE32}, {"le64", Un::Op::LE64}, {"-", Un::Op::NEG}, + {"zext32", Un::Op::ZEXT32}, + {"sext32", Un::Op::SEXT32}, }; static const std::map str_to_cmpop = { @@ -148,10 +150,18 @@ Instruction parse_instruction(const std::string& line, const std::map>>= 32 + SEXT32, /// Sign extension: dst <<= 32; dst >>= 32 }; Op op; @@ -111,6 +113,12 @@ struct Un { bool is64{}; }; +inline std::tuple splitExt32(Un un) { + Bin::Op rshift_op = un.op == Un::Op::ZEXT32 ? Bin::Op::RSH : Bin::Op::ARSH; + return {Bin{.op = Bin::Op::LSH, .dst = un.dst, .v = Imm{.v = 32}, .is64 = true, .lddw = false}, + Bin{.op = rshift_op, .dst = un.dst, .v = Imm{.v = 32}, .is64 = true, .lddw = false}}; +} + /// This instruction is encoded similarly to LDDW. /// See comment in makeLddw() at asm_unmarshal.cpp struct LoadMapFd { diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index 3536136f1..d1659e470 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -80,6 +80,10 @@ static auto getMemWidth(uint8_t opcode) -> int { // return {}; // } +static Instruction shift32(Reg dst, Bin::Op op) { + return (Instruction)Bin{.op=op,.dst = dst, .v = Imm{32}, .is64=true, .lddw=false}; +} + struct Unmarshaller { vector>& notes; const program_info& info; @@ -397,14 +401,14 @@ struct Unmarshaller { for (size_t pc = 0; pc < insts.size();) { ebpf_inst inst = insts[pc]; Instruction new_ins; - bool lddw = false; + bool skip_instruction = false; bool fallthrough = true; switch (inst.opcode & INST_CLS_MASK) { case INST_CLS_LD: if (inst.opcode == INST_OP_LDDW_IMM) { int32_t next_imm = pc < insts.size() - 1 ? insts[pc + 1].imm : 0; new_ins = makeLddw(inst, next_imm, insts, static_cast(pc)); - lddw = true; + skip_instruction = true; break; } // fallthrough @@ -413,7 +417,32 @@ struct Unmarshaller { case INST_CLS_STX: new_ins = makeMemOp(pc, inst); break; case INST_CLS_ALU: - case INST_CLS_ALU64: new_ins = makeAluOp(pc, inst); break; + case INST_CLS_ALU64: { + new_ins = makeAluOp(pc, inst); + + // Merge (rX <<= 32; rX >>>= 32) into rX = zext32 rX + // (rX <<= 32; rX >>= 32) into rX = sext32 rX + if (pc >= insts.size() - 1) + break; + ebpf_inst next = insts[pc + 1]; + auto dst = Reg{inst.dst}; + + if (new_ins != shift32(dst, Bin::Op::LSH)) + break; + + if ((next.opcode & INST_CLS_MASK) != INST_CLS_ALU64) + break; + auto next_ins = makeAluOp(pc+1, next); + if (next_ins == shift32(dst, Bin::Op::RSH)) { + new_ins = Un{.op = Un::Op::ZEXT32, .dst = dst, .is64 = true}; + skip_instruction = true; + } else if (next_ins == shift32(dst, Bin::Op::ARSH)) { + new_ins = Un{.op = Un::Op::SEXT32, .dst = dst, .is64 = true}; + skip_instruction = true; + } + + break; + } case INST_CLS_JMP32: case INST_CLS_JMP: { @@ -454,7 +483,7 @@ struct Unmarshaller { pc++; note_next_pc(); - if (lddw) { + if (skip_instruction) { pc++; note_next_pc(); } diff --git a/src/assertions.cpp b/src/assertions.cpp index 7a74218bb..7086ac162 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -172,6 +172,7 @@ class AssertExtractor { vector operator()(Un ins) { return { + // This is also true for ZEXT and SEXT Assert{TypeConstraint{ins.dst, TypeGroup::number}} }; } diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 157822d37..c9c9da656 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -1478,6 +1478,20 @@ void ebpf_domain_t::operator()(const Un& stmt) { neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32); havoc_offsets(stmt.dst); break; + case Un::Op::ZEXT32: + case Un::Op::SEXT32: { + // keep relational information if zext32/sext32 is no-op + if (stmt.op == Un::Op::ZEXT32 && m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(false)) { + break; + } + if (stmt.op == Un::Op::SEXT32 && m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(false)) { + break; + } + auto [lsh, rsh] = splitExt32(stmt); + (*this)(lsh); + (*this)(rsh); + break; + } } } diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index abf58f2bd..670f688c7 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -69,9 +69,11 @@ TEST_CASE("disasm_marshal", "[disasm][marshal]") { Un::Op::LE32, Un::Op::LE64, Un::Op::NEG, + Un::Op::ZEXT32, + Un::Op::SEXT32, }; for (auto op : ops) - compare_marshal_unmarshal(Un{.op = op, .dst = Reg{1}}); + compare_marshal_unmarshal(Un{.op = op, .dst = Reg{1}, .is64 = true}); } SECTION("LoadMapFd") { compare_marshal_unmarshal(LoadMapFd{.dst = Reg{1}, .mapfd = 1}, true); } diff --git a/src/test/test_yaml.cpp b/src/test/test_yaml.cpp index 86f572c37..25122e03c 100644 --- a/src/test/test_yaml.cpp +++ b/src/test/test_yaml.cpp @@ -28,6 +28,7 @@ YAML_CASE("test-data/full64.yaml") YAML_CASE("test-data/jump.yaml") YAML_CASE("test-data/packet.yaml") YAML_CASE("test-data/parse.yaml") +YAML_CASE("test-data/sext.yaml") YAML_CASE("test-data/shift.yaml") YAML_CASE("test-data/stack.yaml") YAML_CASE("test-data/subtract.yaml") diff --git a/test-data/sext.yaml b/test-data/sext.yaml new file mode 100644 index 000000000..15f99adfc --- /dev/null +++ b/test-data/sext.yaml @@ -0,0 +1,72 @@ +# Copyright (c) Prevail Verifier contributors. +# SPDX-License-Identifier: MIT + +# Tests for signed/unsigned extension +--- +test-case: small zext relational nop + +pre: ["r1.type=number", "r1.uvalue=[0, 5]", + "r2.type=number", "r2.uvalue=[0, 5]", + "r1.uvalue=r2.uvalue"] + +code: + : | + r1 = zext32 w1 ; nop + +post: + - r1.type=number + - r1.uvalue=[0, 5] + - r2.type=number + - r2.uvalue=[0, 5] + - r1.uvalue=r2.uvalue +--- +test-case: small sext relational nop + +pre: ["r1.type=number", "r1.svalue=[-5, 5]", + "r2.type=number", "r2.svalue=[-5, 5]", + "r1.svalue=r2.svalue"] + +code: + : | + r1 = sext32 w1 ; nop + +post: + - r1.type=number + - r1.svalue=[-5, 5] + - r2.type=number + - r2.svalue=[-5, 5] + - r1.svalue=r2.svalue +--- +test-case: large zext relational is two shifts + +pre: ["r1.type=number", "r1.uvalue=[0, 4294967299]", + "r2.type=number", "r2.uvalue=[0, 4294967299]", + "r1.uvalue=r2.uvalue"] + +code: + : | + r1 = zext32 w1 ; nop + +post: + - r1.type=number + - r1.svalue=[0, 4294967295] + - r1.svalue=r1.uvalue + - r1.uvalue=[0, 4294967295] + - r2.type=number + - r2.uvalue=[0, 4294967299] +--- +test-case: large sext relational is two shifts + +pre: ["r1.type=number", "r1.svalue=[-5, 4294967299]", + "r2.type=number", "r2.svalue=[-5, 4294967299]", + "r1.svalue=r2.svalue"] + +code: + : | + r1 = sext32 w1 ; nop + +post: + - r1.type=number + - r1.svalue=[-2147483648, 2147483647] + - r2.type=number + - r2.svalue=[-5, 4294967299]