From d1a7cc4208b5d13e73c834b79b7b397cfeccd898 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sat, 9 Nov 2024 08:46:42 -0800 Subject: [PATCH 1/8] Update notes for Windows to install VS Clang Signed-off-by: Alan Jowett --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fa67f563a..449606798 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,11 @@ cmake --build build #### Dependencies * Install [git](https://git-scm.com/download/win) -* Install [Visual Studio Build Tools 2022](https://aka.ms/vs/17/release/vs_buildtools.exe) and choose the "C++ build tools" workload (Visual Studio Build Tools 2022 has support for CMake Version 3.25). +* Install [Visual Studio Build Tools 2022](https://aka.ms/vs/17/release/vs_buildtools.exe) and: + * Choose the "C++ build tools" workload (Visual Studio Build Tools 2022 has support for CMake Version 3.25) + * Under Individual Components, select: + * "C++ Clang Compiler" + * "MSBuild support for LLVM" * Install [nuget.exe](https://www.nuget.org/downloads) #### Make on Windows (which uses a multi-configuration generator) From 501c6e08084d0e7d743a56b750ba4a72873495fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Nov 2024 16:38:16 +0000 Subject: [PATCH 2/8] Bump external/libbtf from `69e0268` to `2de28d7` Bumps [external/libbtf](https://github.com/Alan-Jowett/libbtf) from `69e0268` to `2de28d7`. - [Release notes](https://github.com/Alan-Jowett/libbtf/releases) - [Commits](https://github.com/Alan-Jowett/libbtf/compare/69e02688e89352d49c72707c2819448515d856df...2de28d74884f051a1b129a4535d46841fbf6d6de) --- updated-dependencies: - dependency-name: external/libbtf dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- external/libbtf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/libbtf b/external/libbtf index 69e02688e..2de28d748 160000 --- a/external/libbtf +++ b/external/libbtf @@ -1 +1 @@ -Subproject commit 69e02688e89352d49c72707c2819448515d856df +Subproject commit 2de28d74884f051a1b129a4535d46841fbf6d6de From 71a34c73cf3c42a2fd2ac294ddd16f1c457b82b7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 9 Nov 2024 16:38:14 +0000 Subject: [PATCH 3/8] Bump external/bpf_conformance from `1ce2861` to `b544539` Bumps [external/bpf_conformance](https://github.com/Alan-Jowett/bpf_conformance) from `1ce2861` to `b544539`. - [Release notes](https://github.com/Alan-Jowett/bpf_conformance/releases) - [Commits](https://github.com/Alan-Jowett/bpf_conformance/compare/1ce286105edf3577ef380bd8a498cfa2314d6dfa...b544539840db58bcae75b153d3c0368820a15828) --- updated-dependencies: - dependency-name: external/bpf_conformance dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- external/bpf_conformance | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/bpf_conformance b/external/bpf_conformance index 1ce286105..b54453984 160000 --- a/external/bpf_conformance +++ b/external/bpf_conformance @@ -1 +1 @@ -Subproject commit 1ce286105edf3577ef380bd8a498cfa2314d6dfa +Subproject commit b544539840db58bcae75b153d3c0368820a15828 From 41fbd5a378e80afbb45813a72a3353226c899fef Mon Sep 17 00:00:00 2001 From: Dave Thaler Date: Sat, 9 Nov 2024 12:41:37 -0800 Subject: [PATCH 4/8] Fix bpf2bpf local function calls with stack usage (#784) Signed-off-by: Dave Thaler --- ebpf-samples | 2 +- src/asm_ostream.hpp | 2 +- src/asm_syntax.hpp | 2 +- src/crab/ebpf_domain.cpp | 26 ++++++++++++++------------ src/crab/ebpf_domain.hpp | 3 +-- test-data/calllocal.yaml | 8 ++++---- test-data/stack.yaml | 2 +- test-data/subtract.yaml | 1 + 8 files changed, 24 insertions(+), 22 deletions(-) diff --git a/ebpf-samples b/ebpf-samples index 325cce1bc..33f01fafa 160000 --- a/ebpf-samples +++ b/ebpf-samples @@ -1 +1 @@ -Subproject commit 325cce1bc528a8b70e02ea914d407b4e4f89731d +Subproject commit 33f01fafa414520ad56ab219db343422bb655b32 diff --git a/src/asm_ostream.hpp b/src/asm_ostream.hpp index 83d210cd9..5f9d0e190 100644 --- a/src/asm_ostream.hpp +++ b/src/asm_ostream.hpp @@ -17,7 +17,7 @@ inline std::function label_to_offset16(const pc_t pc) { const int64_t offset = label.from - gsl::narrow(pc) - 1; const bool is16 = std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); - return is16 ? gsl::narrow(offset) : 0; + return gsl::narrow(is16 ? 
offset : 0); }; } diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index a008fd73a..9e007c9e1 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -59,7 +59,7 @@ struct label_t { if (stack_frame_prefix.empty()) { return 1; } - return 2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER); + return gsl::narrow(2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER)); } friend std::ostream& operator<<(std::ostream& os, const label_t& label) { diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 02e37074a..491b7f1f4 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -938,18 +938,12 @@ void ebpf_domain_t::restore_callee_saved_registers(const std::string& prefix) { } void ebpf_domain_t::havoc_subprogram_stack(const std::string& prefix) { - // Calculate the call stack depth being returned from. Since we're returning - // *to* the given prefix, the current call stack is 2 + the number of - // '/' separators because we need to account for the current frame and the root frame. 
- const int call_stack_depth = 2 + std::ranges::count(prefix, STACK_FRAME_DELIMITER); - const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; const auto intv = m_inv.eval_interval(r10_stack_offset); if (!intv.is_singleton()) { return; } - const int64_t stack_offset = intv.singleton()->cast_to(); - const int32_t stack_start = stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth; + const int64_t stack_start = intv.singleton()->cast_to() - EBPF_SUBPROGRAM_STACK_SIZE; for (const data_kind_t kind : iterate_kinds()) { stack.havoc(m_inv, kind, stack_start, EBPF_SUBPROGRAM_STACK_SIZE); } @@ -1212,15 +1206,15 @@ void ebpf_domain_t::operator()(const basic_block_t& bb) { } } -void ebpf_domain_t::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const int call_stack_depth) const { +void ebpf_domain_t::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, + const linear_expression_t& ub) const { using namespace crab::dsl_syntax; const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; const auto interval = inv.eval_interval(r10_stack_offset); if (interval.is_singleton()) { const int64_t stack_offset = interval.singleton()->cast_to(); - require(inv, lb >= stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth, - "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth"); + require(inv, lb >= stack_offset - EBPF_SUBPROGRAM_STACK_SIZE, + "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE"); } require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); } @@ -1391,6 +1385,10 @@ void ebpf_domain_t::operator()(const Exit& a) { } havoc_subprogram_stack(prefix); restore_callee_saved_registers(prefix); + + // Restore r10. 
+ constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, EBPF_SUBPROGRAM_STACK_SIZE, 64); } void ebpf_domain_t::operator()(const Jmp&) const { @@ -1709,7 +1707,7 @@ void ebpf_domain_t::operator()(const ValidAccess& s) { } case T_STACK: { auto [lb, ub] = lb_ub_access_pair(s, reg.stack_offset); - check_access_stack(inv, lb, ub, s.call_stack_depth); + check_access_stack(inv, lb, ub); // if within bounds, it can never be null if (s.access_type == AccessType::read) { // Require that the stack range contains numbers. @@ -2258,6 +2256,10 @@ void ebpf_domain_t::operator()(const CallLocal& call) { return; } save_callee_saved_registers(call.stack_frame_prefix); + + // Update r10. + constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, -EBPF_SUBPROGRAM_STACK_SIZE, 64); } void ebpf_domain_t::operator()(const Callx& callx) { diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index 9f3cee592..91605ec3f 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -171,8 +171,7 @@ class ebpf_domain_t final { void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const; // memory check / load / store - void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - int call_stack_depth) const; + void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, std::optional packet_size) const; diff --git a/test-data/calllocal.yaml b/test-data/calllocal.yaml index 5f09446ae..74e558ef3 100644 --- a/test-data/calllocal.yaml +++ b/test-data/calllocal.yaml @@ -347,14 +347,14 @@ code: r0 += r1 ; r0 = 1 + 2 + 3 = 6 exit : | - *(u8 *)(r10 - 513) = 2 + *(u8 *)(r10 - 1) = 2 call - r1 = *(u8 *)(r10 - 513) + r1 = *(u8 *)(r10 - 1) 
r0 += r1 ; r0 = 2 + 3 = 5 exit : | - *(u8 *)(r10 - 1025) = 3 - r0 = *(u8 *)(r10 - 1025) + *(u8 *)(r10 - 1) = 3 + r0 = *(u8 *)(r10 - 1) exit post: diff --git a/test-data/stack.yaml b/test-data/stack.yaml index 456097c4c..8df80ca1e 100644 --- a/test-data/stack.yaml +++ b/test-data/stack.yaml @@ -594,4 +594,4 @@ post: - s[511].uvalue=0 messages: - - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth (valid_access(r10.offset-513, width=1) for write)" + - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE (valid_access(r10.offset-513, width=1) for write)" diff --git a/test-data/subtract.yaml b/test-data/subtract.yaml index 3c7047d96..a065a6b84 100644 --- a/test-data/subtract.yaml +++ b/test-data/subtract.yaml @@ -159,3 +159,4 @@ post: - r10.stack_offset=4096 messages: - "0: Upper bound must be at most EBPF_TOTAL_STACK_SIZE (r2.type == number or r1.type == r2.type in {ctx, stack, packet})" + - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE (r2.type == number or r1.type == r2.type in {ctx, stack, packet})" From b825d2df6695adc46bd07650c6a9fdc8b99e51b3 Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Sun, 10 Nov 2024 01:54:13 +0200 Subject: [PATCH 5/8] Split ebpf_domain_t into domain, transformer, checker (#787) Signed-off-by: Elazar Gershuni --- src/crab/ebpf_domain.cpp | 311 +++++++++++++++++++------------------- src/crab/ebpf_domain.hpp | 165 +++++++++++--------- src/crab/fwd_analyzer.cpp | 8 +- src/crab_verifier.cpp | 15 +- src/main/check.cpp | 2 +- test-data/jump.yaml | 177 +++++++++++++++++----- 6 files changed, 407 insertions(+), 271 deletions(-) diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 491b7f1f4..04cbfa929 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -737,7 +737,7 @@ std::optional ebpf_domain_t::get_type_offset_variable(const Reg& reg return get_type_offset_variable(reg, m_inv); } -void 
ebpf_domain_t::set_require_check(std::function f) { check_require = std::move(f); } +void ebpf_checker::set_require_check(std::function f) { check_require = std::move(f); } ebpf_domain_t ebpf_domain_t::top() { ebpf_domain_t abs; @@ -854,34 +854,34 @@ void ebpf_domain_t::operator+=(const linear_constraint_t& cst) { m_inv += cst; } void ebpf_domain_t::operator-=(const variable_t var) { m_inv -= var; } -void ebpf_domain_t::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } -void ebpf_domain_t::assign(const variable_t x, const int64_t e) { m_inv.set(x, interval_t(e)); } +void ebpf_transformer::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } +void ebpf_transformer::assign(const variable_t x, const int64_t e) { m_inv.set(x, interval_t(e)); } -void ebpf_domain_t::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, - const int finite_width) { +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, + const int finite_width) { m_inv.apply(op, x, y, z, finite_width); } -void ebpf_domain_t::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { m_inv.apply(op, x, y, z, finite_width); } -void ebpf_domain_t::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { m_inv.apply(op, x, y, z, finite_width); } -void ebpf_domain_t::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const number_t& k, - const int finite_width) { +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, 
const variable_t y, const number_t& k, + const int finite_width) { m_inv.apply(op, x, y, k, finite_width); } -void ebpf_domain_t::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); } -void ebpf_domain_t::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); } @@ -902,7 +902,7 @@ static void havoc_register(NumAbsDomain& inv, const Reg& reg) { inv -= r.uvalue; } -void ebpf_domain_t::scratch_caller_saved_registers() { +void ebpf_transformer::scratch_caller_saved_registers() { for (int i = R1_ARG; i <= R5_ARG; i++) { Reg r{gsl::narrow(i)}; havoc_register(m_inv, r); @@ -910,7 +910,7 @@ void ebpf_domain_t::scratch_caller_saved_registers() { } } -void ebpf_domain_t::save_callee_saved_registers(const std::string& prefix) { +void ebpf_transformer::save_callee_saved_registers(const std::string& prefix) { // Create variables specific to the new call stack frame that store // copies of the states of r6 through r9. 
for (int r = R6; r <= R9; r++) { @@ -923,7 +923,7 @@ void ebpf_domain_t::save_callee_saved_registers(const std::string& prefix) { } } -void ebpf_domain_t::restore_callee_saved_registers(const std::string& prefix) { +void ebpf_transformer::restore_callee_saved_registers(const std::string& prefix) { for (int r = R6; r <= R9; r++) { for (const data_kind_t kind : iterate_kinds()) { const variable_t src_var = variable_t::stack_frame_var(kind, r, prefix); @@ -937,7 +937,7 @@ void ebpf_domain_t::restore_callee_saved_registers(const std::string& prefix) { } } -void ebpf_domain_t::havoc_subprogram_stack(const std::string& prefix) { +void ebpf_transformer::havoc_subprogram_stack(const std::string& prefix) { const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; const auto intv = m_inv.eval_interval(r10_stack_offset); if (!intv.is_singleton()) { @@ -949,7 +949,7 @@ void ebpf_domain_t::havoc_subprogram_stack(const std::string& prefix) { } } -void ebpf_domain_t::forget_packet_pointers() { +void ebpf_transformer::forget_packet_pointers() { using namespace crab::dsl_syntax; for (const variable_t type_variable : variable_t::get_type_variables()) { @@ -961,7 +961,7 @@ void ebpf_domain_t::forget_packet_pointers() { } } - initialize_packet(*this); + initialize_packet(dom); } static void overflow_bounds(NumAbsDomain& inv, variable_t lhs, number_t span, int finite_width, bool issigned) { @@ -1054,109 +1054,113 @@ static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_ } } -void ebpf_domain_t::add(const variable_t lhs, const variable_t op2) { +void ebpf_transformer::add(const variable_t lhs, const variable_t op2) { apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); } -void ebpf_domain_t::add(const variable_t lhs, const number_t& op2) { +void ebpf_transformer::add(const variable_t lhs, const number_t& op2) { apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); } -void ebpf_domain_t::sub(const variable_t lhs, const 
variable_t op2) { +void ebpf_transformer::sub(const variable_t lhs, const variable_t op2) { apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); } -void ebpf_domain_t::sub(const variable_t lhs, const number_t& op2) { +void ebpf_transformer::sub(const variable_t lhs, const number_t& op2) { apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); } // Add/subtract with overflow are both signed and unsigned. We can use either one of the two to compute the // result before adjusting for overflow, though if one is top we want to use the other to retain precision. -void ebpf_domain_t::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, finite_width); } -void ebpf_domain_t::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, finite_width); } -void ebpf_domain_t::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? 
lhss : lhsu, op2, finite_width); } -void ebpf_domain_t::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, finite_width); } -void ebpf_domain_t::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { +void ebpf_transformer::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, -1, finite_width); } -void ebpf_domain_t::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); } -void ebpf_domain_t::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); } -void ebpf_domain_t::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); } -void ebpf_domain_t::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); } -void 
ebpf_domain_t::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); } -void ebpf_domain_t::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); } -void ebpf_domain_t::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::bitwise_and(const variable_t 
lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { // Use finite width 64 to make the svalue be set as well as the uvalue. apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, 64); } -void ebpf_domain_t::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, 64); } -void ebpf_domain_t::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, finite_width); } -void ebpf_domain_t::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, 64); } -void ebpf_domain_t::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { 
+void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); } -void ebpf_domain_t::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { +void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); } static void assume(NumAbsDomain& inv, const linear_constraint_t& cst) { inv += cst; } -void ebpf_domain_t::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } +void ebpf_transformer::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } -void ebpf_domain_t::require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const { +void ebpf_checker::require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const { if (check_require) { check_require(inv, cst, s + " (" + this->current_assertion + ")"); } @@ -1167,10 +1171,10 @@ void ebpf_domain_t::require(NumAbsDomain& inv, const linear_constraint_t& cst, c } /// Forget everything we know about the value of a variable. 
-void ebpf_domain_t::havoc(const variable_t v) { m_inv -= v; } -void ebpf_domain_t::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } +void ebpf_transformer::havoc(const variable_t v) { m_inv -= v; } +void ebpf_transformer::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } -void ebpf_domain_t::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } +void ebpf_transformer::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } static linear_constraint_t type_is_pointer(const reg_pack_t& r) { using namespace crab::dsl_syntax; @@ -1189,7 +1193,7 @@ static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { return r.type != T_STACK; } -void ebpf_domain_t::operator()(const Assertion& assertion) { +void ebpf_checker::operator()(const Assertion& assertion) { if (check_require || thread_local_options.assume_assertions) { this->current_assertion = to_string(assertion); std::visit(*this, assertion); @@ -1197,17 +1201,8 @@ void ebpf_domain_t::operator()(const Assertion& assertion) { } } -void ebpf_domain_t::operator()(const basic_block_t& bb) { - for (const GuardedInstruction& ins : bb) { - for (const Assertion& assertion : ins.preconditions) { - (*this)(assertion); - } - std::visit(*this, ins.cmd); - } -} - -void ebpf_domain_t::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, - const linear_expression_t& ub) const { +void ebpf_checker::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, + const linear_expression_t& ub) const { using namespace crab::dsl_syntax; const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; const auto interval = inv.eval_interval(r10_stack_offset); @@ -1219,8 +1214,8 @@ void ebpf_domain_t::check_access_stack(NumAbsDomain& inv, const linear_expressio require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); } -void ebpf_domain_t::check_access_context(NumAbsDomain& 
inv, const linear_expression_t& lb, - const linear_expression_t& ub) const { +void ebpf_checker::check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, + const linear_expression_t& ub) const { using namespace crab::dsl_syntax; require(inv, lb >= 0, "Lower bound must be at least 0"); require(inv, ub <= global_program_info->type.context_descriptor->size, @@ -1228,8 +1223,8 @@ void ebpf_domain_t::check_access_context(NumAbsDomain& inv, const linear_express std::to_string(global_program_info->type.context_descriptor->size)); } -void ebpf_domain_t::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const std::optional packet_size) const { +void ebpf_checker::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const std::optional packet_size) const { using namespace crab::dsl_syntax; require(inv, lb >= variable_t::meta_offset(), "Lower bound must be at least meta_offset"); if (packet_size) { @@ -1240,14 +1235,14 @@ void ebpf_domain_t::check_access_packet(NumAbsDomain& inv, const linear_expressi } } -void ebpf_domain_t::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const variable_t shared_region_size) const { +void ebpf_checker::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const variable_t shared_region_size) const { using namespace crab::dsl_syntax; require(inv, lb >= 0, "Lower bound must be at least 0"); require(inv, ub <= shared_region_size, std::string("Upper bound must be at most ") + shared_region_size.name()); } -void ebpf_domain_t::operator()(const Assume& s) { +void ebpf_transformer::operator()(const Assume& s) { const Condition cond = s.cond; const auto dst = reg_pack(cond.left); if (const auto psrc_reg = std::get_if(&cond.right)) { @@ -1263,8 +1258,8 @@ void ebpf_domain_t::operator()(const Assume& s) { } else { // Either pointers 
to a singleton region, // or an equality comparison on map descriptors/pointers to non-singleton locations - if (const auto dst_offset = get_type_offset_variable(cond.left, type)) { - if (const auto src_offset = get_type_offset_variable(src_reg, type)) { + if (const auto dst_offset = dom.get_type_offset_variable(cond.left, type)) { + if (const auto src_offset = dom.get_type_offset_variable(src_reg, type)) { inv += assume_cst_offsets_reg(cond.op, dst_offset.value(), src_offset.value()); } } @@ -1272,7 +1267,7 @@ void ebpf_domain_t::operator()(const Assume& s) { }); } else { // We should only reach here if `--assume-assert` is off - assert(!thread_local_options.assume_assertions || is_bottom()); + assert(!thread_local_options.assume_assertions || dom.is_bottom()); // be sound in any case, it happens to flush out bugs: m_inv.set_to_top(); } @@ -1284,7 +1279,7 @@ void ebpf_domain_t::operator()(const Assume& s) { } } -void ebpf_domain_t::operator()(const Undefined& a) {} +void ebpf_transformer::operator()(const Undefined& a) {} // Simple truncation function usable with swap_endianness(). template @@ -1292,7 +1287,7 @@ constexpr T truncate(T x) noexcept { return x; } -void ebpf_domain_t::operator()(const Un& stmt) { +void ebpf_transformer::operator()(const Un& stmt) { const auto dst = reg_pack(stmt.dst); auto swap_endianness = [&](const variable_t v, auto be_or_le) { if (m_inv.entail(type_is_number(stmt.dst))) { @@ -1377,7 +1372,7 @@ void ebpf_domain_t::operator()(const Un& stmt) { } } -void ebpf_domain_t::operator()(const Exit& a) { +void ebpf_transformer::operator()(const Exit& a) { // Clean up any state for the current stack frame. const std::string prefix = a.stack_frame_prefix; if (prefix.empty()) { @@ -1391,11 +1386,11 @@ void ebpf_domain_t::operator()(const Exit& a) { add(r10_reg, EBPF_SUBPROGRAM_STACK_SIZE, 64); } -void ebpf_domain_t::operator()(const Jmp&) const { +void ebpf_transformer::operator()(const Jmp&) const { // This is a NOP. 
It only exists to hold the jump preconditions. } -void ebpf_domain_t::operator()(const Comparable& s) { +void ebpf_checker::operator()(const Comparable& s) { using namespace crab::dsl_syntax; if (type_inv.same_type(m_inv, s.r1, s.r2)) { // Same type. If both are numbers, that's okay. Otherwise: @@ -1416,13 +1411,13 @@ void ebpf_domain_t::operator()(const Comparable& s) { }; } -void ebpf_domain_t::operator()(const Addable& s) { +void ebpf_checker::operator()(const Addable& s) { if (!type_inv.implies_type(m_inv, type_is_pointer(reg_pack(s.ptr)), type_is_number(s.num))) { require(m_inv, linear_constraint_t::false_const(), "Only numbers can be added to pointers"); } } -void ebpf_domain_t::operator()(const ValidDivisor& s) { +void ebpf_checker::operator()(const ValidDivisor& s) { using namespace crab::dsl_syntax; const auto reg = reg_pack(s.reg); if (!type_inv.implies_type(m_inv, type_is_pointer(reg), type_is_number(s.reg))) { @@ -1434,19 +1429,19 @@ void ebpf_domain_t::operator()(const ValidDivisor& s) { } } -void ebpf_domain_t::operator()(const ValidStore& s) { +void ebpf_checker::operator()(const ValidStore& s) { if (!type_inv.implies_type(m_inv, type_is_not_stack(reg_pack(s.mem)), type_is_number(s.val))) { require(m_inv, linear_constraint_t::false_const(), "Only numbers can be stored to externally-visible regions"); } } -void ebpf_domain_t::operator()(const TypeConstraint& s) { +void ebpf_checker::operator()(const TypeConstraint& s) { if (!type_inv.is_in_group(m_inv, s.reg, s.types)) { require(m_inv, linear_constraint_t::false_const(), "Invalid type"); } } -void ebpf_domain_t::operator()(const BoundedLoopCount& s) { +void ebpf_checker::operator()(const BoundedLoopCount& s) { // Enforces an upper bound on loop iterations by checking that the loop counter // does not exceed the specified limit using namespace crab::dsl_syntax; @@ -1454,7 +1449,7 @@ void ebpf_domain_t::operator()(const BoundedLoopCount& s) { require(m_inv, counter <= s.limit, "Loop counter is too 
large"); } -void ebpf_domain_t::operator()(const FuncConstraint& s) { +void ebpf_checker::operator()(const FuncConstraint& s) { // Look up the helper function id. const reg_pack_t& reg = reg_pack(s.reg); const auto src_interval = m_inv.eval_interval(reg.svalue); @@ -1476,7 +1471,7 @@ void ebpf_domain_t::operator()(const FuncConstraint& s) { require(m_inv, linear_constraint_t::false_const(), "callx helper function id is not a valid singleton"); } -void ebpf_domain_t::operator()(const ValidSize& s) { +void ebpf_checker::operator()(const ValidSize& s) { using namespace crab::dsl_syntax; const auto r = reg_pack(s.reg); require(m_inv, s.can_be_zero ? r.svalue >= 0 : r.svalue > 0, "Invalid size"); @@ -1598,7 +1593,7 @@ interval_t ebpf_domain_t::get_map_max_entries(const Reg& map_fd_reg) const { return result; } -void ebpf_domain_t::operator()(const ValidCall& s) { +void ebpf_checker::operator()(const ValidCall& s) { if (!s.stack_frame_prefix.empty()) { const EbpfHelperPrototype proto = global_program_info->platform->get_helper_prototype(s.func); if (proto.return_type == EBPF_RETURN_TYPE_INTEGER_OR_NO_RETURN_IF_SUCCEED) { @@ -1608,22 +1603,22 @@ void ebpf_domain_t::operator()(const ValidCall& s) { } } -void ebpf_domain_t::operator()(const ValidMapKeyValue& s) { +void ebpf_checker::operator()(const ValidMapKeyValue& s) { using namespace crab::dsl_syntax; - const auto fd_type = get_map_type(s.map_fd_reg); + const auto fd_type = dom.get_map_type(s.map_fd_reg); const auto access_reg = reg_pack(s.access_reg); int width; if (s.key) { - const auto key_size = get_map_key_size(s.map_fd_reg).singleton(); + const auto key_size = dom.get_map_key_size(s.map_fd_reg).singleton(); if (!key_size.has_value()) { require(m_inv, linear_constraint_t::false_const(), "Map key size is not singleton"); return; } width = key_size->narrow(); } else { - const auto value_size = get_map_value_size(s.map_fd_reg).singleton(); + const auto value_size = dom.get_map_value_size(s.map_fd_reg).singleton(); if 
(!value_size.has_value()) { require(m_inv, linear_constraint_t::false_const(), "Map value size is not singleton"); return; @@ -1655,7 +1650,7 @@ void ebpf_domain_t::operator()(const ValidMapKeyValue& s) { variable_t key_value = variable_t::cell_var(data_kind_t::svalues, offset.value(), sizeof(uint32_t)); - if (auto max_entries = get_map_max_entries(s.map_fd_reg).lb().number()) { + if (auto max_entries = dom.get_map_max_entries(s.map_fd_reg).lb().number()) { require(inv, key_value < *max_entries, "Array index overflow"); } else { require(inv, linear_constraint_t::false_const(), "Max entries is not finite"); @@ -1688,7 +1683,7 @@ static std::tuple lb_ub_access_pair(co : lb + reg_pack(std::get(s.width)).svalue; return {lb, ub}; } -void ebpf_domain_t::operator()(const ValidAccess& s) { +void ebpf_checker::operator()(const ValidAccess& s) { using namespace crab::dsl_syntax; const bool is_comparison_check = s.width == Value{Imm{0}}; @@ -1763,13 +1758,13 @@ void ebpf_domain_t::operator()(const ValidAccess& s) { }); } -void ebpf_domain_t::operator()(const ZeroCtxOffset& s) { +void ebpf_checker::operator()(const ZeroCtxOffset& s) { using namespace crab::dsl_syntax; const auto reg = reg_pack(s.reg); require(m_inv, reg.ctx_offset == 0, "Nonzero context offset"); } -void ebpf_domain_t::operator()(const Packet& a) { +void ebpf_transformer::operator()(const Packet& a) { const auto reg = reg_pack(R0_RETURN_VALUE); constexpr Reg r0_reg{R0_RETURN_VALUE}; type_inv.assign_type(m_inv, r0_reg, T_NUM); @@ -1779,8 +1774,8 @@ void ebpf_domain_t::operator()(const Packet& a) { scratch_caller_saved_registers(); } -void ebpf_domain_t::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, - const int width, const Reg& src_reg) { +void ebpf_transformer::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, + const int width, const Reg& src_reg) { type_inv.assign_type(inv, target_reg, stack.load(inv, data_kind_t::types, addr, 
width)); using namespace crab::dsl_syntax; if (inv.entail(width <= reg_pack(src_reg).stack_numeric_size)) { @@ -1819,8 +1814,8 @@ void ebpf_domain_t::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, cons } } -void ebpf_domain_t::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, - const int width) { +void ebpf_transformer::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, + const int width) { using namespace crab::dsl_syntax; if (inv.is_bottom()) { return; @@ -1886,8 +1881,8 @@ void ebpf_domain_t::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const } } -void ebpf_domain_t::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, - const int width) { +void ebpf_transformer::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, + const linear_expression_t& addr, const int width) { if (inv.is_bottom()) { return; } @@ -1906,7 +1901,7 @@ void ebpf_domain_t::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& targe } } -void ebpf_domain_t::do_load(const Mem& b, const Reg& target_reg) { +void ebpf_transformer::do_load(const Mem& b, const Reg& target_reg) { using namespace crab::dsl_syntax; const auto mem_reg = reg_pack(b.access.basereg); @@ -1949,10 +1944,10 @@ void ebpf_domain_t::do_load(const Mem& b, const Reg& target_reg) { }); } -void ebpf_domain_t::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, - const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, - const std::optional& opt_val_reg) { +void ebpf_transformer::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, + const linear_expression_t& val_type, const linear_expression_t& val_svalue, + const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { { const std::optional var = stack.store_type(inv, addr, width, 
val_type); type_inv.assign_type(inv, var, val_type); @@ -2041,7 +2036,7 @@ void ebpf_domain_t::do_store_stack(NumAbsDomain& inv, const linear_expression_t& } } -void ebpf_domain_t::operator()(const Mem& b) { +void ebpf_transformer::operator()(const Mem& b) { if (m_inv.is_bottom()) { return; } @@ -2058,9 +2053,9 @@ void ebpf_domain_t::operator()(const Mem& b) { } } -void ebpf_domain_t::do_mem_store(const Mem& b, const linear_expression_t& val_type, - const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, - const std::optional& opt_val_reg) { +void ebpf_transformer::do_mem_store(const Mem& b, const linear_expression_t& val_type, + const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { if (m_inv.is_bottom()) { return; } @@ -2078,7 +2073,7 @@ void ebpf_domain_t::do_mem_store(const Mem& b, const linear_expression_t& val_ty } m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, const type_encoding_t type) { if (type == T_STACK) { - const auto base_addr = linear_expression_t(get_type_offset_variable(b.access.basereg, type).value()); + const auto base_addr = linear_expression_t(dom.get_type_offset_variable(b.access.basereg, type).value()); do_store_stack(inv, dsl_syntax::operator+(base_addr, offset), width, val_type, val_svalue, val_uvalue, opt_val_reg); } @@ -2101,7 +2096,7 @@ static Bin atomic_to_bin(const Atomic& a) { return bin; } -void ebpf_domain_t::operator()(const Atomic& a) { +void ebpf_transformer::operator()(const Atomic& a) { if (m_inv.is_bottom()) { return; } @@ -2152,7 +2147,7 @@ void ebpf_domain_t::operator()(const Atomic& a) { type_inv.havoc_type(m_inv, r11); } -void ebpf_domain_t::operator()(const Call& call) { +void ebpf_transformer::operator()(const Call& call) { using namespace crab::dsl_syntax; if (m_inv.is_bottom()) { return; @@ -2179,10 +2174,10 @@ void ebpf_domain_t::operator()(const Call& call) { case ArgPair::Kind::PTR_TO_WRITABLE_MEM: { 
bool store_numbers = true; - auto variable = get_type_offset_variable(param.mem); + auto variable = dom.get_type_offset_variable(param.mem); if (!variable.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Argument must be a pointer to writable memory"); - return; + // checked by the checker + break; } variable_t addr = variable.value(); variable_t width = reg_pack(param.size).svalue; @@ -2219,16 +2214,16 @@ void ebpf_domain_t::operator()(const Call& call) { if (call.is_map_lookup) { // This is the only way to get a null pointer if (maybe_fd_reg) { - if (const auto map_type = get_map_type(*maybe_fd_reg)) { + if (const auto map_type = dom.get_map_type(*maybe_fd_reg)) { if (global_program_info->platform->get_map_type(*map_type).value_type == EbpfMapValueType::MAP) { - if (const auto inner_map_fd = get_map_inner_map_fd(*maybe_fd_reg)) { + if (const auto inner_map_fd = dom.get_map_inner_map_fd(*maybe_fd_reg)) { do_load_mapfd(r0_reg, to_signed(*inner_map_fd), true); goto out; } } else { assign_valid_ptr(r0_reg, true); assign(r0_pack.shared_offset, 0); - m_inv.set(r0_pack.shared_region_size, get_map_value_size(*maybe_fd_reg)); + m_inv.set(r0_pack.shared_region_size, dom.get_map_value_size(*maybe_fd_reg)); type_inv.assign_type(m_inv, r0_reg, T_SHARED); } } @@ -2250,7 +2245,7 @@ void ebpf_domain_t::operator()(const Call& call) { } } -void ebpf_domain_t::operator()(const CallLocal& call) { +void ebpf_transformer::operator()(const CallLocal& call) { using namespace crab::dsl_syntax; if (m_inv.is_bottom()) { return; @@ -2262,7 +2257,7 @@ void ebpf_domain_t::operator()(const CallLocal& call) { add(r10_reg, -EBPF_SUBPROGRAM_STACK_SIZE, 64); } -void ebpf_domain_t::operator()(const Callx& callx) { +void ebpf_transformer::operator()(const Callx& callx) { using namespace crab::dsl_syntax; if (m_inv.is_bottom()) { return; @@ -2284,7 +2279,7 @@ void ebpf_domain_t::operator()(const Callx& callx) { } } -void ebpf_domain_t::do_load_mapfd(const Reg& dst_reg, const int 
mapfd, const bool maybe_null) { +void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const bool maybe_null) { const EbpfMapDescriptor& desc = global_program_info->platform->get_map_descriptor(mapfd); const EbpfMapType& type = global_program_info->platform->get_map_type(desc.type); if (type.value_type == EbpfMapValueType::PROGRAM) { @@ -2297,9 +2292,9 @@ void ebpf_domain_t::do_load_mapfd(const Reg& dst_reg, const int mapfd, const boo assign_valid_ptr(dst_reg, maybe_null); } -void ebpf_domain_t::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } +void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } -void ebpf_domain_t::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { +void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { using namespace crab::dsl_syntax; const reg_pack_t& reg = reg_pack(dst_reg); havoc(reg.svalue); @@ -2315,7 +2310,7 @@ void ebpf_domain_t::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) // If nothing is known of the stack_numeric_size, // try to recompute the stack_numeric_size. 
-void ebpf_domain_t::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { const variable_t stack_numeric_size_variable = variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); @@ -2332,13 +2327,13 @@ void ebpf_domain_t::recompute_stack_numeric_size(NumAbsDomain& inv, const variab } } -void ebpf_domain_t::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { recompute_stack_numeric_size(inv, reg_pack(reg).type); } -void ebpf_domain_t::add(const Reg& reg, const int imm, const int finite_width) { +void ebpf_transformer::add(const Reg& reg, const int imm, const int finite_width) { const auto dst = reg_pack(reg); - const auto offset = get_type_offset_variable(reg); + const auto offset = dom.get_type_offset_variable(reg); add_overflow(dst.svalue, dst.uvalue, imm, finite_width); if (offset.has_value()) { add(offset.value(), imm); @@ -2353,7 +2348,7 @@ void ebpf_domain_t::add(const Reg& reg, const int imm, const int finite_width) { } } -void ebpf_domain_t::shl(const Reg& dst_reg, int imm, const int finite_width) { +void ebpf_transformer::shl(const Reg& dst_reg, int imm, const int finite_width) { const reg_pack_t dst = reg_pack(dst_reg); // The BPF ISA requires masking the imm. @@ -2391,7 +2386,7 @@ void ebpf_domain_t::shl(const Reg& dst_reg, int imm, const int finite_width) { havoc_offsets(dst_reg); } -void ebpf_domain_t::lshr(const Reg& dst_reg, int imm, int finite_width) { +void ebpf_transformer::lshr(const Reg& dst_reg, int imm, int finite_width) { reg_pack_t dst = reg_pack(dst_reg); // The BPF ISA requires masking the imm. 
@@ -2438,8 +2433,8 @@ static int _movsx_bits(const Bin::Op op) { } } -void ebpf_domain_t::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, - const Bin::Op op) { +void ebpf_transformer::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, + const Bin::Op op) { using namespace crab; const int bits = _movsx_bits(op); @@ -2474,7 +2469,7 @@ void ebpf_domain_t::sign_extend(const Reg& dst_reg, const linear_expression_t& r } } -void ebpf_domain_t::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { +void ebpf_transformer::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { using namespace crab; reg_pack_t dst = reg_pack(dst_reg); @@ -2521,7 +2516,7 @@ static void apply(NumAbsDomain& inv, const binop_t& op, const variable_t x, cons inv.apply(op, x, y, z, 0); } -void ebpf_domain_t::operator()(const Bin& bin) { +void ebpf_transformer::operator()(const Bin& bin) { using namespace crab::dsl_syntax; auto dst = reg_pack(bin.dst); @@ -2630,9 +2625,9 @@ void ebpf_domain_t::operator()(const Bin& bin) { if (dst_type == T_NUM && src_type != T_NUM) { // num += ptr type_inv.assign_type(inv, bin.dst, src_type); - if (const auto dst_offset = get_type_offset_variable(bin.dst, src_type)) { + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, src_type)) { crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst.svalue, - get_type_offset_variable(src_reg, src_type).value()); + dom.get_type_offset_variable(src_reg, src_type).value()); } if (src_type == T_SHARED) { inv.assign(dst.shared_region_size, src.shared_region_size); @@ -2640,7 +2635,7 @@ void ebpf_domain_t::operator()(const Bin& bin) { } else if (dst_type != T_NUM && src_type == T_NUM) { // ptr += num type_inv.assign_type(inv, bin.dst, dst_type); - if (const auto dst_offset = get_type_offset_variable(bin.dst, dst_type)) { + if (const auto dst_offset = 
dom.get_type_offset_variable(bin.dst, dst_type)) { crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst_offset.value(), src.svalue); if (dst_type == T_STACK) { @@ -2689,9 +2684,9 @@ void ebpf_domain_t::operator()(const Bin& bin) { default: // ptr -= ptr // Assertions should make sure we only perform this on non-shared pointers. - if (const auto dst_offset = get_type_offset_variable(bin.dst, type)) { + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, type)) { apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst_offset.value(), - get_type_offset_variable(src_reg, type).value(), finite_width); + dom.get_type_offset_variable(src_reg, type).value(), finite_width); inv -= dst_offset.value(); } crab::havoc_offsets(inv, bin.dst); @@ -2709,7 +2704,7 @@ void ebpf_domain_t::operator()(const Bin& bin) { havoc_offsets(bin.dst); } else { sub_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); - if (auto dst_offset = get_type_offset_variable(bin.dst)) { + if (auto dst_offset = dom.get_type_offset_variable(bin.dst)) { sub(dst_offset.value(), src.svalue); if (type_inv.has_type(m_inv, dst.type, T_STACK)) { // Reduce the numeric size. 
@@ -2897,7 +2892,7 @@ std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom) { return o; } -void ebpf_domain_t::initialize_packet(ebpf_domain_t& inv) { +void ebpf_transformer::initialize_packet(ebpf_domain_t& inv) { using namespace crab::dsl_syntax; inv -= variable_t::packet_size(); @@ -2910,14 +2905,14 @@ void ebpf_domain_t::initialize_packet(ebpf_domain_t& inv) { inv += variable_t::meta_offset() <= 0; inv += variable_t::meta_offset() >= -4098; } else { - inv.assign(variable_t::meta_offset(), 0); + ebpf_transformer{inv}.assign(variable_t::meta_offset(), 0); } } ebpf_domain_t ebpf_domain_t::from_constraints(const std::set& constraints, const bool setup_constraints) { ebpf_domain_t inv; if (setup_constraints) { - inv = setup_entry(false); + inv = ebpf_transformer::setup_entry(false); } auto numeric_ranges = std::vector(); for (const auto& cst : parse_linear_constraints(constraints, numeric_ranges)) { @@ -2932,15 +2927,15 @@ ebpf_domain_t ebpf_domain_t::from_constraints(const std::set& const return inv; } -ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { +ebpf_domain_t ebpf_transformer::setup_entry(const bool init_r1) { using namespace crab::dsl_syntax; ebpf_domain_t inv; const auto r10 = reg_pack(R10_STACK_POINTER); constexpr Reg r10_reg{R10_STACK_POINTER}; - inv += EBPF_TOTAL_STACK_SIZE <= r10.svalue; - inv += r10.svalue <= PTR_MAX; - inv.assign(r10.stack_offset, EBPF_TOTAL_STACK_SIZE); + inv.m_inv += EBPF_TOTAL_STACK_SIZE <= r10.svalue; + inv.m_inv += r10.svalue <= PTR_MAX; + inv.m_inv.assign(r10.stack_offset, EBPF_TOTAL_STACK_SIZE); // stack_numeric_size would be 0, but TOP has the same result // so no need to assign it. 
inv.type_inv.assign_type(inv.m_inv, r10_reg, T_STACK); @@ -2948,9 +2943,9 @@ ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { if (init_r1) { const auto r1 = reg_pack(R1_ARG); constexpr Reg r1_reg{R1_ARG}; - inv += 1 <= r1.svalue; - inv += r1.svalue <= PTR_MAX; - inv.assign(r1.ctx_offset, 0); + inv.m_inv += 1 <= r1.svalue; + inv.m_inv += r1.svalue <= PTR_MAX; + inv.m_inv.assign(r1.ctx_offset, 0); inv.type_inv.assign_type(inv.m_inv, r1_reg, T_CTX); } @@ -2958,7 +2953,7 @@ ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { return inv; } -void ebpf_domain_t::initialize_loop_counter(const label_t& label) { +void ebpf_transformer::initialize_loop_counter(const label_t& label) { m_inv.assign(variable_t::loop_counter(to_string(label)), 0); } @@ -2970,7 +2965,7 @@ extended_number ebpf_domain_t::get_loop_count_upper_bound() const { return ub; } -void ebpf_domain_t::operator()(const IncrementLoopCounter& ins) { +void ebpf_transformer::operator()(const IncrementLoopCounter& ins) { const auto counter = variable_t::loop_counter(to_string(ins.name)); this->add(counter, 1); } diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index 91605ec3f..880830a93 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -16,6 +16,11 @@ namespace crab { class ebpf_domain_t final { + friend class ebpf_checker; + friend class ebpf_transformer; + + friend std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom); + public: ebpf_domain_t(); ebpf_domain_t(NumAbsDomain inv, domains::array_domain_t stack); @@ -41,31 +46,58 @@ class ebpf_domain_t final { ebpf_domain_t widening_thresholds(const ebpf_domain_t& other, const thresholds_t& ts); ebpf_domain_t narrow(const ebpf_domain_t& other) const; - typedef bool check_require_func_t(NumAbsDomain&, const linear_constraint_t&, std::string); - void set_require_check(std::function f); + static ebpf_domain_t calculate_constant_limits(); extended_number get_loop_count_upper_bound() const; - 
static ebpf_domain_t setup_entry(bool init_r1); static ebpf_domain_t from_constraints(const std::set& constraints, bool setup_constraints); string_invariant to_set() const; - // abstract transformers - void operator()(const basic_block_t& bb); + private: + // private generic domain functions + void operator+=(const linear_constraint_t& cst); + void operator-=(variable_t var); - void operator()(const Assume&); - void operator()(const Bin&); - void operator()(const Call&); - void operator()(const CallLocal&); - void operator()(const Callx&); - void operator()(const Exit&); - void operator()(const Jmp&) const; - void operator()(const LoadMapFd&); - void operator()(const Atomic&); - void operator()(const Mem&); - void operator()(const Packet&); - void operator()(const Un&); - void operator()(const Undefined&); - void operator()(const IncrementLoopCounter&); + [[nodiscard]] + std::optional get_map_type(const Reg& map_fd_reg) const; + [[nodiscard]] + std::optional get_map_inner_map_fd(const Reg& map_fd_reg) const; + [[nodiscard]] + interval_t get_map_key_size(const Reg& map_fd_reg) const; + [[nodiscard]] + interval_t get_map_value_size(const Reg& map_fd_reg) const; + [[nodiscard]] + interval_t get_map_max_entries(const Reg& map_fd_reg) const; + + static std::optional get_type_offset_variable(const Reg& reg, int type); + [[nodiscard]] + std::optional get_type_offset_variable(const Reg& reg, const NumAbsDomain& inv) const; + [[nodiscard]] + std::optional get_type_offset_variable(const Reg& reg) const; + + bool get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const; + + /// Mapping from variables (including registers, types, offsets, + /// memory locations, etc.) to numeric intervals or relationships + /// to other variables. + NumAbsDomain m_inv; + + /// Represents the stack as a memory region, i.e., an array of bytes, + /// allowing mapping to variable in the m_inv numeric domains + /// while dealing with overlapping byte ranges. 
+ domains::array_domain_t stack; + + TypeDomain type_inv; +}; + +class ebpf_checker final { + ebpf_domain_t& dom; + // shorthands: + NumAbsDomain& m_inv; + domains::array_domain_t& stack; + TypeDomain& type_inv; + + public: + explicit ebpf_checker(ebpf_domain_t& dom) : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} void operator()(const Assertion&); @@ -82,14 +114,54 @@ class ebpf_domain_t final { void operator()(const ZeroCtxOffset&); void operator()(const BoundedLoopCount&); - void initialize_loop_counter(const label_t& label); - static ebpf_domain_t calculate_constant_limits(); + typedef bool check_require_func_t(NumAbsDomain&, const linear_constraint_t&, std::string); + void set_require_check(std::function f); private: - // private generic domain functions - void operator+=(const linear_constraint_t& cst); - void operator-=(variable_t var); + // memory check / load / store + void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; + void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; + void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + std::optional packet_size) const; + void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + variable_t shared_region_size) const; + std::function check_require{}; + std::string current_assertion; + void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const; +}; + +class ebpf_transformer final { + ebpf_domain_t& dom; + // shorthands: + NumAbsDomain& m_inv; + domains::array_domain_t& stack; + TypeDomain& type_inv; + + public: + explicit ebpf_transformer(ebpf_domain_t& dom) + : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} + + // abstract transformers + void operator()(const Assume&); + void operator()(const Bin&); + void 
operator()(const Call&); + void operator()(const CallLocal&); + void operator()(const Callx&); + void operator()(const Exit&); + void operator()(const Jmp&) const; + void operator()(const LoadMapFd&); + void operator()(const Atomic&); + void operator()(const Mem&); + void operator()(const Packet&); + void operator()(const Un&); + void operator()(const Undefined&); + void operator()(const IncrementLoopCounter&); + void initialize_loop_counter(const label_t& label); + + static ebpf_domain_t setup_entry(bool init_r1); + + private: void assign(variable_t lhs, variable_t rhs); void assign(variable_t x, const linear_expression_t& e); void assign(variable_t x, int64_t e); @@ -143,41 +215,15 @@ class ebpf_domain_t final { /// Forget everything about all offset variables for a given register. void havoc_offsets(const Reg& reg); - static std::optional get_type_offset_variable(const Reg& reg, int type); - [[nodiscard]] - std::optional get_type_offset_variable(const Reg& reg, const NumAbsDomain& inv) const; - [[nodiscard]] - std::optional get_type_offset_variable(const Reg& reg) const; - void scratch_caller_saved_registers(); void save_callee_saved_registers(const std::string& prefix); void restore_callee_saved_registers(const std::string& prefix); void havoc_subprogram_stack(const std::string& prefix); - [[nodiscard]] - std::optional get_map_type(const Reg& map_fd_reg) const; - [[nodiscard]] - std::optional get_map_inner_map_fd(const Reg& map_fd_reg) const; - [[nodiscard]] - interval_t get_map_key_size(const Reg& map_fd_reg) const; - [[nodiscard]] - interval_t get_map_value_size(const Reg& map_fd_reg) const; - [[nodiscard]] - interval_t get_map_max_entries(const Reg& map_fd_reg) const; void forget_packet_pointers(); void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); - void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const; - - // memory check / load / store - void 
check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; - void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; - void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - std::optional packet_size) const; - void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - variable_t shared_region_size) const; - void recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const; void recompute_stack_numeric_size(NumAbsDomain& inv, variable_t type_variable) const; void do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width, @@ -193,26 +239,7 @@ class ebpf_domain_t final { void do_mem_store(const Mem& b, const linear_expression_t& val_type, const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); - friend std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom); - static void initialize_packet(ebpf_domain_t& inv); - - private: - /// Mapping from variables (including registers, types, offsets, - /// memory locations, etc.) to numeric intervals or relationships - /// to other variables. - NumAbsDomain m_inv; - - /// Represents the stack as a memory region, i.e., an array of bytes, - /// allowing mapping to variable in the m_inv numeric domains - /// while dealing with overlapping byte ranges. 
- domains::array_domain_t stack; - - std::function check_require{}; - bool get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const; - - TypeDomain type_inv; - std::string current_assertion; }; // end ebpf_domain_t } // namespace crab diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index 992ae1964..9486f5012 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -69,7 +69,10 @@ class interleaved_fwd_fixpoint_iterator_t final { void transform_to_post(const label_t& label, ebpf_domain_t pre) { const basic_block_t& bb = _cfg.get_node(label); - pre(bb); + + for (const GuardedInstruction& ins : bb) { + std::visit(ebpf_transformer{pre}, ins.cmd); + }; _post[label] = std::move(pre); } @@ -129,7 +132,8 @@ std::pair run_forward_analyzer(const cfg_t // This enables enforcement of upper bounds on loop iterations // during program verification. // TODO: Consider making this an instruction instead of an explicit call. - analyzer._wto.for_each_loop_head([&](const label_t& label) { entry_inv.initialize_loop_counter(label); }); + analyzer._wto.for_each_loop_head( + [&](const label_t& label) { ebpf_transformer{entry_inv}.initialize_loop_counter(label); }); } analyzer.set_pre(cfg.entry_label(), entry_inv); for (const auto& component : analyzer._wto) { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index a5af14653..43e7afb43 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -62,7 +62,9 @@ static checks_db generate_report(const cfg_t& cfg, const crab::invariant_table_t for (const label_t& label : cfg.sorted_labels()) { const basic_block_t& bb = cfg.get_node(label); ebpf_domain_t from_inv(pre_invariants.at(label)); - from_inv.set_require_check( + const bool pre_bot = from_inv.is_bottom(); + crab::ebpf_checker checker{from_inv}; + checker.set_require_check( [&m_db, label](auto& inv, const crab::linear_constraint_t& cst, const std::string& s) { if (inv.is_bottom()) { return true; @@ -85,9 +87,12 @@ 
static checks_db generate_report(const cfg_t& cfg, const crab::invariant_table_t } }); - const bool pre_bot = from_inv.is_bottom(); - - from_inv(bb); + for (const GuardedInstruction& ins : bb) { + for (const Assertion& assertion : ins.preconditions) { + checker(assertion); + } + std::visit(crab::ebpf_transformer{from_inv}, ins.cmd); + }; if (!pre_bot && from_inv.is_bottom()) { m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(bb.label())); @@ -172,7 +177,7 @@ static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info try { // Get dictionaries of pre-invariants and post-invariants for each basic block. - ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); + ebpf_domain_t entry_dom = crab::ebpf_transformer::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { diff --git a/src/main/check.cpp b/src/main/check.cpp index ef6241418..81d224de1 100644 --- a/src/main/check.cpp +++ b/src/main/check.cpp @@ -38,7 +38,7 @@ static const std::map _conformance_groups {"callx", bpf_conformance_groups_t::callx}, {"divmul32", bpf_conformance_groups_t::divmul32}, {"divmul64", bpf_conformance_groups_t::divmul64}, {"packet", bpf_conformance_groups_t::packet}}; -static std::optional _get_conformance_group_by_name(std::string group) { +static std::optional _get_conformance_group_by_name(const std::string& group) { if (!_conformance_groups.contains(group)) { return {}; } diff --git a/test-data/jump.yaml b/test-data/jump.yaml index 12319afb5..772b6c9ec 100644 --- a/test-data/jump.yaml +++ b/test-data/jump.yaml @@ -760,7 +760,10 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0:1: Code is unreachable after 0:1" @@ -782,7 +785,10 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 
2147418112] + - r1.type=ctx messages: - "2: Code is unreachable after 2" @@ -803,7 +809,10 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "2: Code is unreachable after 2" @@ -873,22 +882,21 @@ pre: code: : | - if r1 <= 0 goto + if r1 > 0 goto r0 = 1 : | exit post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - r1.ctx_offset=0 - r1.svalue=[1, 2147418112] - r1.type=ctx - r1.uvalue=[1, +oo] messages: - - "0:2: Code is unreachable after 0:2" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JGE with imm 0 and pointer @@ -900,22 +908,21 @@ pre: code: : | - if r1 <= 0 goto + if r1 >= 0 goto r0 = 1 : | exit post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - r1.ctx_offset=0 - r1.svalue=[1, 2147418112] - r1.type=ctx - - r1.uvalue=[1, +oo] + - r1.uvalue=[0, +oo] messages: - - "0:2: Code is unreachable after 0:2" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSLT with imm 0 and pointer @@ -932,10 +939,17 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" + - "0:2: Code is unreachable after 0:2" - "0: Invalid type (r1.type == number)" --- @@ -953,11 +967,18 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:2: Code is unreachable after 0:2" --- test-case: JSGT with imm 0 and pointer @@ -969,16 +990,22 @@ pre: code: : | - if r1 s<= 0 goto + if r1 s> 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 
0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSGE with imm 0 and pointer @@ -990,16 +1017,22 @@ pre: code: : | - if r1 s<= 0 goto + if r1 s>= 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JEQ32 with imm 0 and pointer @@ -1016,11 +1049,18 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:2: Code is unreachable after 0:2" --- test-case: JNE32 with imm 0 and pointer @@ -1037,11 +1077,17 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSET32 with imm 0 and pointer @@ -1058,11 +1104,16 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JNSET32 with imm 0 and pointer @@ -1079,11 +1130,16 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JLT32 with imm 0 and 
pointer @@ -1100,11 +1156,18 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:2: Code is unreachable after 0:2" --- test-case: JLE32 with imm 0 and pointer @@ -1121,11 +1184,16 @@ code: : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JGT32 with imm 0 and pointer @@ -1137,16 +1205,21 @@ pre: code: : | - if w1 <= 0 goto + if w1 > 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JGE32 with imm 0 and pointer @@ -1158,16 +1231,22 @@ pre: code: : | - if w1 <= 0 goto + if w1 >= 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSLT32 with imm 0 and pointer @@ -1184,11 +1263,18 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:2: Code is unreachable after 0:2" --- test-case: JSLE32 with imm 0 and pointer @@ -1205,11 +1291,18 @@ code: : | exit -post: [] +post: + - r0.svalue=1 + - r0.type=number + - r0.uvalue=1 + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - 
r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:2: Code is unreachable after 0:2" --- test-case: JSGT32 with imm 0 and pointer @@ -1221,16 +1314,22 @@ pre: code: : | - if w1 s<= 0 goto + if w1 s> 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSGE32 with imm 0 and pointer @@ -1242,13 +1341,19 @@ pre: code: : | - if w1 s<= 0 goto + if w1 s>= 0 goto r0 = 1 : | exit -post: [] +post: + - r1.ctx_offset=0 + - r1.svalue=[1, 2147418112] + - r1.type=ctx messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" From 6b54213e661b8eef6e47d678469f2e10f8eb82a5 Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Sun, 10 Nov 2024 05:00:20 +0200 Subject: [PATCH 6/8] move checker and transformer to different cpp files Signed-off-by: Elazar Gershuni --- src/crab/ebpf_checker.cpp | 444 ++++++ src/crab/ebpf_domain.cpp | 2801 ++------------------------------- src/crab/ebpf_domain.hpp | 174 +- src/crab/ebpf_transformer.cpp | 2430 ++++++++++++++++++++++++++++ src/crab/fwd_analyzer.cpp | 10 +- src/crab_verifier.cpp | 35 +- test-data/jump.yaml | 157 +- test-data/packet.yaml | 29 + 8 files changed, 3052 insertions(+), 3028 deletions(-) create mode 100644 src/crab/ebpf_checker.cpp create mode 100644 src/crab/ebpf_transformer.cpp diff --git a/src/crab/ebpf_checker.cpp b/src/crab/ebpf_checker.cpp new file mode 100644 index 000000000..3ec0ed85f --- /dev/null +++ b/src/crab/ebpf_checker.cpp @@ -0,0 +1,444 @@ +// Copyright (c) Prevail Verifier contributors. 
+// SPDX-License-Identifier: MIT + +// This file is eBPF-specific, not derived from CRAB. + +#include +#include +#include + +#include "asm_ostream.hpp" +#include "asm_unmarshal.hpp" +#include "config.hpp" +#include "crab/array_domain.hpp" +#include "crab/ebpf_domain.hpp" +#include "crab_utils/num_safety.hpp" +#include "dsl_syntax.hpp" +#include "platform.hpp" +#include "string_constraints.hpp" + +using crab::domains::NumAbsDomain; +namespace crab { + +static bool check_require(const NumAbsDomain& inv, const linear_constraint_t& cst) { + if (inv.is_bottom()) { + return true; + } + if (cst.is_contradiction()) { + return false; + } + if (inv.entail(cst)) { + // XXX: add_redundant(s); + return true; + } + if (inv.intersect(cst)) { + // XXX: add_error() if imply negation + return false; + } + return false; +} + +class ebpf_checker final { + public: + explicit ebpf_checker(ebpf_domain_t& dom, const Assertion& assertion, const std::optional& label = {}) + : assertion{assertion}, label{label}, dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} + + void visit(const Assertion& assertion) { std::visit(*this, assertion); } + + void operator()(const Addable&); + void operator()(const BoundedLoopCount&); + void operator()(const Comparable&); + void operator()(const FuncConstraint&); + void operator()(const ValidDivisor&); + void operator()(const TypeConstraint&); + void operator()(const ValidAccess&); + void operator()(const ValidCall&); + void operator()(const ValidMapKeyValue&); + void operator()(const ValidSize&); + void operator()(const ValidStore&); + void operator()(const ZeroCtxOffset&); + + private: + std::string create_warning(const std::string& s) const { return s + " (" + to_string(assertion) + ")"; } + + void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& msg) { + if (label && !check_require(inv, cst)) { + warnings.push_back(create_warning(msg)); + } + + if (thread_local_options.assume_assertions) { + // avoid 
redundant errors + inv += cst; + } + } + + void require(const std::string& msg) { + if (label) { + warnings.push_back(create_warning(msg)); + } + if (thread_local_options.assume_assertions) { + m_inv.set_to_bottom(); + } + } + + // memory check / load / store + void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + std::optional packet_size); + void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + variable_t shared_region_size); + + public: + std::vector warnings; + + private: + const Assertion& assertion; + const std::optional label; + + ebpf_domain_t& dom; + // shorthands: + NumAbsDomain& m_inv; + domains::array_domain_t& stack; + TypeDomain& type_inv; +}; + +void ebpf_domain_assume(ebpf_domain_t& dom, const Assertion& assertion) { + if (dom.is_bottom()) { + return; + } + ebpf_checker{dom, assertion}.visit(assertion); +} + +std::vector ebpf_domain_check(ebpf_domain_t& dom, const label_t& label, const Assertion& assertion) { + if (dom.is_bottom()) { + return {}; + } + ebpf_checker checker{dom, assertion, label}; + checker.visit(assertion); + return std::move(checker.warnings); +} + +static linear_constraint_t type_is_pointer(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type >= T_CTX; +} + +static linear_constraint_t type_is_number(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type == T_NUM; +} + +static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } + +static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type != T_STACK; +} + +void ebpf_checker::check_access_stack(NumAbsDomain& inv, const 
linear_expression_t& lb, const linear_expression_t& ub) { + using namespace crab::dsl_syntax; + const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; + const auto interval = inv.eval_interval(r10_stack_offset); + if (interval.is_singleton()) { + const int64_t stack_offset = interval.singleton()->cast_to(); + require(inv, lb >= stack_offset - EBPF_SUBPROGRAM_STACK_SIZE, + "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE"); + } + require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); +} + +void ebpf_checker::check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, + const linear_expression_t& ub) { + using namespace crab::dsl_syntax; + require(inv, lb >= 0, "Lower bound must be at least 0"); + require(inv, ub <= global_program_info->type.context_descriptor->size, + std::string("Upper bound must be at most ") + + std::to_string(global_program_info->type.context_descriptor->size)); +} + +void ebpf_checker::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const std::optional packet_size) { + using namespace crab::dsl_syntax; + require(inv, lb >= variable_t::meta_offset(), "Lower bound must be at least meta_offset"); + if (packet_size) { + require(inv, ub <= *packet_size, "Upper bound must be at most packet_size"); + } else { + require(inv, ub <= MAX_PACKET_SIZE, + std::string{"Upper bound must be at most "} + std::to_string(MAX_PACKET_SIZE)); + } +} + +void ebpf_checker::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const variable_t shared_region_size) { + using namespace crab::dsl_syntax; + require(inv, lb >= 0, "Lower bound must be at least 0"); + require(inv, ub <= shared_region_size, std::string("Upper bound must be at most ") + shared_region_size.name()); +} + +void ebpf_checker::operator()(const Comparable& s) { + using namespace crab::dsl_syntax; + 
if (type_inv.same_type(m_inv, s.r1, s.r2)) { + // Same type. If both are numbers, that's okay. Otherwise: + const auto inv = m_inv.when(reg_pack(s.r2).type != T_NUM); + // We must check that they belong to a singleton region: + if (!type_inv.is_in_group(inv, s.r1, TypeGroup::singleton_ptr) && + !type_inv.is_in_group(inv, s.r1, TypeGroup::map_fd)) { + require("Cannot subtract pointers to non-singleton regions"); + return; + } + // And, to avoid wraparound errors, they must be within bounds. + this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r1, 0, Imm{0}, false}); + this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r2, 0, Imm{0}, false}); + } else { + // _Maybe_ different types, so r2 must be a number. + // We checked in a previous assertion that r1 is a pointer or a number. + require(m_inv, reg_pack(s.r2).type == T_NUM, "Cannot subtract pointers to different regions"); + }; +} + +void ebpf_checker::operator()(const Addable& s) { + if (!type_inv.implies_type(m_inv, type_is_pointer(reg_pack(s.ptr)), type_is_number(s.num))) { + require("Only numbers can be added to pointers"); + } +} + +void ebpf_checker::operator()(const ValidDivisor& s) { + using namespace crab::dsl_syntax; + const auto reg = reg_pack(s.reg); + if (!type_inv.implies_type(m_inv, type_is_pointer(reg), type_is_number(s.reg))) { + require("Only numbers can be used as divisors"); + } + if (!thread_local_options.allow_division_by_zero) { + const auto v = s.is_signed ? 
reg.svalue : reg.uvalue; + require(m_inv, v != 0, "Possible division by zero"); + } +} + +void ebpf_checker::operator()(const ValidStore& s) { + if (!type_inv.implies_type(m_inv, type_is_not_stack(reg_pack(s.mem)), type_is_number(s.val))) { + require("Only numbers can be stored to externally-visible regions"); + } +} + +void ebpf_checker::operator()(const TypeConstraint& s) { + if (!type_inv.is_in_group(m_inv, s.reg, s.types)) { + require("Invalid type"); + } +} + +void ebpf_checker::operator()(const BoundedLoopCount& s) { + // Enforces an upper bound on loop iterations by checking that the loop counter + // does not exceed the specified limit + using namespace crab::dsl_syntax; + const auto counter = variable_t::loop_counter(to_string(s.name)); + require(m_inv, counter <= s.limit, "Loop counter is too large"); +} + +void ebpf_checker::operator()(const FuncConstraint& s) { + // Look up the helper function id. + const reg_pack_t& reg = reg_pack(s.reg); + const auto src_interval = m_inv.eval_interval(reg.svalue); + if (const auto sn = src_interval.singleton()) { + if (sn->fits()) { + // We can now process it as if the id was immediate. + const int32_t imm = sn->cast_to(); + if (!global_program_info->platform->is_helper_usable(imm)) { + require("invalid helper function id " + std::to_string(imm)); + return; + } + const Call call = make_call(imm, *global_program_info->platform); + for (const Assertion& sub_assertion : get_assertions(call, *global_program_info, {})) { + // TODO: create explicit sub assertions elsewhere + ebpf_checker sub_checker{dom, sub_assertion, label}; + sub_checker.visit(sub_assertion); + for (const auto& warning : sub_checker.warnings) { + warnings.push_back(warning); + } + } + return; + } + } + require("callx helper function id is not a valid singleton"); +} + +void ebpf_checker::operator()(const ValidSize& s) { + using namespace crab::dsl_syntax; + const auto r = reg_pack(s.reg); + require(m_inv, s.can_be_zero ? 
r.svalue >= 0 : r.svalue > 0, "Invalid size"); +} + +void ebpf_checker::operator()(const ValidCall& s) { + if (!s.stack_frame_prefix.empty()) { + const EbpfHelperPrototype proto = global_program_info->platform->get_helper_prototype(s.func); + if (proto.return_type == EBPF_RETURN_TYPE_INTEGER_OR_NO_RETURN_IF_SUCCEED) { + require("tail call not supported in subprogram"); + return; + } + } +} + +void ebpf_checker::operator()(const ValidMapKeyValue& s) { + using namespace crab::dsl_syntax; + + const auto fd_type = dom.get_map_type(s.map_fd_reg); + + const auto access_reg = reg_pack(s.access_reg); + int width; + if (s.key) { + const auto key_size = dom.get_map_key_size(s.map_fd_reg).singleton(); + if (!key_size.has_value()) { + require("Map key size is not singleton"); + return; + } + width = key_size->narrow(); + } else { + const auto value_size = dom.get_map_value_size(s.map_fd_reg).singleton(); + if (!value_size.has_value()) { + require("Map value size is not singleton"); + return; + } + width = value_size->narrow(); + } + + m_inv = type_inv.join_over_types(m_inv, s.access_reg, [&](NumAbsDomain& inv, type_encoding_t access_reg_type) { + if (access_reg_type == T_STACK) { + variable_t lb = access_reg.stack_offset; + linear_expression_t ub = lb + width; + if (!stack.all_num(inv, lb, ub)) { + auto lb_is = inv[lb].lb().number(); + std::string lb_s = lb_is && lb_is->fits() ? std::to_string(lb_is->narrow()) : "-oo"; + auto ub_is = inv.eval_interval(ub).ub().number(); + std::string ub_s = ub_is && ub_is->fits() ? std::to_string(ub_is->narrow()) : "oo"; + require(inv, linear_constraint_t::false_const(), + "Illegal map update with a non-numerical value [" + lb_s + "-" + ub_s + ")"); + } else if (thread_local_options.strict && fd_type.has_value()) { + EbpfMapType map_type = global_program_info->platform->get_map_type(*fd_type); + if (map_type.is_array) { + // Get offset value. 
+ variable_t key_ptr = access_reg.stack_offset; + std::optional offset = inv[key_ptr].singleton(); + if (!offset.has_value()) { + require("Pointer must be a singleton"); + } else if (s.key) { + // Look up the value pointed to by the key pointer. + variable_t key_value = + variable_t::cell_var(data_kind_t::svalues, offset.value(), sizeof(uint32_t)); + + if (auto max_entries = dom.get_map_max_entries(s.map_fd_reg).lb().number()) { + require(inv, key_value < *max_entries, "Array index overflow"); + } else { + require("Max entries is not finite"); + } + require(inv, key_value >= 0, "Array index underflow"); + } + } + } + } else if (access_reg_type == T_PACKET) { + variable_t lb = access_reg.packet_offset; + linear_expression_t ub = lb + width; + check_access_packet(inv, lb, ub, {}); + // Packet memory is both readable and writable. + } else if (access_reg_type == T_SHARED) { + variable_t lb = access_reg.shared_offset; + linear_expression_t ub = lb + width; + check_access_shared(inv, lb, ub, access_reg.shared_region_size); + require(inv, access_reg.svalue > 0, "Possible null access"); + // Shared memory is zero-initialized when created so is safe to read and write. + } else { + require("Only stack or packet can be used as a parameter"); + } + }); +} + +static std::tuple lb_ub_access_pair(const ValidAccess& s, + const variable_t offset_var) { + using namespace crab::dsl_syntax; + linear_expression_t lb = offset_var + s.offset; + linear_expression_t ub = std::holds_alternative(s.width) ? 
lb + std::get(s.width).v + : lb + reg_pack(std::get(s.width)).svalue; + return {lb, ub}; +} + +void ebpf_checker::operator()(const ValidAccess& s) { + using namespace crab::dsl_syntax; + + const bool is_comparison_check = s.width == Value{Imm{0}}; + + const auto reg = reg_pack(s.reg); + // join_over_types instead of simple iteration is only needed for assume-assert + m_inv = type_inv.join_over_types(m_inv, s.reg, [&](NumAbsDomain& inv, type_encoding_t type) { + switch (type) { + case T_PACKET: { + auto [lb, ub] = lb_ub_access_pair(s, reg.packet_offset); + check_access_packet(inv, lb, ub, + is_comparison_check ? std::optional{} : variable_t::packet_size()); + // if within bounds, it can never be null + // Context memory is both readable and writable. + break; + } + case T_STACK: { + auto [lb, ub] = lb_ub_access_pair(s, reg.stack_offset); + check_access_stack(inv, lb, ub); + // if within bounds, it can never be null + if (s.access_type == AccessType::read) { + // Require that the stack range contains numbers. + if (!stack.all_num(inv, lb, ub)) { + if (s.offset < 0) { + require("Stack content is not numeric"); + } else if (const auto pimm = std::get_if(&s.width)) { + if (!inv.entail(gsl::narrow(pimm->v) <= reg.stack_numeric_size - s.offset)) { + require("Stack content is not numeric"); + } + } else { + if (!inv.entail(reg_pack(std::get(s.width)).svalue <= reg.stack_numeric_size - s.offset)) { + require("Stack content is not numeric"); + } + } + } + } + break; + } + case T_CTX: { + auto [lb, ub] = lb_ub_access_pair(s, reg.ctx_offset); + check_access_context(inv, lb, ub); + // if within bounds, it can never be null + // The context is both readable and writable. 
+ break; + } + case T_SHARED: { + auto [lb, ub] = lb_ub_access_pair(s, reg.shared_offset); + check_access_shared(inv, lb, ub, reg.shared_region_size); + if (!is_comparison_check && !s.or_null) { + require(inv, reg.svalue > 0, "Possible null access"); + } + // Shared memory is zero-initialized when created so is safe to read and write. + break; + } + case T_NUM: + if (!is_comparison_check) { + if (s.or_null) { + require(inv, reg.svalue == 0, "Non-null number"); + } else { + require("Only pointers can be dereferenced"); + } + } + break; + case T_MAP: + case T_MAP_PROGRAMS: + if (!is_comparison_check) { + require("FDs cannot be dereferenced directly"); + } + break; + default: require("Invalid type"); break; + } + }); +} + +void ebpf_checker::operator()(const ZeroCtxOffset& s) { + using namespace crab::dsl_syntax; + const auto reg = reg_pack(s.reg); + require(m_inv, reg.ctx_offset == 0, "Nonzero context offset"); +} + +} // namespace crab diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 04cbfa929..8d3ef4f48 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -3,7 +3,6 @@ // This file is eBPF-specific, not derived from CRAB. -#include #include #include #include @@ -15,707 +14,12 @@ #include "config.hpp" #include "crab/array_domain.hpp" #include "crab/ebpf_domain.hpp" -#include "crab_utils/num_safety.hpp" #include "dsl_syntax.hpp" -#include "platform.hpp" #include "string_constraints.hpp" using crab::domains::NumAbsDomain; namespace crab { -constexpr int MAX_PACKET_SIZE = 0xffff; - -// Pointers in the BPF VM are defined to be 64 bits. Some contexts, like -// data, data_end, and meta in Linux's struct xdp_md are only 32 bit offsets -// from a base address not exposed to the program, but when a program is loaded, -// the offsets get replaced with 64-bit address pointers. However, we currently -// need to do pointer arithmetic on 64-bit numbers so for now we cap the interval -// to 32 bits. 
-constexpr int64_t PTR_MAX = std::numeric_limits::max() - MAX_PACKET_SIZE; - -/** Linear constraint for a pointer comparison. - */ -static linear_constraint_t assume_cst_offsets_reg(const Condition::Op op, const variable_t dst_offset, - const variable_t src_offset) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - switch (op) { - case Op::EQ: return eq(dst_offset, src_offset); - case Op::NE: return neq(dst_offset, src_offset); - case Op::GE: return dst_offset >= src_offset; - case Op::SGE: return dst_offset >= src_offset; // pointer comparison is unsigned - case Op::LE: return dst_offset <= src_offset; - case Op::SLE: return dst_offset <= src_offset; // pointer comparison is unsigned - case Op::GT: return dst_offset > src_offset; - case Op::SGT: return dst_offset > src_offset; // pointer comparison is unsigned - case Op::SLT: return src_offset > dst_offset; - // Note: reverse the test as a workaround strange lookup: - case Op::LT: return src_offset > dst_offset; // FIX unsigned - default: return dst_offset - dst_offset == 0; - } -} - -static std::vector assume_bit_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64, - variable_t dst_uvalue, interval_t src_interval) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - - auto dst_interval = inv.eval_interval(dst_uvalue); - std::optional dst_n = dst_interval.singleton(); - if (!dst_n || !dst_n.value().fits_cast_to()) { - return {}; - } - - std::optional src_n = src_interval.singleton(); - if (!src_n || !src_n->fits_cast_to()) { - return {}; - } - uint64_t src_int_value = src_n.value().cast_to(); - if (!is64) { - src_int_value = gsl::narrow_cast(src_int_value); - } - - bool result; - switch (op) { - case Op::SET: result = (dst_n.value().cast_to() & src_int_value) != 0; break; - case Op::NSET: result = (dst_n.value().cast_to() & src_int_value) == 0; break; - default: throw std::exception(); - } - - return {result ? 
linear_constraint_t::true_const() : linear_constraint_t::false_const()}; -} - -static std::vector assume_signed_64bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, - const variable_t left_uvalue, - const interval_t& right_interval, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using namespace crab::dsl_syntax; - if (right_interval <= interval_t::nonnegative(64) && !right_interval.is_singleton()) { - return {(left_svalue == right_svalue), (left_uvalue == right_uvalue), eq(left_svalue, left_uvalue)}; - } else { - return {(left_svalue == right_svalue), (left_uvalue == right_uvalue)}; - } -} - -static std::vector assume_signed_32bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, - const variable_t left_uvalue, - const interval_t& right_interval) { - using namespace crab::dsl_syntax; - - if (const auto rn = right_interval.singleton()) { - const auto left_svalue_interval = inv.eval_interval(left_svalue); - if (auto size = left_svalue_interval.finite_size()) { - // Find the lowest 64-bit svalue whose low 32 bits match the singleton. - - // Get lower bound as a 64-bit value. - int64_t lb = left_svalue_interval.lb().number()->cast_to(); - - // Use the high 32-bits from the left lower bound and the low 32-bits from the right singleton. - // The result might be lower than the lower bound. - const int64_t lb_match = (lb & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); - if (lb_match < lb) { - // The result is lower than the left interval, so try the next higher matching 64-bit value. - // It's ok if this goes higher than the left upper bound. - lb += 0x100000000; - } - - // Find the highest 64-bit svalue whose low 32 bits match the singleton. - - // Get upper bound as a 64-bit value. - const int64_t ub = left_svalue_interval.ub().number()->cast_to(); - - // Use the high 32-bits from the left upper bound and the low 32-bits from the right singleton. - // The result might be higher than the upper bound. 
- const int64_t ub_match = (ub & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); - if (ub_match > ub) { - // The result is higher than the left interval, so try the next lower matching 64-bit value. - // It's ok if this goes lower than the left lower bound. - lb -= 0x100000000; - } - - if (to_unsigned(lb_match) <= to_unsigned(ub_match)) { - // The interval is also valid when cast to a uvalue, meaning - // both bounds are positive or both are negative. - return {left_svalue >= lb_match, left_svalue <= ub_match, left_uvalue >= to_unsigned(lb_match), - left_uvalue <= to_unsigned(ub_match)}; - } else { - // The interval can only be represented as an svalue. - return {left_svalue >= lb_match, left_svalue <= ub_match}; - } - } - } - return {}; -} - -// Given left and right values, get the left and right intervals, and also split -// the left interval into separate negative and positive intervals. -static void get_signed_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, - const variable_t left_uvalue, const linear_expression_t& right_svalue, - interval_t& left_interval, interval_t& right_interval, - interval_t& left_interval_positive, interval_t& left_interval_negative) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - // Get intervals as 32-bit or 64-bit as appropriate. 
- left_interval = inv.eval_interval(left_svalue); - right_interval = inv.eval_interval(right_svalue); - if (!is64) { - if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || - (left_interval <= interval_t::negative(32) && right_interval <= interval_t::negative(32))) { - is64 = true; - // fallthrough as 64bit, including deduction of relational information - } else { - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); - // continue as 32bit - } - } - - if (!left_interval.is_top()) { - left_interval_positive = left_interval & interval_t::nonnegative(64); - left_interval_negative = left_interval & interval_t::negative(64); - } else { - left_interval = inv.eval_interval(left_uvalue); - if (!left_interval.is_top()) { - // The interval is TOP as a signed interval but is represented precisely as an unsigned interval, - // so split into two signed intervals that can be treated separately. - left_interval_positive = left_interval & interval_t::nonnegative(64); - const number_t lih_ub = - left_interval.ub().number() ? left_interval.ub().number()->truncate_to() : -1; - left_interval_negative = interval_t{std::numeric_limits::min(), lih_ub}; - } else { - left_interval_positive = interval_t::nonnegative(64); - left_interval_negative = interval_t::negative(64); - } - } - - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); -} - -// Given left and right values, get the left and right intervals, and also split -// the left interval into separate low and high intervals. 
-static void get_unsigned_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, - const variable_t left_uvalue, const linear_expression_t& right_uvalue, - interval_t& left_interval, interval_t& right_interval, interval_t& left_interval_low, - interval_t& left_interval_high) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - // Get intervals as 32-bit or 64-bit as appropriate. - left_interval = inv.eval_interval(left_uvalue); - right_interval = inv.eval_interval(right_uvalue); - if (!is64) { - if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || - (left_interval <= interval_t::unsigned_high(32) && right_interval <= interval_t::unsigned_high(32))) { - is64 = true; - // fallthrough as 64bit, including deduction of relational information - } else { - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); - // continue as 32bit - } - } - - if (!left_interval.is_top()) { - left_interval_low = left_interval & interval_t::nonnegative(64); - left_interval_high = left_interval & interval_t::unsigned_high(64); - } else { - left_interval = inv.eval_interval(left_svalue); - if (!left_interval.is_top()) { - // The interval is TOP as an unsigned interval but is represented precisely as a signed interval, - // so split into two unsigned intervals that can be treated separately. 
- left_interval_low = interval_t(0, left_interval.ub()).truncate_to(); - left_interval_high = interval_t(left_interval.lb(), -1).truncate_to(); - } else { - left_interval_low = interval_t::nonnegative(64); - left_interval_high = interval_t::unsigned_high(64); - } - } - - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); -} - -static std::vector -assume_signed_64bit_lt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, - const interval_t& left_interval_positive, const interval_t& left_interval_negative, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::negative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1]. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(64) && - right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // Interval can only be represented as an svalue. - return {strict ? 
left_svalue < right_svalue : left_svalue <= right_svalue}; - } -} - -static std::vector -assume_signed_32bit_lt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_positive, - const interval_t& left_interval_negative, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {std::numeric_limits::max() < left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(32) && - right_interval <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX] - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && - inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { - // Interval can only be represented as an svalue. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else { - // We can't directly compare the svalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector -assume_signed_64bit_gt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, - const interval_t& left_interval_positive, const interval_t& left_interval_negative, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(64) && - right_interval <= interval_t::negative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {std::numeric_limits::max() < left_uvalue, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // Interval can only be represented as an svalue. - return {strict ? 
left_svalue > right_svalue : left_svalue >= right_svalue}; - } -} - -static std::vector -assume_signed_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_positive, - const interval_t& left_interval_negative, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(32) && - right_interval <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {left_uvalue >= number_t{std::numeric_limits::max()} + 1, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && - inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { - // Interval can only be represented as an svalue. - return {strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // We can't directly compare the svalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector assume_signed_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64, - variable_t left_svalue, variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_positive = interval_t::bottom(); - interval_t left_interval_negative = interval_t::bottom(); - get_signed_intervals(inv, is64, left_svalue, left_uvalue, right_svalue, left_interval, right_interval, - left_interval_positive, left_interval_negative); - - if (op == Condition::Op::EQ) { - // Handle svalue == right. - if (is64) { - return assume_signed_64bit_eq(inv, left_svalue, left_uvalue, right_interval, right_svalue, right_uvalue); - } else { - return assume_signed_32bit_eq(inv, left_svalue, left_uvalue, right_interval); - } - } - - const bool is_lt = op == Condition::Op::SLT || op == Condition::Op::SLE; - bool strict = op == Condition::Op::SLT || op == Condition::Op::SGT; - - auto llb = left_interval.lb(); - auto lub = left_interval.ub(); - auto rlb = right_interval.lb(); - auto rub = right_interval.ub(); - if (!is_lt && (strict ? lub <= rlb : lub < rlb)) { - // Left signed interval is lower than right signed interval. - return {linear_constraint_t::false_const()}; - } else if (is_lt && (strict ? llb >= rub : llb > rub)) { - // Left signed interval is higher than right signed interval. - return {linear_constraint_t::false_const()}; - } - if (is_lt && (strict ? lub < rlb : lub <= rlb)) { - // Left signed interval is lower than right signed interval. - return {linear_constraint_t::true_const()}; - } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { - // Left signed interval is higher than right signed interval. 
- return {linear_constraint_t::true_const()}; - } - - if (is64) { - if (is_lt) { - return assume_signed_64bit_lt(strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } else { - return assume_signed_64bit_gt(strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } - } else { - // 32-bit compare. - if (is_lt) { - return assume_signed_32bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } else { - return assume_signed_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } - } - return {}; -} - -static std::vector -assume_unsigned_64bit_lt(const NumAbsDomain& inv, bool strict, variable_t left_svalue, variable_t left_uvalue, - const interval_t& left_interval_low, const interval_t& left_interval_high, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - auto rub = right_interval.ub(); - auto lllb = left_interval_low.truncate_to().lb(); - if (right_interval <= interval_t::nonnegative(64) && (strict ? lllb >= rub : lllb > rub)) { - // The high interval is out of range. - if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { - return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), - left_uvalue <= *lsubn, left_svalue >= 0}; - } else { - return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), - left_svalue >= 0}; - } - } - auto lhlb = left_interval_high.truncate_to().lb(); - if (right_interval <= interval_t::unsigned_high(64) && (strict ? lhlb >= rub : lhlb > rub)) { - // The high interval is out of range. 
- if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { - return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), - left_uvalue <= *lsubn, left_svalue >= 0}; - } else { - return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), - left_svalue >= 0}; - } - } - if (right_interval <= interval_t::signed_int(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - auto llub = left_interval_low.truncate_to().ub(); - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - left_uvalue <= *llub.number(), 0 <= left_svalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (left_interval_low.is_bottom() && right_interval <= interval_t::unsigned_high(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if ((left_interval_low | left_interval_high) == interval_t::unsigned_int(64)) { - // Interval can only be represented as a uvalue, and was TOP before. - return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? 
left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } -} - -static std::vector assume_unsigned_32bit_lt(const NumAbsDomain& inv, const bool strict, - const variable_t left_svalue, - const variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (inv.eval_interval(left_uvalue) <= interval_t::nonnegative(32) && - inv.eval_interval(right_uvalue) <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT32_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (inv.eval_interval(left_svalue) <= interval_t::negative(32) && - inv.eval_interval(right_svalue) <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT32_MIN, -1]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && - inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // We can't directly compare the uvalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector -assume_unsigned_64bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_low, - const interval_t& left_interval_high, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - const auto rlb = right_interval.lb(); - const auto llub = left_interval_low.truncate_to().ub(); - const auto lhlb = left_interval_high.truncate_to().lb(); - - if (right_interval <= interval_t::nonnegative(64) && (strict ? llub <= rlb : llub < rlb)) { - // The low interval is out of range. - return {strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - *lhlb.number() == std::numeric_limits::max() ? left_uvalue == *lhlb.number() - : left_uvalue >= *lhlb.number(), - left_svalue < 0}; - } else if (right_interval <= interval_t::unsigned_high(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if ((left_interval_low | left_interval_high) <= interval_t::nonnegative(64) && - right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } -} - -static std::vector -assume_unsigned_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_low, - const interval_t& left_interval_high, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::unsigned_high(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && - inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } else { - // We can't directly compare the uvalues since they may differ in high order bits. - return {}; - }; -} - -static std::vector assume_unsigned_cst_interval(const NumAbsDomain& inv, Condition::Op op, - bool is64, variable_t left_svalue, - variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_low = interval_t::bottom(); - interval_t left_interval_high = interval_t::bottom(); - get_unsigned_intervals(inv, is64, left_svalue, left_uvalue, right_uvalue, left_interval, right_interval, - left_interval_low, left_interval_high); - - // Handle uvalue != right. 
- if (op == Condition::Op::NE) { - if (auto rn = right_interval.singleton()) { - if (rn == left_interval.truncate_to_uint(is64 ? 64 : 32).lb().number()) { - // "NE lower bound" is equivalent to "GT lower bound". - op = Condition::Op::GT; - right_interval = interval_t{left_interval.lb()}; - } else if (rn == left_interval.ub().number()) { - // "NE upper bound" is equivalent to "LT upper bound". - op = Condition::Op::LT; - right_interval = interval_t{left_interval.ub()}; - } else { - return {}; - } - } else { - return {}; - } - } - - const bool is_lt = op == Condition::Op::LT || op == Condition::Op::LE; - bool strict = op == Condition::Op::LT || op == Condition::Op::GT; - - auto [llb, lub] = left_interval.pair(); - auto [rlb, rub] = right_interval.pair(); - if (is_lt ? (strict ? llb >= rub : llb > rub) : (strict ? lub <= rlb : lub < rlb)) { - // Left unsigned interval is lower than right unsigned interval. - return {linear_constraint_t::false_const()}; - } - if (is_lt && (strict ? lub < rlb : lub <= rlb)) { - // Left unsigned interval is lower than right unsigned interval. We still add a - // relationship for use when widening, such as is used in the prime conformance test. - if (is64) { - return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } - return {}; - } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { - // Left unsigned interval is higher than right unsigned interval. We still add a - // relationship for use when widening, such as is used in the prime conformance test. - if (is64) { - return {strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } else { - return {}; - } - } - - if (is64) { - if (is_lt) { - return assume_unsigned_64bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } else { - return assume_unsigned_64bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } - } else { - if (is_lt) { - return assume_unsigned_32bit_lt(inv, strict, left_svalue, left_uvalue, right_svalue, right_uvalue); - } else { - return assume_unsigned_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } - } -} - -/** Linear constraints for a comparison with a constant. - */ -static std::vector assume_cst_imm(const NumAbsDomain& inv, const Condition::Op op, const bool is64, - const variable_t dst_svalue, const variable_t dst_uvalue, - const int64_t imm) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - switch (op) { - case Op::EQ: - case Op::SGE: - case Op::SLE: - case Op::SGT: - case Op::SLT: - return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, gsl::narrow_cast(imm)); - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, interval_t{imm}); - case Op::NE: - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: - return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, - gsl::narrow_cast(imm)); - } - return {}; -} - -/** Linear constraint for a numerical comparison between registers. 
- */ -static std::vector assume_cst_reg(const NumAbsDomain& inv, const Condition::Op op, const bool is64, - const variable_t dst_svalue, const variable_t dst_uvalue, - const variable_t src_svalue, const variable_t src_uvalue) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - if (is64) { - switch (op) { - case Op::EQ: { - const interval_t src_interval = inv.eval_interval(src_svalue); - if (!src_interval.is_singleton() && src_interval <= interval_t::nonnegative(64)) { - return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue), eq(dst_svalue, dst_uvalue)}; - } else { - return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue)}; - } - } - case Op::NE: return {neq(dst_svalue, src_svalue)}; - case Op::SGE: return {dst_svalue >= src_svalue}; - case Op::SLE: return {dst_svalue <= src_svalue}; - case Op::SGT: return {dst_svalue > src_svalue}; - // Note: reverse the test as a workaround strange lookup: - case Op::SLT: return {src_svalue > dst_svalue}; - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - } - } else { - switch (op) { - case Op::EQ: - case Op::SGE: - case Op::SLE: - case Op::SGT: - case Op::SLT: return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); - case Op::NE: - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - } - } - assert(false); - throw std::exception(); -} - std::optional ebpf_domain_t::get_type_offset_variable(const Reg& reg, const int type) { reg_pack_t r = reg_pack(reg); switch (type) { @@ -737,7 +41,7 @@ std::optional 
ebpf_domain_t::get_type_offset_variable(const Reg& reg return get_type_offset_variable(reg, m_inv); } -void ebpf_checker::set_require_check(std::function f) { check_require = std::move(f); } +string_invariant ebpf_domain_t::to_set() const { return this->m_inv.to_set() + this->stack.to_set(); } ebpf_domain_t ebpf_domain_t::top() { ebpf_domain_t abs; @@ -854,2035 +158,130 @@ void ebpf_domain_t::operator+=(const linear_constraint_t& cst) { m_inv += cst; } void ebpf_domain_t::operator-=(const variable_t var) { m_inv -= var; } -void ebpf_transformer::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } -void ebpf_transformer::assign(const variable_t x, const int64_t e) { m_inv.set(x, interval_t(e)); } - -void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const number_t& k, - const int finite_width) { - m_inv.apply(op, x, y, k, finite_width); -} - -void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { - std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); -} - -void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { - std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); -} +// Get the start and end of the range of possible map fd values. 
+// In the future, it would be cleaner to use a set rather than an interval +// for map fds. +bool ebpf_domain_t::get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const { + const interval_t& map_fd_interval = m_inv[reg_pack(map_fd_reg).map_fd]; + const auto lb = map_fd_interval.lb().number(); + const auto ub = map_fd_interval.ub().number(); + if (!lb || !lb->fits() || !ub || !ub->fits()) { + return false; + } + *start_fd = lb->narrow(); + *end_fd = ub->narrow(); -static void havoc_offsets(NumAbsDomain& inv, const Reg& reg) { - const reg_pack_t r = reg_pack(reg); - inv -= r.ctx_offset; - inv -= r.map_fd; - inv -= r.packet_offset; - inv -= r.shared_offset; - inv -= r.shared_region_size; - inv -= r.stack_offset; - inv -= r.stack_numeric_size; -} -static void havoc_register(NumAbsDomain& inv, const Reg& reg) { - const reg_pack_t r = reg_pack(reg); - havoc_offsets(inv, reg); - inv -= r.svalue; - inv -= r.uvalue; + // Cap the maximum range we'll check. + constexpr int max_range = 32; + return *map_fd_interval.finite_size() < max_range; } -void ebpf_transformer::scratch_caller_saved_registers() { - for (int i = R1_ARG; i <= R5_ARG; i++) { - Reg r{gsl::narrow(i)}; - havoc_register(m_inv, r); - type_inv.havoc_type(m_inv, r); +// All maps in the range must have the same type for us to use it. +std::optional ebpf_domain_t::get_map_type(const Reg& map_fd_reg) const { + int32_t start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return std::optional(); } -} -void ebpf_transformer::save_callee_saved_registers(const std::string& prefix) { - // Create variables specific to the new call stack frame that store - // copies of the states of r6 through r9. 
- for (int r = R6; r <= R9; r++) { - for (const data_kind_t kind : iterate_kinds()) { - const variable_t src_var = variable_t::reg(kind, r); - if (!m_inv[src_var].is_top()) { - assign(variable_t::stack_frame_var(kind, r, prefix), src_var); - } + std::optional type; + for (int32_t map_fd = start_fd; map_fd <= end_fd; map_fd++) { + EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); + if (map == nullptr) { + return std::optional(); } - } -} - -void ebpf_transformer::restore_callee_saved_registers(const std::string& prefix) { - for (int r = R6; r <= R9; r++) { - for (const data_kind_t kind : iterate_kinds()) { - const variable_t src_var = variable_t::stack_frame_var(kind, r, prefix); - if (!m_inv[src_var].is_top()) { - assign(variable_t::reg(kind, r), src_var); - } else { - havoc(variable_t::reg(kind, r)); - } - havoc(src_var); + if (!type.has_value()) { + type = map->type; + } else if (map->type != *type) { + return std::optional(); } } + return type; } -void ebpf_transformer::havoc_subprogram_stack(const std::string& prefix) { - const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; - const auto intv = m_inv.eval_interval(r10_stack_offset); - if (!intv.is_singleton()) { - return; - } - const int64_t stack_start = intv.singleton()->cast_to() - EBPF_SUBPROGRAM_STACK_SIZE; - for (const data_kind_t kind : iterate_kinds()) { - stack.havoc(m_inv, kind, stack_start, EBPF_SUBPROGRAM_STACK_SIZE); +// All maps in the range must have the same inner map fd for us to use it. 
+std::optional ebpf_domain_t::get_map_inner_map_fd(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return {}; } -} - -void ebpf_transformer::forget_packet_pointers() { - using namespace crab::dsl_syntax; - for (const variable_t type_variable : variable_t::get_type_variables()) { - if (type_inv.has_type(m_inv, type_variable, T_PACKET)) { - havoc(variable_t::kind_var(data_kind_t::types, type_variable)); - havoc(variable_t::kind_var(data_kind_t::packet_offsets, type_variable)); - havoc(variable_t::kind_var(data_kind_t::svalues, type_variable)); - havoc(variable_t::kind_var(data_kind_t::uvalues, type_variable)); + std::optional inner_map_fd; + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); + if (map == nullptr) { + return {}; + } + if (!inner_map_fd.has_value()) { + inner_map_fd = map->inner_map_fd; + } else if (map->type != *inner_map_fd) { + return {}; } } - - initialize_packet(dom); + return inner_map_fd; } -static void overflow_bounds(NumAbsDomain& inv, variable_t lhs, number_t span, int finite_width, bool issigned) { - using namespace crab::dsl_syntax; - auto interval = inv[lhs]; - if (interval.ub() - interval.lb() >= span) { - // Interval covers the full space. - inv -= lhs; - return; - } - if (interval.is_bottom()) { - inv -= lhs; - return; - } - number_t lb_value = interval.lb().number().value(); - number_t ub_value = interval.ub().number().value(); - - // Compute the interval, taking overflow into account. - // For a signed result, we need to ensure the signed and unsigned results match - // so for a 32-bit operation, 0x80000000 should be a positive 64-bit number not - // a sign extended negative one. 
- number_t lb = lb_value.truncate_to_uint(finite_width); - number_t ub = ub_value.truncate_to_uint(finite_width); - if (issigned) { - lb = lb.truncate_to(); - ub = ub.truncate_to(); - } - if (lb > ub) { - // Range wraps in the middle, so we cannot represent as an unsigned interval. - inv -= lhs; - return; - } - auto new_interval = interval_t{lb, ub}; - if (new_interval != interval) { - // Update the variable, which will lose any relationships to other variables. - inv.set(lhs, new_interval); - } -} - -static void overflow_signed(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { - const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} - : finite_width == 32 ? number_t{std::numeric_limits::max()} - : throw std::exception()}; - overflow_bounds(inv, lhs, span, finite_width, true); -} - -static void overflow_unsigned(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { - const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} - : finite_width == 32 ? 
number_t{std::numeric_limits::max()} - : throw std::exception()}; - overflow_bounds(inv, lhs, span, finite_width, false); -} -static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const number_t& z, const int finite_width) { - inv.apply(op, xs, y, z, finite_width); - if (finite_width) { - inv.assign(xu, xs); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const number_t& z, const int finite_width) { - inv.apply(op, xu, y, z, finite_width); - if (finite_width) { - inv.assign(xs, xu); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const variable_t z, const int finite_width) { - inv.apply(op, xs, y, z, finite_width); - if (finite_width) { - inv.assign(xu, xs); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const variable_t z, const int finite_width) { - inv.apply(op, xu, y, z, finite_width); - if (finite_width) { - inv.assign(xs, xu); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -void ebpf_transformer::add(const variable_t lhs, const variable_t op2) { - apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); -} -void ebpf_transformer::add(const variable_t lhs, const number_t& op2) { - apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); -} -void ebpf_transformer::sub(const variable_t lhs, const variable_t op2) { - apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); -} -void 
ebpf_transformer::sub(const variable_t lhs, const number_t& op2) { - apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); -} - -// Add/subtract with overflow are both signed and unsigned. We can use either one of the two to compute the -// result before adjusting for overflow, though if one is top we want to use the other to retain precision. -void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? 
lhss : lhsu, op2, - finite_width); -} - -void ebpf_transformer::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, -1, finite_width); -} -void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int 
finite_width) { - apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); -} - -void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - // Use finite width 64 to make the svalue be set as well as the uvalue. - apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { - apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); -} - -static void assume(NumAbsDomain& inv, const 
linear_constraint_t& cst) { inv += cst; } -void ebpf_transformer::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } - -void ebpf_checker::require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const { - if (check_require) { - check_require(inv, cst, s + " (" + this->current_assertion + ")"); - } - if (thread_local_options.assume_assertions) { - // avoid redundant errors - crab::assume(inv, cst); - } -} - -/// Forget everything we know about the value of a variable. -void ebpf_transformer::havoc(const variable_t v) { m_inv -= v; } -void ebpf_transformer::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } - -void ebpf_transformer::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } - -static linear_constraint_t type_is_pointer(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type >= T_CTX; -} - -static linear_constraint_t type_is_number(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type == T_NUM; -} - -static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } - -static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type != T_STACK; -} - -void ebpf_checker::operator()(const Assertion& assertion) { - if (check_require || thread_local_options.assume_assertions) { - this->current_assertion = to_string(assertion); - std::visit(*this, assertion); - this->current_assertion.clear(); - } -} - -void ebpf_checker::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, - const linear_expression_t& ub) const { - using namespace crab::dsl_syntax; - const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; - const auto interval = inv.eval_interval(r10_stack_offset); - if (interval.is_singleton()) { - const int64_t stack_offset = interval.singleton()->cast_to(); - require(inv, lb >= stack_offset - 
EBPF_SUBPROGRAM_STACK_SIZE, - "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE"); - } - require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); -} - -void ebpf_checker::check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, - const linear_expression_t& ub) const { - using namespace crab::dsl_syntax; - require(inv, lb >= 0, "Lower bound must be at least 0"); - require(inv, ub <= global_program_info->type.context_descriptor->size, - std::string("Upper bound must be at most ") + - std::to_string(global_program_info->type.context_descriptor->size)); -} - -void ebpf_checker::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const std::optional packet_size) const { - using namespace crab::dsl_syntax; - require(inv, lb >= variable_t::meta_offset(), "Lower bound must be at least meta_offset"); - if (packet_size) { - require(inv, ub <= *packet_size, "Upper bound must be at most packet_size"); - } else { - require(inv, ub <= MAX_PACKET_SIZE, - std::string{"Upper bound must be at most "} + std::to_string(MAX_PACKET_SIZE)); - } -} - -void ebpf_checker::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const variable_t shared_region_size) const { - using namespace crab::dsl_syntax; - require(inv, lb >= 0, "Lower bound must be at least 0"); - require(inv, ub <= shared_region_size, std::string("Upper bound must be at most ") + shared_region_size.name()); -} - -void ebpf_transformer::operator()(const Assume& s) { - const Condition cond = s.cond; - const auto dst = reg_pack(cond.left); - if (const auto psrc_reg = std::get_if(&cond.right)) { - const auto src_reg = *psrc_reg; - const auto src = reg_pack(src_reg); - if (type_inv.same_type(m_inv, cond.left, std::get(cond.right))) { - m_inv = type_inv.join_over_types(m_inv, cond.left, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == 
T_NUM) { - for (const linear_constraint_t& cst : - assume_cst_reg(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, src.svalue, src.uvalue)) { - inv += cst; - } - } else { - // Either pointers to a singleton region, - // or an equality comparison on map descriptors/pointers to non-singleton locations - if (const auto dst_offset = dom.get_type_offset_variable(cond.left, type)) { - if (const auto src_offset = dom.get_type_offset_variable(src_reg, type)) { - inv += assume_cst_offsets_reg(cond.op, dst_offset.value(), src_offset.value()); - } - } - } - }); - } else { - // We should only reach here if `--assume-assert` is off - assert(!thread_local_options.assume_assertions || dom.is_bottom()); - // be sound in any case, it happens to flush out bugs: - m_inv.set_to_top(); - } - } else { - const int64_t imm = gsl::narrow_cast(std::get(cond.right).v); - for (const linear_constraint_t& cst : assume_cst_imm(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, imm)) { - assume(cst); - } - } -} - -void ebpf_transformer::operator()(const Undefined& a) {} - -// Simple truncation function usable with swap_endianness(). -template -constexpr T truncate(T x) noexcept { - return x; -} - -void ebpf_transformer::operator()(const Un& stmt) { - const auto dst = reg_pack(stmt.dst); - auto swap_endianness = [&](const variable_t v, auto be_or_le) { - if (m_inv.entail(type_is_number(stmt.dst))) { - if (const auto n = m_inv.eval_interval(v).singleton()) { - if (n->fits_cast_to()) { - m_inv.set(v, interval_t{be_or_le(n->cast_to())}); - return; - } - } - } - havoc(v); - havoc_offsets(stmt.dst); - }; - // Swap bytes if needed. For 64-bit types we need the weights to fit in a - // signed int64, but for smaller types we don't want sign extension, - // so we use unsigned which still fits in a signed int64. 
- switch (stmt.op) { - case Un::Op::BE16: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::BE32: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::BE64: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } - break; - case Un::Op::LE16: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::LE32: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::LE64: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } - break; - case Un::Op::SWAP16: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::SWAP32: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::SWAP64: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - 
swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::NEG: - neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32); - havoc_offsets(stmt.dst); - break; - } -} - -void ebpf_transformer::operator()(const Exit& a) { - // Clean up any state for the current stack frame. - const std::string prefix = a.stack_frame_prefix; - if (prefix.empty()) { - return; - } - havoc_subprogram_stack(prefix); - restore_callee_saved_registers(prefix); - - // Restore r10. - constexpr Reg r10_reg{R10_STACK_POINTER}; - add(r10_reg, EBPF_SUBPROGRAM_STACK_SIZE, 64); -} - -void ebpf_transformer::operator()(const Jmp&) const { - // This is a NOP. It only exists to hold the jump preconditions. -} - -void ebpf_checker::operator()(const Comparable& s) { - using namespace crab::dsl_syntax; - if (type_inv.same_type(m_inv, s.r1, s.r2)) { - // Same type. If both are numbers, that's okay. Otherwise: - auto inv = m_inv.when(reg_pack(s.r2).type != T_NUM); - // We must check that they belong to a singleton region: - if (!type_inv.is_in_group(inv, s.r1, TypeGroup::singleton_ptr) && - !type_inv.is_in_group(inv, s.r1, TypeGroup::map_fd)) { - require(inv, linear_constraint_t::false_const(), "Cannot subtract pointers to non-singleton regions"); - return; - } - // And, to avoid wraparound errors, they must be within bounds. - this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r1, 0, Imm{0}, false}); - this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r2, 0, Imm{0}, false}); - } else { - // _Maybe_ different types, so r2 must be a number. - // We checked in a previous assertion that r1 is a pointer or a number. 
- require(m_inv, reg_pack(s.r2).type == T_NUM, "Cannot subtract pointers to different regions"); - }; -} - -void ebpf_checker::operator()(const Addable& s) { - if (!type_inv.implies_type(m_inv, type_is_pointer(reg_pack(s.ptr)), type_is_number(s.num))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be added to pointers"); - } -} - -void ebpf_checker::operator()(const ValidDivisor& s) { - using namespace crab::dsl_syntax; - const auto reg = reg_pack(s.reg); - if (!type_inv.implies_type(m_inv, type_is_pointer(reg), type_is_number(s.reg))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be used as divisors"); - } - if (!thread_local_options.allow_division_by_zero) { - const auto v = s.is_signed ? reg.svalue : reg.uvalue; - require(m_inv, v != 0, "Possible division by zero"); - } -} - -void ebpf_checker::operator()(const ValidStore& s) { - if (!type_inv.implies_type(m_inv, type_is_not_stack(reg_pack(s.mem)), type_is_number(s.val))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be stored to externally-visible regions"); - } -} - -void ebpf_checker::operator()(const TypeConstraint& s) { - if (!type_inv.is_in_group(m_inv, s.reg, s.types)) { - require(m_inv, linear_constraint_t::false_const(), "Invalid type"); - } -} - -void ebpf_checker::operator()(const BoundedLoopCount& s) { - // Enforces an upper bound on loop iterations by checking that the loop counter - // does not exceed the specified limit - using namespace crab::dsl_syntax; - const auto counter = variable_t::loop_counter(to_string(s.name)); - require(m_inv, counter <= s.limit, "Loop counter is too large"); -} - -void ebpf_checker::operator()(const FuncConstraint& s) { - // Look up the helper function id. - const reg_pack_t& reg = reg_pack(s.reg); - const auto src_interval = m_inv.eval_interval(reg.svalue); - if (const auto sn = src_interval.singleton()) { - if (sn->fits()) { - // We can now process it as if the id was immediate. 
- const int32_t imm = sn->cast_to(); - if (!global_program_info->platform->is_helper_usable(imm)) { - require(m_inv, linear_constraint_t::false_const(), "invalid helper function id " + std::to_string(imm)); - return; - } - Call call = make_call(imm, *global_program_info->platform); - for (const Assertion& assertion : get_assertions(call, *global_program_info, {})) { - (*this)(assertion); - } - return; - } - } - require(m_inv, linear_constraint_t::false_const(), "callx helper function id is not a valid singleton"); -} - -void ebpf_checker::operator()(const ValidSize& s) { - using namespace crab::dsl_syntax; - const auto r = reg_pack(s.reg); - require(m_inv, s.can_be_zero ? r.svalue >= 0 : r.svalue > 0, "Invalid size"); -} - -// Get the start and end of the range of possible map fd values. -// In the future, it would be cleaner to use a set rather than an interval -// for map fds. -bool ebpf_domain_t::get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const { - const interval_t& map_fd_interval = m_inv[reg_pack(map_fd_reg).map_fd]; - const auto lb = map_fd_interval.lb().number(); - const auto ub = map_fd_interval.ub().number(); - if (!lb || !lb->fits() || !ub || !ub->fits()) { - return false; - } - *start_fd = lb->narrow(); - *end_fd = ub->narrow(); - - // Cap the maximum range we'll check. - constexpr int max_range = 32; - return *map_fd_interval.finite_size() < max_range; -} - -// All maps in the range must have the same type for us to use it. 
-std::optional ebpf_domain_t::get_map_type(const Reg& map_fd_reg) const { - int32_t start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return std::optional(); - } - - std::optional type; - for (int32_t map_fd = start_fd; map_fd <= end_fd; map_fd++) { - EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); - if (map == nullptr) { - return std::optional(); - } - if (!type.has_value()) { - type = map->type; - } else if (map->type != *type) { - return std::optional(); - } - } - return type; -} - -// All maps in the range must have the same inner map fd for us to use it. -std::optional ebpf_domain_t::get_map_inner_map_fd(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return {}; - } - - std::optional inner_map_fd; - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); - if (map == nullptr) { - return {}; - } - if (!inner_map_fd.has_value()) { - inner_map_fd = map->inner_map_fd; - } else if (map->type != *inner_map_fd) { - return {}; - } - } - return inner_map_fd; -} - -// We can deal with a range of key sizes. -interval_t ebpf_domain_t::get_map_key_size(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t{map->key_size}; - } else { - return interval_t::top(); - } - } - return result; -} - -// We can deal with a range of value sizes. 
-interval_t ebpf_domain_t::get_map_value_size(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t(map->value_size); - } else { - return interval_t::top(); - } - } - return result; -} - -// We can deal with a range of max_entries values. -interval_t ebpf_domain_t::get_map_max_entries(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t(map->max_entries); - } else { - return interval_t::top(); - } - } - return result; -} - -void ebpf_checker::operator()(const ValidCall& s) { - if (!s.stack_frame_prefix.empty()) { - const EbpfHelperPrototype proto = global_program_info->platform->get_helper_prototype(s.func); - if (proto.return_type == EBPF_RETURN_TYPE_INTEGER_OR_NO_RETURN_IF_SUCCEED) { - require(m_inv, linear_constraint_t::false_const(), "tail call not supported in subprogram"); - return; - } - } -} - -void ebpf_checker::operator()(const ValidMapKeyValue& s) { - using namespace crab::dsl_syntax; - - const auto fd_type = dom.get_map_type(s.map_fd_reg); - - const auto access_reg = reg_pack(s.access_reg); - int width; - if (s.key) { - const auto key_size = dom.get_map_key_size(s.map_fd_reg).singleton(); - if (!key_size.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Map key size is not singleton"); - return; - } - width = key_size->narrow(); - } else { - const auto value_size = 
dom.get_map_value_size(s.map_fd_reg).singleton(); - if (!value_size.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Map value size is not singleton"); - return; - } - width = value_size->narrow(); - } - - m_inv = type_inv.join_over_types(m_inv, s.access_reg, [&](NumAbsDomain& inv, type_encoding_t access_reg_type) { - if (access_reg_type == T_STACK) { - variable_t lb = access_reg.stack_offset; - linear_expression_t ub = lb + width; - if (!stack.all_num(inv, lb, ub)) { - auto lb_is = inv[lb].lb().number(); - std::string lb_s = lb_is && lb_is->fits() ? std::to_string(lb_is->narrow()) : "-oo"; - auto ub_is = inv.eval_interval(ub).ub().number(); - std::string ub_s = ub_is && ub_is->fits() ? std::to_string(ub_is->narrow()) : "oo"; - require(inv, linear_constraint_t::false_const(), - "Illegal map update with a non-numerical value [" + lb_s + "-" + ub_s + ")"); - } else if (thread_local_options.strict && fd_type.has_value()) { - EbpfMapType map_type = global_program_info->platform->get_map_type(*fd_type); - if (map_type.is_array) { - // Get offset value. - variable_t key_ptr = access_reg.stack_offset; - std::optional offset = inv[key_ptr].singleton(); - if (!offset.has_value()) { - require(inv, linear_constraint_t::false_const(), "Pointer must be a singleton"); - } else if (s.key) { - // Look up the value pointed to by the key pointer. 
- variable_t key_value = - variable_t::cell_var(data_kind_t::svalues, offset.value(), sizeof(uint32_t)); - - if (auto max_entries = dom.get_map_max_entries(s.map_fd_reg).lb().number()) { - require(inv, key_value < *max_entries, "Array index overflow"); - } else { - require(inv, linear_constraint_t::false_const(), "Max entries is not finite"); - } - require(inv, key_value >= 0, "Array index underflow"); - } - } - } - } else if (access_reg_type == T_PACKET) { - variable_t lb = access_reg.packet_offset; - linear_expression_t ub = lb + width; - check_access_packet(inv, lb, ub, {}); - // Packet memory is both readable and writable. - } else if (access_reg_type == T_SHARED) { - variable_t lb = access_reg.shared_offset; - linear_expression_t ub = lb + width; - check_access_shared(inv, lb, ub, access_reg.shared_region_size); - require(inv, access_reg.svalue > 0, "Possible null access"); - // Shared memory is zero-initialized when created so is safe to read and write. - } else { - require(inv, linear_constraint_t::false_const(), "Only stack or packet can be used as a parameter"); - } - }); -} -static std::tuple lb_ub_access_pair(const ValidAccess& s, - const variable_t offset_var) { - using namespace crab::dsl_syntax; - linear_expression_t lb = offset_var + s.offset; - linear_expression_t ub = std::holds_alternative(s.width) ? lb + std::get(s.width).v - : lb + reg_pack(std::get(s.width)).svalue; - return {lb, ub}; -} -void ebpf_checker::operator()(const ValidAccess& s) { - using namespace crab::dsl_syntax; - - const bool is_comparison_check = s.width == Value{Imm{0}}; - - const auto reg = reg_pack(s.reg); - // join_over_types instead of simple iteration is only needed for assume-assert - m_inv = type_inv.join_over_types(m_inv, s.reg, [&](NumAbsDomain& inv, type_encoding_t type) { - switch (type) { - case T_PACKET: { - auto [lb, ub] = lb_ub_access_pair(s, reg.packet_offset); - check_access_packet(inv, lb, ub, - is_comparison_check ? 
std::optional{} : variable_t::packet_size()); - // if within bounds, it can never be null - // Context memory is both readable and writable. - break; - } - case T_STACK: { - auto [lb, ub] = lb_ub_access_pair(s, reg.stack_offset); - check_access_stack(inv, lb, ub); - // if within bounds, it can never be null - if (s.access_type == AccessType::read) { - // Require that the stack range contains numbers. - if (!stack.all_num(inv, lb, ub)) { - if (s.offset < 0) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } else if (const auto pimm = std::get_if(&s.width)) { - if (!inv.entail(gsl::narrow(pimm->v) <= reg.stack_numeric_size - s.offset)) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } - } else { - if (!inv.entail(reg_pack(std::get(s.width)).svalue <= reg.stack_numeric_size - s.offset)) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } - } - } - } - break; - } - case T_CTX: { - auto [lb, ub] = lb_ub_access_pair(s, reg.ctx_offset); - check_access_context(inv, lb, ub); - // if within bounds, it can never be null - // The context is both readable and writable. - break; - } - case T_SHARED: { - auto [lb, ub] = lb_ub_access_pair(s, reg.shared_offset); - check_access_shared(inv, lb, ub, reg.shared_region_size); - if (!is_comparison_check && !s.or_null) { - require(inv, reg.svalue > 0, "Possible null access"); - } - // Shared memory is zero-initialized when created so is safe to read and write. 
- break; - } - case T_NUM: - if (!is_comparison_check) { - if (s.or_null) { - require(inv, reg.svalue == 0, "Non-null number"); - } else { - require(inv, linear_constraint_t::false_const(), "Only pointers can be dereferenced"); - } - } - break; - case T_MAP: - case T_MAP_PROGRAMS: - if (!is_comparison_check) { - require(inv, linear_constraint_t::false_const(), "FDs cannot be dereferenced directly"); - } - break; - default: require(inv, linear_constraint_t::false_const(), "Invalid type"); break; - } - }); -} - -void ebpf_checker::operator()(const ZeroCtxOffset& s) { - using namespace crab::dsl_syntax; - const auto reg = reg_pack(s.reg); - require(m_inv, reg.ctx_offset == 0, "Nonzero context offset"); -} - -void ebpf_transformer::operator()(const Packet& a) { - const auto reg = reg_pack(R0_RETURN_VALUE); - constexpr Reg r0_reg{R0_RETURN_VALUE}; - type_inv.assign_type(m_inv, r0_reg, T_NUM); - havoc_offsets(r0_reg); - havoc(reg.svalue); - havoc(reg.uvalue); - scratch_caller_saved_registers(); -} - -void ebpf_transformer::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, - const int width, const Reg& src_reg) { - type_inv.assign_type(inv, target_reg, stack.load(inv, data_kind_t::types, addr, width)); - using namespace crab::dsl_syntax; - if (inv.entail(width <= reg_pack(src_reg).stack_numeric_size)) { - type_inv.assign_type(inv, target_reg, T_NUM); - } - - const reg_pack_t& target = reg_pack(target_reg); - if (width == 1 || width == 2 || width == 4 || width == 8) { - // Use the addr before we havoc the destination register since we might be getting the - // addr from that same register. 
- const std::optional sresult = stack.load(inv, data_kind_t::svalues, addr, width); - const std::optional uresult = stack.load(inv, data_kind_t::uvalues, addr, width); - havoc_register(inv, target_reg); - inv.assign(target.svalue, sresult); - inv.assign(target.uvalue, uresult); - - if (type_inv.has_type(inv, target.type, T_CTX)) { - inv.assign(target.ctx_offset, stack.load(inv, data_kind_t::ctx_offsets, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_MAP) || type_inv.has_type(inv, target.type, T_MAP_PROGRAMS)) { - inv.assign(target.map_fd, stack.load(inv, data_kind_t::map_fds, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_PACKET)) { - inv.assign(target.packet_offset, stack.load(inv, data_kind_t::packet_offsets, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_SHARED)) { - inv.assign(target.shared_offset, stack.load(inv, data_kind_t::shared_offsets, addr, width)); - inv.assign(target.shared_region_size, stack.load(inv, data_kind_t::shared_region_sizes, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_STACK)) { - inv.assign(target.stack_offset, stack.load(inv, data_kind_t::stack_offsets, addr, width)); - inv.assign(target.stack_numeric_size, stack.load(inv, data_kind_t::stack_numeric_sizes, addr, width)); - } - } else { - havoc_register(inv, target_reg); - } -} - -void ebpf_transformer::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, - const int width) { - using namespace crab::dsl_syntax; - if (inv.is_bottom()) { - return; - } - - const ebpf_context_descriptor_t* desc = global_program_info->type.context_descriptor; - - const reg_pack_t& target = reg_pack(target_reg); - - if (desc->end < 0) { - havoc_register(inv, target_reg); - type_inv.assign_type(inv, target_reg, T_NUM); - return; - } - - const interval_t interval = inv.eval_interval(addr_vague); - const std::optional maybe_addr = interval.singleton(); - havoc_register(inv, target_reg); - - const bool 
may_touch_ptr = - interval.contains(desc->data) || interval.contains(desc->meta) || interval.contains(desc->end); - - if (!maybe_addr) { - if (may_touch_ptr) { - type_inv.havoc_type(inv, target_reg); - } else { - type_inv.assign_type(inv, target_reg, T_NUM); - } - return; +// We can deal with a range of key sizes. +interval_t ebpf_domain_t::get_map_key_size(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } - const number_t addr = *maybe_addr; - - // We use offsets for packet data, data_end, and meta during verification, - // but at runtime they will be 64-bit pointers. We can use the offset values - // for verification like we use map_fd's as a proxy for maps which - // at runtime are actually 64-bit memory pointers. - const int offset_width = desc->end - desc->data; - if (addr == desc->data) { - if (width == offset_width) { - inv.assign(target.packet_offset, 0); - } - } else if (addr == desc->end) { - if (width == offset_width) { - inv.assign(target.packet_offset, variable_t::packet_size()); - } - } else if (addr == desc->meta) { - if (width == offset_width) { - inv.assign(target.packet_offset, variable_t::meta_offset()); - } - } else { - if (may_touch_ptr) { - type_inv.havoc_type(inv, target_reg); + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t{map->key_size}; } else { - type_inv.assign_type(inv, target_reg, T_NUM); + return interval_t::top(); } - return; - } - if (width == offset_width) { - type_inv.assign_type(inv, target_reg, T_PACKET); - inv += 4098 <= target.svalue; - inv += target.svalue <= PTR_MAX; - } -} - -void ebpf_transformer::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, - const linear_expression_t& addr, const int width) { - if (inv.is_bottom()) { - 
return; - } - const reg_pack_t& target = reg_pack(target_reg); - - type_inv.assign_type(inv, target_reg, T_NUM); - havoc_register(inv, target_reg); - - // A 1 or 2 byte copy results in a limited range of values that may be used as array indices. - if (width == 1) { - inv.set(target.svalue, interval_t::full()); - inv.set(target.uvalue, interval_t::full()); - } else if (width == 2) { - inv.set(target.svalue, interval_t::full()); - inv.set(target.uvalue, interval_t::full()); - } -} - -void ebpf_transformer::do_load(const Mem& b, const Reg& target_reg) { - using namespace crab::dsl_syntax; - - const auto mem_reg = reg_pack(b.access.basereg); - const int width = b.access.width; - const int offset = b.access.offset; - - if (b.access.basereg.v == R10_STACK_POINTER) { - const linear_expression_t addr = mem_reg.stack_offset + offset; - do_load_stack(m_inv, target_reg, addr, width, b.access.basereg); - return; } - - m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, type_encoding_t type) { - switch (type) { - case T_UNINIT: return; - case T_MAP: return; - case T_MAP_PROGRAMS: return; - case T_NUM: return; - case T_CTX: { - linear_expression_t addr = mem_reg.ctx_offset + offset; - do_load_ctx(inv, target_reg, addr, width); - break; - } - case T_STACK: { - linear_expression_t addr = mem_reg.stack_offset + offset; - do_load_stack(inv, target_reg, addr, width, b.access.basereg); - break; - } - case T_PACKET: { - linear_expression_t addr = mem_reg.packet_offset + offset; - do_load_packet_or_shared(inv, target_reg, addr, width); - break; - } - default: { - linear_expression_t addr = mem_reg.shared_offset + offset; - do_load_packet_or_shared(inv, target_reg, addr, width); - break; - } - } - }); + return result; } -void ebpf_transformer::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, - const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, - const 
std::optional& opt_val_reg) { - { - const std::optional var = stack.store_type(inv, addr, width, val_type); - type_inv.assign_type(inv, var, val_type); - } - if (width == 8) { - inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); - inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_CTX)) { - inv.assign(stack.store(inv, data_kind_t::ctx_offsets, addr, width, opt_val_reg->ctx_offset), - opt_val_reg->ctx_offset); - } else { - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - } - - if (opt_val_reg && - (type_inv.has_type(m_inv, val_type, T_MAP) || type_inv.has_type(m_inv, val_type, T_MAP_PROGRAMS))) { - inv.assign(stack.store(inv, data_kind_t::map_fds, addr, width, opt_val_reg->map_fd), opt_val_reg->map_fd); - } else { - stack.havoc(inv, data_kind_t::map_fds, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_PACKET)) { - inv.assign(stack.store(inv, data_kind_t::packet_offsets, addr, width, opt_val_reg->packet_offset), - opt_val_reg->packet_offset); - } else { - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_SHARED)) { - inv.assign(stack.store(inv, data_kind_t::shared_offsets, addr, width, opt_val_reg->shared_offset), - opt_val_reg->shared_offset); - inv.assign(stack.store(inv, data_kind_t::shared_region_sizes, addr, width, opt_val_reg->shared_region_size), - opt_val_reg->shared_region_size); - } else { - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_STACK)) { - inv.assign(stack.store(inv, data_kind_t::stack_offsets, addr, width, opt_val_reg->stack_offset), - opt_val_reg->stack_offset); - inv.assign(stack.store(inv, data_kind_t::stack_numeric_sizes, addr, width, 
opt_val_reg->stack_numeric_size), - opt_val_reg->stack_numeric_size); - } else { - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); - } - } else { - if ((width == 1 || width == 2 || width == 4) && type_inv.get_type(m_inv, val_type) == T_NUM) { - // Keep track of numbers on the stack that might be used as array indices. - inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); - inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); - } else { - stack.havoc(inv, data_kind_t::svalues, addr, width); - stack.havoc(inv, data_kind_t::uvalues, addr, width); - } - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - stack.havoc(inv, data_kind_t::map_fds, addr, width); - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); - } - - // Update stack_numeric_size for any stack type variables. - // stack_numeric_size holds the number of continuous bytes starting from stack_offset that are known to be numeric. - auto updated_lb = m_inv.eval_interval(addr).lb(); - auto updated_ub = m_inv.eval_interval(addr).ub() + width; - for (const variable_t type_variable : variable_t::get_type_variables()) { - if (!type_inv.has_type(inv, type_variable, T_STACK)) { - continue; - } - const variable_t stack_offset_variable = variable_t::kind_var(data_kind_t::stack_offsets, type_variable); - const variable_t stack_numeric_size_variable = - variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); - - using namespace crab::dsl_syntax; - // See if the variable's numeric interval overlaps with changed bytes. 
- if (m_inv.intersect(dsl_syntax::operator<=(addr, stack_offset_variable + stack_numeric_size_variable)) && - m_inv.intersect(operator>=(addr + width, stack_offset_variable))) { - havoc(stack_numeric_size_variable); - recompute_stack_numeric_size(m_inv, type_variable); - } +// We can deal with a range of value sizes. +interval_t ebpf_domain_t::get_map_value_size(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } -} -void ebpf_transformer::operator()(const Mem& b) { - if (m_inv.is_bottom()) { - return; - } - if (const auto preg = std::get_if(&b.value)) { - if (b.is_load) { - do_load(b, *preg); + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t(map->value_size); } else { - const auto data_reg = reg_pack(*preg); - do_mem_store(b, data_reg.type, data_reg.svalue, data_reg.uvalue, data_reg); - } - } else { - const uint64_t imm = std::get(b.value).v; - do_mem_store(b, T_NUM, to_signed(imm), imm, {}); - } -} - -void ebpf_transformer::do_mem_store(const Mem& b, const linear_expression_t& val_type, - const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, - const std::optional& opt_val_reg) { - if (m_inv.is_bottom()) { - return; - } - const int width = b.access.width; - const number_t offset{b.access.offset}; - if (b.access.basereg.v == R10_STACK_POINTER) { - const auto r10_stack_offset = reg_pack(b.access.basereg).stack_offset; - const auto r10_interval = m_inv.eval_interval(r10_stack_offset); - if (r10_interval.is_singleton()) { - const int32_t stack_offset = r10_interval.singleton()->cast_to(); - const number_t base_addr{stack_offset}; - do_store_stack(m_inv, base_addr + offset, width, val_type, val_svalue, val_uvalue, opt_val_reg); - } - return; - } - m_inv = 
type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == T_STACK) { - const auto base_addr = linear_expression_t(dom.get_type_offset_variable(b.access.basereg, type).value()); - do_store_stack(inv, dsl_syntax::operator+(base_addr, offset), width, val_type, val_svalue, val_uvalue, - opt_val_reg); - } - // do nothing for any other type - }); -} - -// Construct a Bin operation that does the main operation that a given Atomic operation does atomically. -static Bin atomic_to_bin(const Atomic& a) { - Bin bin{.dst = Reg{R11_ATOMIC_SCRATCH}, .v = a.valreg, .is64 = a.access.width == sizeof(uint64_t), .lddw = false}; - switch (a.op) { - case Atomic::Op::ADD: bin.op = Bin::Op::ADD; break; - case Atomic::Op::OR: bin.op = Bin::Op::OR; break; - case Atomic::Op::AND: bin.op = Bin::Op::AND; break; - case Atomic::Op::XOR: bin.op = Bin::Op::XOR; break; - case Atomic::Op::XCHG: - case Atomic::Op::CMPXCHG: bin.op = Bin::Op::MOV; break; - default: throw std::exception(); - } - return bin; -} - -void ebpf_transformer::operator()(const Atomic& a) { - if (m_inv.is_bottom()) { - return; - } - if (!m_inv.entail(type_is_pointer(reg_pack(a.access.basereg))) || - !m_inv.entail(type_is_number(reg_pack(a.valreg)))) { - return; - } - if (m_inv.entail(type_is_not_stack(reg_pack(a.access.basereg)))) { - // Shared memory regions are volatile so we can just havoc - // any register that will be updated. - if (a.op == Atomic::Op::CMPXCHG) { - havoc_register(m_inv, Reg{R0_RETURN_VALUE}); - } else if (a.fetch) { - havoc_register(m_inv, a.valreg); - } - return; - } - - // Fetch the current value into the R11 pseudo-register. - constexpr Reg r11{R11_ATOMIC_SCRATCH}; - (*this)(Mem{.access = a.access, .value = r11, .is_load = true}); - - // Compute the new value in R11. - (*this)(atomic_to_bin(a)); - - if (a.op == Atomic::Op::CMPXCHG) { - // For CMPXCHG, store the original value in r0. 
- (*this)(Mem{.access = a.access, .value = Reg{R0_RETURN_VALUE}, .is_load = true}); - - // For the destination, there are 3 possibilities: - // 1) dst.value == r0.value : set R11 to valreg - // 2) dst.value != r0.value : don't modify R11 - // 3) dst.value may or may not == r0.value : set R11 to the union of R11 and valreg - // For now we just havoc the value of R11. - havoc_register(m_inv, r11); - } else if (a.fetch) { - // For other FETCH operations, store the original value in the src register. - (*this)(Mem{.access = a.access, .value = a.valreg, .is_load = true}); - } - - // Store the new value back in the original shared memory location. - // Note that do_mem_store() currently doesn't track shared memory values, - // but stack memory values are tracked and are legal here. - (*this)(Mem{.access = a.access, .value = r11, .is_load = false}); - - // Clear the R11 pseudo-register. - havoc_register(m_inv, r11); - type_inv.havoc_type(m_inv, r11); -} - -void ebpf_transformer::operator()(const Call& call) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - std::optional maybe_fd_reg{}; - for (ArgSingle param : call.singles) { - switch (param.kind) { - case ArgSingle::Kind::MAP_FD: maybe_fd_reg = param.reg; break; - case ArgSingle::Kind::ANYTHING: - case ArgSingle::Kind::MAP_FD_PROGRAMS: - case ArgSingle::Kind::PTR_TO_MAP_KEY: - case ArgSingle::Kind::PTR_TO_MAP_VALUE: - case ArgSingle::Kind::PTR_TO_CTX: - // Do nothing. We don't track the content of relevant memory regions - break; - } - } - for (ArgPair param : call.pairs) { - switch (param.kind) { - case ArgPair::Kind::PTR_TO_READABLE_MEM_OR_NULL: - case ArgPair::Kind::PTR_TO_READABLE_MEM: - // Do nothing. No side effect allowed. 
- break; - - case ArgPair::Kind::PTR_TO_WRITABLE_MEM: { - bool store_numbers = true; - auto variable = dom.get_type_offset_variable(param.mem); - if (!variable.has_value()) { - // checked by the checker - break; - } - variable_t addr = variable.value(); - variable_t width = reg_pack(param.size).svalue; - - m_inv = type_inv.join_over_types(m_inv, param.mem, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == T_STACK) { - // Pointer to a memory region that the called function may change, - // so we must havoc. - stack.havoc(inv, data_kind_t::types, addr, width); - stack.havoc(inv, data_kind_t::svalues, addr, width); - stack.havoc(inv, data_kind_t::uvalues, addr, width); - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - stack.havoc(inv, data_kind_t::map_fds, addr, width); - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - } else { - store_numbers = false; - } - }); - if (store_numbers) { - // Functions are not allowed to write sensitive data, - // and initialization is guaranteed - stack.store_numbers(m_inv, addr, width); - } - } - } - } - - constexpr Reg r0_reg{R0_RETURN_VALUE}; - const auto r0_pack = reg_pack(r0_reg); - havoc(r0_pack.stack_numeric_size); - if (call.is_map_lookup) { - // This is the only way to get a null pointer - if (maybe_fd_reg) { - if (const auto map_type = dom.get_map_type(*maybe_fd_reg)) { - if (global_program_info->platform->get_map_type(*map_type).value_type == EbpfMapValueType::MAP) { - if (const auto inner_map_fd = dom.get_map_inner_map_fd(*maybe_fd_reg)) { - do_load_mapfd(r0_reg, to_signed(*inner_map_fd), true); - goto out; - } - } else { - assign_valid_ptr(r0_reg, true); - assign(r0_pack.shared_offset, 0); - m_inv.set(r0_pack.shared_region_size, dom.get_map_value_size(*maybe_fd_reg)); - 
type_inv.assign_type(m_inv, r0_reg, T_SHARED); - } - } - } - assign_valid_ptr(r0_reg, true); - assign(r0_pack.shared_offset, 0); - type_inv.assign_type(m_inv, r0_reg, T_SHARED); - } else { - havoc(r0_pack.svalue); - havoc(r0_pack.uvalue); - havoc_offsets(r0_reg); - type_inv.assign_type(m_inv, r0_reg, T_NUM); - // assume(r0_pack.value < 0); for INTEGER_OR_NO_RETURN_IF_SUCCEED. - } -out: - scratch_caller_saved_registers(); - if (call.reallocate_packet) { - forget_packet_pointers(); - } -} - -void ebpf_transformer::operator()(const CallLocal& call) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - save_callee_saved_registers(call.stack_frame_prefix); - - // Update r10. - constexpr Reg r10_reg{R10_STACK_POINTER}; - add(r10_reg, -EBPF_SUBPROGRAM_STACK_SIZE, 64); -} - -void ebpf_transformer::operator()(const Callx& callx) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - - // Look up the helper function id. - const reg_pack_t& reg = reg_pack(callx.func); - const auto src_interval = m_inv.eval_interval(reg.svalue); - if (const auto sn = src_interval.singleton()) { - if (sn->fits()) { - // We can now process it as if the id was immediate. 
- const int32_t imm = sn->cast_to(); - if (!global_program_info->platform->is_helper_usable(imm)) { - return; - } - const Call call = make_call(imm, *global_program_info->platform); - (*this)(call); - } - } -} - -void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const bool maybe_null) { - const EbpfMapDescriptor& desc = global_program_info->platform->get_map_descriptor(mapfd); - const EbpfMapType& type = global_program_info->platform->get_map_type(desc.type); - if (type.value_type == EbpfMapValueType::PROGRAM) { - type_inv.assign_type(m_inv, dst_reg, T_MAP_PROGRAMS); - } else { - type_inv.assign_type(m_inv, dst_reg, T_MAP); - } - const reg_pack_t& dst = reg_pack(dst_reg); - assign(dst.map_fd, mapfd); - assign_valid_ptr(dst_reg, maybe_null); -} - -void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } - -void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { - using namespace crab::dsl_syntax; - const reg_pack_t& reg = reg_pack(dst_reg); - havoc(reg.svalue); - havoc(reg.uvalue); - if (maybe_null) { - m_inv += 0 <= reg.svalue; - } else { - m_inv += 0 < reg.svalue; - } - m_inv += reg.svalue <= PTR_MAX; - assign(reg.uvalue, reg.svalue); -} - -// If nothing is known of the stack_numeric_size, -// try to recompute the stack_numeric_size. 
-void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { - const variable_t stack_numeric_size_variable = - variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); - - if (!inv.eval_interval(stack_numeric_size_variable).is_top()) { - return; - } - - if (type_inv.has_type(inv, type_variable, T_STACK)) { - const int numeric_size = - stack.min_all_num_size(inv, variable_t::kind_var(data_kind_t::stack_offsets, type_variable)); - if (numeric_size > 0) { - inv.assign(stack_numeric_size_variable, numeric_size); - } - } -} - -void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { - recompute_stack_numeric_size(inv, reg_pack(reg).type); -} - -void ebpf_transformer::add(const Reg& reg, const int imm, const int finite_width) { - const auto dst = reg_pack(reg); - const auto offset = dom.get_type_offset_variable(reg); - add_overflow(dst.svalue, dst.uvalue, imm, finite_width); - if (offset.has_value()) { - add(offset.value(), imm); - if (imm > 0) { - // Since the start offset is increasing but - // the end offset is not, the numeric size decreases. - sub(dst.stack_numeric_size, imm); - } else if (imm < 0) { - havoc(dst.stack_numeric_size); - } - recompute_stack_numeric_size(m_inv, reg); - } -} - -void ebpf_transformer::shl(const Reg& dst_reg, int imm, const int finite_width) { - const reg_pack_t dst = reg_pack(dst_reg); - - // The BPF ISA requires masking the imm. - imm &= finite_width - 1; - - if (m_inv.entail(type_is_number(dst))) { - const auto interval = m_inv.eval_interval(dst.uvalue); - if (interval.finite_size()) { - const number_t lb = interval.lb().number().value(); - const number_t ub = interval.ub().number().value(); - uint64_t lb_n = lb.cast_to(); - uint64_t ub_n = ub.cast_to(); - const uint64_t uint_max = finite_width == 64 ? 
uint64_t{std::numeric_limits::max()} - : uint64_t{std::numeric_limits::max()}; - if (lb_n >> (finite_width - imm) != ub_n >> (finite_width - imm)) { - // The bits that will be shifted out to the left are different, - // which means all combinations of remaining bits are possible. - lb_n = 0; - ub_n = uint_max << imm & uint_max; - } else { - // The bits that will be shifted out to the left are identical - // for all values in the interval, so we can safely shift left - // to get a new interval. - lb_n = lb_n << imm & uint_max; - ub_n = ub_n << imm & uint_max; - } - m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.svalue, dst.uvalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; - } - } - shl_overflow(dst.svalue, dst.uvalue, imm); - havoc_offsets(dst_reg); -} - -void ebpf_transformer::lshr(const Reg& dst_reg, int imm, int finite_width) { - reg_pack_t dst = reg_pack(dst_reg); - - // The BPF ISA requires masking the imm. - imm &= finite_width - 1; - - if (m_inv.entail(type_is_number(dst))) { - auto interval = m_inv.eval_interval(dst.uvalue); - number_t lb_n{0}; - number_t ub_n{std::numeric_limits::max() >> imm}; - if (interval.finite_size()) { - number_t lb = interval.lb().number().value(); - number_t ub = interval.ub().number().value(); - if (finite_width == 64) { - lb_n = lb.cast_to() >> imm; - ub_n = ub.cast_to() >> imm; - } else { - number_t lb_w = lb.cast_to_sint(finite_width); - number_t ub_w = ub.cast_to_sint(finite_width); - lb_n = lb_w.cast_to() >> imm; - ub_n = ub_w.cast_to() >> imm; - - // The interval must be valid since a signed range crossing 0 - // was earlier converted to a full unsigned range. 
- assert(lb_n <= ub_n); - } + return interval_t::top(); } - m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.svalue, dst.uvalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(dst_reg); -} - -static int _movsx_bits(const Bin::Op op) { - switch (op) { - case Bin::Op::MOVSX8: return 8; - case Bin::Op::MOVSX16: return 16; - case Bin::Op::MOVSX32: return 32; - default: throw std::exception(); } + return result; } -void ebpf_transformer::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, - const Bin::Op op) { - using namespace crab; - - const int bits = _movsx_bits(op); - const reg_pack_t dst = reg_pack(dst_reg); - interval_t right_interval = m_inv.eval_interval(right_svalue); - type_inv.assign_type(m_inv, dst_reg, T_NUM); - havoc_offsets(dst_reg); - const int64_t span = 1ULL << bits; - if (right_interval.ub() - right_interval.lb() >= span) { - // Interval covers the full space. - if (bits == 64) { - havoc(dst.svalue); - return; - } - right_interval = interval_t::signed_int(bits); - } - const int64_t mask = 1ULL << (bits - 1); - - // Sign extend each bound. - int64_t lb = right_interval.lb().number().value().cast_to(); - lb &= span - 1; - lb = (lb ^ mask) - mask; - int64_t ub = right_interval.ub().number().value().cast_to(); - ub &= span - 1; - ub = (ub ^ mask) - mask; - m_inv.set(dst.svalue, interval_t{lb, ub}); - - if (finite_width) { - m_inv.assign(dst.uvalue, dst.svalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); +// We can deal with a range of max_entries values. 
+interval_t ebpf_domain_t::get_map_max_entries(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } -} - -void ebpf_transformer::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { - using namespace crab; - reg_pack_t dst = reg_pack(dst_reg); - if (m_inv.entail(type_is_number(dst))) { - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_positive = interval_t::bottom(); - interval_t left_interval_negative = interval_t::bottom(); - get_signed_intervals(m_inv, finite_width == 64, dst.svalue, dst.uvalue, right_svalue, left_interval, - right_interval, left_interval_positive, left_interval_negative); - if (auto sn = right_interval.singleton()) { - // The BPF ISA requires masking the imm. - int64_t imm = sn->cast_to() & (finite_width - 1); - - int64_t lb_n = std::numeric_limits::min() >> imm; - int64_t ub_n = std::numeric_limits::max() >> imm; - if (left_interval.finite_size()) { - const auto [lb, ub] = left_interval.pair_number(); - if (finite_width == 64) { - lb_n = lb.cast_to() >> imm; - ub_n = ub.cast_to() >> imm; - } else { - number_t lb_w = lb.cast_to_sint(finite_width) >> gsl::narrow(imm); - number_t ub_w = ub.cast_to_sint(finite_width) >> gsl::narrow(imm); - if (lb_w.cast_to() <= ub_w.cast_to()) { - lb_n = lb_w.cast_to(); - ub_n = ub_w.cast_to(); - } - } - } - m_inv.set(dst.svalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.uvalue, dst.svalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t(map->max_entries); + } else { + return interval_t::top(); } } - 
havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(dst_reg); -} - -static void apply(NumAbsDomain& inv, const binop_t& op, const variable_t x, const variable_t y, const variable_t z) { - inv.apply(op, x, y, z, 0); + return result; } -void ebpf_transformer::operator()(const Bin& bin) { - using namespace crab::dsl_syntax; - - auto dst = reg_pack(bin.dst); - int finite_width = bin.is64 ? 64 : 32; - - if (auto pimm = std::get_if(&bin.v)) { - // dst += K - int64_t imm; - if (bin.is64) { - // Use the full signed value. - imm = to_signed(pimm->v); - } else { - // Use only the low 32 bits of the value. - imm = gsl::narrow_cast(pimm->v); - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - // If this is a 32-bit operation and the destination is not a number, forget everything about the register. - if (!type_inv.has_type(m_inv, bin.dst, T_NUM)) { - havoc_register(m_inv, bin.dst); - havoc_offsets(bin.dst); - havoc(dst.type); - } - } - switch (bin.op) { - case Bin::Op::MOV: - assign(dst.svalue, imm); - assign(dst.uvalue, imm); - overflow_unsigned(m_inv, dst.uvalue, bin.is64 ? 
64 : 32); - type_inv.assign_type(m_inv, bin.dst, T_NUM); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOVSX8: - case Bin::Op::MOVSX16: - case Bin::Op::MOVSX32: CRAB_ERROR("Unsupported operation"); - case Bin::Op::ADD: - if (imm == 0) { - return; - } - add(bin.dst, gsl::narrow(imm), finite_width); - break; - case Bin::Op::SUB: - if (imm == 0) { - return; - } - add(bin.dst, gsl::narrow(-imm), finite_width); - break; - case Bin::Op::MUL: - mul(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UDIV: - udiv(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UMOD: - urem(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SDIV: - sdiv(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SMOD: - srem(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::OR: - bitwise_or(dst.svalue, dst.uvalue, imm); - havoc_offsets(bin.dst); - break; - case Bin::Op::AND: - // FIX: what to do with ptr&-8 as in counter/simple_loop_unrolled? - bitwise_and(dst.svalue, dst.uvalue, imm); - if (gsl::narrow(imm) > 0) { - // AND with immediate is only a 32-bit operation so svalue and uvalue are the same. 
- assume(dst.svalue <= imm); - assume(dst.uvalue <= imm); - assume(0 <= dst.svalue); - assume(0 <= dst.uvalue); - } - havoc_offsets(bin.dst); - break; - case Bin::Op::LSH: shl(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::RSH: lshr(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::ARSH: ashr(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::XOR: - bitwise_xor(dst.svalue, dst.uvalue, imm); - havoc_offsets(bin.dst); - break; - } - } else { - // dst op= src - auto src_reg = std::get(bin.v); - auto src = reg_pack(src_reg); - switch (bin.op) { - case Bin::Op::ADD: { - if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { - // both must be numbers - add_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); - } else { - // Here we're not sure that lhs and rhs are the same type; they might be. - // But previous assertions should fail unless we know that exactly one of lhs or rhs is a pointer. - m_inv = - type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t dst_type) { - inv = type_inv.join_over_types( - inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t src_type) { - if (dst_type == T_NUM && src_type != T_NUM) { - // num += ptr - type_inv.assign_type(inv, bin.dst, src_type); - if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, src_type)) { - crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst.svalue, - dom.get_type_offset_variable(src_reg, src_type).value()); - } - if (src_type == T_SHARED) { - inv.assign(dst.shared_region_size, src.shared_region_size); - } - } else if (dst_type != T_NUM && src_type == T_NUM) { - // ptr += num - type_inv.assign_type(inv, bin.dst, dst_type); - if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, dst_type)) { - crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst_offset.value(), - src.svalue); - if (dst_type == T_STACK) { - // Reduce the numeric size. 
- using namespace crab::dsl_syntax; - if (m_inv.intersect(src.svalue < 0)) { - inv -= dst.stack_numeric_size; - recompute_stack_numeric_size(inv, dst.type); - } else { - apply_signed(inv, arith_binop_t::SUB, dst.stack_numeric_size, - dst.stack_numeric_size, dst.stack_numeric_size, src.svalue, - 0); - } - } - } - } else if (dst_type == T_NUM && src_type == T_NUM) { - // dst and src don't necessarily have the same type, but among the possibilities - // enumerated is the case where they are both numbers. - apply_signed(inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, - src.svalue, finite_width); - } else { - // We ignore the cases here that do not match the assumption described - // above. Joining bottom with another results will leave the other - // results unchanged. - inv.set_to_bottom(); - } - }); - }); - // careful: change dst.value only after dealing with offset - apply_signed(m_inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, src.svalue, finite_width); - } - break; - } - case Bin::Op::SUB: { - if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { - // src and dest have the same type. - m_inv = type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t type) { - switch (type) { - case T_NUM: - // This is: sub_overflow(inv, dst.value, src.value, finite_width); - apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst.svalue, src.svalue, - finite_width); - type_inv.assign_type(inv, bin.dst, T_NUM); - crab::havoc_offsets(inv, bin.dst); - break; - default: - // ptr -= ptr - // Assertions should make sure we only perform this on non-shared pointers. 
- if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, type)) { - apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst_offset.value(), - dom.get_type_offset_variable(src_reg, type).value(), finite_width); - inv -= dst_offset.value(); - } - crab::havoc_offsets(inv, bin.dst); - type_inv.assign_type(inv, bin.dst, T_NUM); - break; - } - }); - } else { - // We're not sure that lhs and rhs are the same type. - // Either they're different, or at least one is not a singleton. - if (type_inv.get_type(m_inv, std::get(bin.v)) != T_NUM) { - type_inv.havoc_type(m_inv, bin.dst); - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - } else { - sub_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); - if (auto dst_offset = dom.get_type_offset_variable(bin.dst)) { - sub(dst_offset.value(), src.svalue); - if (type_inv.has_type(m_inv, dst.type, T_STACK)) { - // Reduce the numeric size. - using namespace crab::dsl_syntax; - if (m_inv.intersect(src.svalue > 0)) { - m_inv -= dst.stack_numeric_size; - recompute_stack_numeric_size(m_inv, dst.type); - } else { - crab::apply(m_inv, arith_binop_t::ADD, dst.stack_numeric_size, dst.stack_numeric_size, - src.svalue); - } - } - } - } - } - break; - } - case Bin::Op::MUL: - mul(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UDIV: - udiv(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UMOD: - urem(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SDIV: - sdiv(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SMOD: - srem(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::OR: - bitwise_or(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::AND: - bitwise_and(dst.svalue, dst.uvalue, 
src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::LSH: - if (m_inv.entail(type_is_number(src_reg))) { - auto src_interval = m_inv.eval_interval(src.uvalue); - if (std::optional sn = src_interval.singleton()) { - // truncate to uint64? - uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); - if (imm <= std::numeric_limits::max()) { - if (!bin.is64) { - // Use only the low 32 bits of the value. - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - } - shl(bin.dst, gsl::narrow_cast(imm), finite_width); - break; - } - } - } - shl_overflow(dst.svalue, dst.uvalue, src.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::RSH: - if (m_inv.entail(type_is_number(src_reg))) { - auto src_interval = m_inv.eval_interval(src.uvalue); - if (std::optional sn = src_interval.singleton()) { - uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); - if (imm <= std::numeric_limits::max()) { - if (!bin.is64) { - // Use only the low 32 bits of the value. - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - } - lshr(bin.dst, gsl::narrow_cast(imm), finite_width); - break; - } - } - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::ARSH: - if (m_inv.entail(type_is_number(src_reg))) { - ashr(bin.dst, src.svalue, finite_width); - break; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::XOR: - bitwise_xor(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOVSX8: - case Bin::Op::MOVSX16: - case Bin::Op::MOVSX32: - // Keep relational information if operation is a no-op. 
- if (dst.svalue == src.svalue && - m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(_movsx_bits(bin.op))) { - return; - } - if (m_inv.entail(type_is_number(src_reg))) { - sign_extend(bin.dst, src.svalue, finite_width, bin.op); - break; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOV: - // Keep relational information if operation is a no-op. - if (dst.svalue == src.svalue && - m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(bin.is64 ? 64 : 32)) { - return; - } - assign(dst.svalue, src.svalue); - assign(dst.uvalue, src.uvalue); - havoc_offsets(bin.dst); - m_inv = type_inv.join_over_types(m_inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t type) { - switch (type) { - case T_CTX: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.ctx_offset, src.ctx_offset); - } - break; - case T_MAP: - case T_MAP_PROGRAMS: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.map_fd, src.map_fd); - } - break; - case T_PACKET: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.packet_offset, src.packet_offset); - } - break; - case T_SHARED: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.shared_region_size, src.shared_region_size); - inv.assign(dst.shared_offset, src.shared_offset); - } - break; - case T_STACK: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.stack_offset, src.stack_offset); - inv.assign(dst.stack_numeric_size, src.stack_numeric_size); - } - break; - default: inv.assign(dst.type, type); break; - } - }); - if (bin.is64) { - // Add dst.type=src.type invariant. - if (bin.dst.v != std::get(bin.v).v || type_inv.get_type(m_inv, dst.type) == T_UNINIT) { - // Only forget the destination type if we're copying from a different register, - // or from the same uninitialized register. 
- havoc(dst.type); - } - type_inv.assign_type(m_inv, bin.dst, std::get(bin.v)); - } - break; - } - } - if (!bin.is64) { - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); +extended_number ebpf_domain_t::get_loop_count_upper_bound() const { + extended_number ub{0}; + for (const variable_t counter : variable_t::get_loop_counters()) { + ub = std::max(ub, m_inv[counter].ub()); } + return ub; } -string_invariant ebpf_domain_t::to_set() const { return this->m_inv.to_set() + this->stack.to_set(); } - std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom) { if (dom.is_bottom()) { o << "_|_"; @@ -2892,9 +291,9 @@ std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom) { return o; } -void ebpf_transformer::initialize_packet(ebpf_domain_t& inv) { +void ebpf_domain_t::initialize_packet() { using namespace crab::dsl_syntax; - + ebpf_domain_t& inv = *this; inv -= variable_t::packet_size(); inv -= variable_t::meta_offset(); @@ -2905,14 +304,14 @@ void ebpf_transformer::initialize_packet(ebpf_domain_t& inv) { inv += variable_t::meta_offset() <= 0; inv += variable_t::meta_offset() >= -4098; } else { - ebpf_transformer{inv}.assign(variable_t::meta_offset(), 0); + inv.m_inv.assign(variable_t::meta_offset(), 0); } } ebpf_domain_t ebpf_domain_t::from_constraints(const std::set& constraints, const bool setup_constraints) { ebpf_domain_t inv; if (setup_constraints) { - inv = ebpf_transformer::setup_entry(false); + inv = setup_entry(false); } auto numeric_ranges = std::vector(); for (const auto& cst : parse_linear_constraints(constraints, numeric_ranges)) { @@ -2927,7 +326,7 @@ ebpf_domain_t ebpf_domain_t::from_constraints(const std::set& const return inv; } -ebpf_domain_t ebpf_transformer::setup_entry(const bool init_r1) { +ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { using namespace crab::dsl_syntax; ebpf_domain_t inv; @@ -2949,24 +348,8 @@ ebpf_domain_t ebpf_transformer::setup_entry(const bool init_r1) { 
inv.type_inv.assign_type(inv.m_inv, r1_reg, T_CTX); } - initialize_packet(inv); + inv.initialize_packet(); return inv; } -void ebpf_transformer::initialize_loop_counter(const label_t& label) { - m_inv.assign(variable_t::loop_counter(to_string(label)), 0); -} - -extended_number ebpf_domain_t::get_loop_count_upper_bound() const { - extended_number ub{0}; - for (const variable_t counter : variable_t::get_loop_counters()) { - ub = std::max(ub, m_inv[counter].ub()); - } - return ub; -} - -void ebpf_transformer::operator()(const IncrementLoopCounter& ins) { - const auto counter = variable_t::loop_counter(to_string(ins.name)); - this->add(counter, 1); -} } // namespace crab diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index 880830a93..001dcfaf3 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -15,6 +15,24 @@ namespace crab { +// Pointers in the BPF VM are defined to be 64 bits. Some contexts, like +// data, data_end, and meta in Linux's struct xdp_md are only 32 bit offsets +// from a base address not exposed to the program, but when a program is loaded, +// the offsets get replaced with 64-bit address pointers. However, we currently +// need to do pointer arithmetic on 64-bit numbers so for now we cap the interval +// to 32 bits. 
+constexpr int MAX_PACKET_SIZE = 0xffff; +constexpr int64_t PTR_MAX = std::numeric_limits::max() - MAX_PACKET_SIZE; + +class ebpf_domain_t; + +void ebpf_domain_transform(ebpf_domain_t& inv, const Instruction& ins); +void ebpf_domain_assume(ebpf_domain_t& dom, const Assertion& assertion); +std::vector ebpf_domain_check(ebpf_domain_t& dom, const label_t& label, const Assertion& assertion); + +// TODO: make this an explicit instruction +void ebpf_domain_initialize_loop_counter(ebpf_domain_t& dom, const label_t& label); + class ebpf_domain_t final { friend class ebpf_checker; friend class ebpf_transformer; @@ -49,7 +67,10 @@ class ebpf_domain_t final { static ebpf_domain_t calculate_constant_limits(); extended_number get_loop_count_upper_bound() const; + static ebpf_domain_t setup_entry(bool init_r1); static ebpf_domain_t from_constraints(const std::set& constraints, bool setup_constraints); + void initialize_packet(); + string_invariant to_set() const; private: @@ -89,157 +110,4 @@ class ebpf_domain_t final { TypeDomain type_inv; }; -class ebpf_checker final { - ebpf_domain_t& dom; - // shorthands: - NumAbsDomain& m_inv; - domains::array_domain_t& stack; - TypeDomain& type_inv; - - public: - explicit ebpf_checker(ebpf_domain_t& dom) : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} - - void operator()(const Assertion&); - - void operator()(const Addable&); - void operator()(const Comparable&); - void operator()(const FuncConstraint&); - void operator()(const ValidDivisor&); - void operator()(const TypeConstraint&); - void operator()(const ValidAccess&); - void operator()(const ValidCall&); - void operator()(const ValidMapKeyValue&); - void operator()(const ValidSize&); - void operator()(const ValidStore&); - void operator()(const ZeroCtxOffset&); - void operator()(const BoundedLoopCount&); - - typedef bool check_require_func_t(NumAbsDomain&, const linear_constraint_t&, std::string); - void set_require_check(std::function f); - - private: - // 
memory check / load / store - void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; - void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; - void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - std::optional packet_size) const; - void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - variable_t shared_region_size) const; - std::function check_require{}; - std::string current_assertion; - void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const; -}; - -class ebpf_transformer final { - ebpf_domain_t& dom; - // shorthands: - NumAbsDomain& m_inv; - domains::array_domain_t& stack; - TypeDomain& type_inv; - - public: - explicit ebpf_transformer(ebpf_domain_t& dom) - : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} - - // abstract transformers - void operator()(const Assume&); - void operator()(const Bin&); - void operator()(const Call&); - void operator()(const CallLocal&); - void operator()(const Callx&); - void operator()(const Exit&); - void operator()(const Jmp&) const; - void operator()(const LoadMapFd&); - void operator()(const Atomic&); - void operator()(const Mem&); - void operator()(const Packet&); - void operator()(const Un&); - void operator()(const Undefined&); - void operator()(const IncrementLoopCounter&); - - void initialize_loop_counter(const label_t& label); - - static ebpf_domain_t setup_entry(bool init_r1); - - private: - void assign(variable_t lhs, variable_t rhs); - void assign(variable_t x, const linear_expression_t& e); - void assign(variable_t x, int64_t e); - - void apply(arith_binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); - void apply(arith_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - void 
apply(bitwise_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - void apply(bitwise_binop_t op, variable_t x, variable_t y, const number_t& k, int finite_width); - void apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); - void apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - - void add(const Reg& reg, int imm, int finite_width); - void add(variable_t lhs, variable_t op2); - void add(variable_t lhs, const number_t& op2); - void sub(variable_t lhs, variable_t op2); - void sub(variable_t lhs, const number_t& op2); - void add_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void add_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void sub_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void sub_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void neg(variable_t lhss, variable_t lhsu, int finite_width); - void mul(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void mul(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void sdiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void sdiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void udiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void udiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void srem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void srem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void urem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void urem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - - void bitwise_and(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_and(variable_t lhss, 
variable_t lhsu, const number_t& op2); - void bitwise_or(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_or(variable_t lhss, variable_t lhsu, const number_t& op2); - void bitwise_xor(variable_t lhsss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_xor(variable_t lhss, variable_t lhsu, const number_t& op2); - void shl(const Reg& reg, int imm, int finite_width); - void shl_overflow(variable_t lhss, variable_t lhsu, variable_t op2); - void shl_overflow(variable_t lhss, variable_t lhsu, const number_t& op2); - void lshr(const Reg& reg, int imm, int finite_width); - void ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width); - void sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, Bin::Op op); - - void assume(const linear_constraint_t& cst); - - /// Forget everything we know about the value of a variable. - void havoc(variable_t v); - - /// Forget everything about all offset variables for a given register. 
- void havoc_offsets(const Reg& reg); - - void scratch_caller_saved_registers(); - void save_callee_saved_registers(const std::string& prefix); - void restore_callee_saved_registers(const std::string& prefix); - void havoc_subprogram_stack(const std::string& prefix); - void forget_packet_pointers(); - void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); - - void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); - - void recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const; - void recompute_stack_numeric_size(NumAbsDomain& inv, variable_t type_variable) const; - void do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width, - const Reg& src_reg); - void do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, int width); - void do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width); - void do_load(const Mem& b, const Reg& target_reg); - - void do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, int width, - const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); - - void do_mem_store(const Mem& b, const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); - - static void initialize_packet(ebpf_domain_t& inv); -}; // end ebpf_domain_t - } // namespace crab diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp new file mode 100644 index 000000000..7936600c8 --- /dev/null +++ b/src/crab/ebpf_transformer.cpp @@ -0,0 +1,2430 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT + +// This file is eBPF-specific, not derived from CRAB. 
+
+#include
+#include
+#include
+#include
+
+#include "boost/endian/conversion.hpp"
+
+#include "asm_ostream.hpp"
+#include "asm_unmarshal.hpp"
+#include "config.hpp"
+#include "crab/array_domain.hpp"
+#include "crab/ebpf_domain.hpp"
+#include "crab_utils/num_safety.hpp"
+#include "dsl_syntax.hpp"
+#include "platform.hpp"
+#include "string_constraints.hpp"
+
+using crab::domains::NumAbsDomain;
+namespace crab {
+
+// Visitor over the instruction variant: each operator() applies the abstract
+// transformer of one instruction kind to the ebpf_domain_t passed to the
+// constructor. The object only holds references into that domain, so it must
+// not outlive it.
+class ebpf_transformer final {
+    ebpf_domain_t& dom;
+    // shorthands (references to members of `dom`):
+    NumAbsDomain& m_inv;
+    domains::array_domain_t& stack;
+    TypeDomain& type_inv;
+
+  public:
+    explicit ebpf_transformer(ebpf_domain_t& dom)
+        : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {}
+
+    // abstract transformers, one per instruction kind
+    void operator()(const Assume&);
+    void operator()(const Atomic&);
+    void operator()(const Bin&);
+    void operator()(const Call&);
+    void operator()(const CallLocal&);
+    void operator()(const Callx&);
+    void operator()(const Exit&);
+    void operator()(const IncrementLoopCounter&);
+    void operator()(const Jmp&) const;
+    void operator()(const LoadMapFd&);
+    void operator()(const Mem&);
+    void operator()(const Packet&);
+    void operator()(const Un&);
+    void operator()(const Undefined&);
+
+    void initialize_loop_counter(const label_t& label);
+
+    // NOTE(review): this patch moves the definition of setup_entry to
+    // ebpf_domain_t::setup_entry; confirm whether this declaration is still needed.
+    static ebpf_domain_t setup_entry(bool init_r1);
+
+  private:
+    void assign(variable_t lhs, variable_t rhs);
+    void assign(variable_t x, const linear_expression_t& e);
+    void assign(variable_t x, int64_t e);
+
+    void apply(arith_binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width);
+    void apply(arith_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width);
+    void apply(bitwise_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width);
+    void apply(bitwise_binop_t op, variable_t x, variable_t y, const number_t& k, int finite_width);
+    void apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width);
+    void apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width);
+
+    void add(const Reg& reg, int imm, int finite_width);
+    void add(variable_t lhs, variable_t op2);
+    void add(variable_t lhs, const number_t& op2);
+    void sub(variable_t lhs, variable_t op2);
+    void sub(variable_t lhs, const number_t& op2);
+    void add_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void add_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void sub_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void sub_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void neg(variable_t lhss, variable_t lhsu, int finite_width);
+    void mul(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void mul(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void sdiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void sdiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void udiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void udiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void srem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void srem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+    void urem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void urem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width);
+
+    void bitwise_and(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void bitwise_and(variable_t lhss, variable_t lhsu, const number_t& op2);
+    void bitwise_or(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void bitwise_or(variable_t lhss, variable_t lhsu, const number_t& op2);
+    void bitwise_xor(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width);
+    void bitwise_xor(variable_t lhss, variable_t lhsu, const number_t& op2);
+    void shl(const Reg& reg, int imm, int finite_width);
+    void shl_overflow(variable_t lhss, variable_t lhsu, variable_t op2);
+    void shl_overflow(variable_t lhss, variable_t lhsu, const number_t& op2);
+    void lshr(const Reg& reg, int imm, int finite_width);
+    void ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width);
+    void sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, Bin::Op op);
+
+    void assume(const linear_constraint_t& cst);
+
+    /// Forget everything we know about the value of a variable.
+    void havoc(variable_t v);
+
+    /// Forget everything about all offset variables for a given register.
+    void havoc_offsets(const Reg& reg);
+
+    void scratch_caller_saved_registers();
+    void save_callee_saved_registers(const std::string& prefix);
+    void restore_callee_saved_registers(const std::string& prefix);
+    void havoc_subprogram_stack(const std::string& prefix);
+    void forget_packet_pointers();
+    void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null);
+
+    void assign_valid_ptr(const Reg& dst_reg, bool maybe_null);
+
+    void recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const;
+    void recompute_stack_numeric_size(NumAbsDomain& inv, variable_t type_variable) const;
+    void do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width,
+                       const Reg& src_reg);
+    void do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, int width);
+    void do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width);
+    void do_load(const Mem& b, const Reg& target_reg);
+
+    void do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, int width,
+                        const linear_expression_t& val_type, const linear_expression_t& val_svalue,
+                        const linear_expression_t& val_uvalue, const std::optional& opt_val_reg);
+
+    void do_mem_store(const Mem& b, const linear_expression_t& val_type, const linear_expression_t& val_svalue,
+                      const linear_expression_t& val_uvalue, const std::optional& opt_val_reg);
+}; // end ebpf_transformer
+
+// Free-function entry point: applies the transformer for `ins` to `inv` by
+// visiting the instruction variant with a temporary ebpf_transformer.
+void ebpf_domain_transform(ebpf_domain_t& inv, const Instruction& ins) { std::visit(ebpf_transformer{inv}, ins); }
+
+/** Linear constraint for a pointer comparison.
+ *
+ * Maps the comparison operator `op` onto a constraint between the two offset
+ * variables. Signed and unsigned operators produce the same constraint, and
+ * any operator not listed yields the trivially true `x - x == 0`.
+ */
+static linear_constraint_t assume_cst_offsets_reg(const Condition::Op op, const variable_t dst_offset,
+                                                  const variable_t src_offset) {
+    using namespace crab::dsl_syntax;
+    using Op = Condition::Op;
+    switch (op) {
+    case Op::EQ: return eq(dst_offset, src_offset);
+    case Op::NE: return neq(dst_offset, src_offset);
+    case Op::GE: return dst_offset >= src_offset;
+    case Op::SGE: return dst_offset >= src_offset; // pointer comparison is unsigned
+    case Op::LE: return dst_offset <= src_offset;
+    case Op::SLE: return dst_offset <= src_offset; // pointer comparison is unsigned
+    case Op::GT: return dst_offset > src_offset;
+    case Op::SGT: return dst_offset > src_offset; // pointer comparison is unsigned
+    case Op::SLT: return src_offset > dst_offset;
+    // Note: the test is written in reverse (src > dst) as a workaround for a strange lookup:
+    case Op::LT: return src_offset > dst_offset; // FIX unsigned
+    default: return dst_offset - dst_offset == 0; // trivially true
+    }
+}
+
+// Evaluates a bit-test condition (SET / NSET) when both operands are singleton
+// intervals that fit the cast; returns no constraints otherwise.
+static std::vector assume_bit_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64,
+                                                                variable_t dst_uvalue, interval_t src_interval) {
+    using namespace crab::dsl_syntax;
+    using Op = Condition::Op;
+
+    auto dst_interval = inv.eval_interval(dst_uvalue);
+    std::optional dst_n = dst_interval.singleton();
+    if (!dst_n || !dst_n.value().fits_cast_to()) {
+        return {};
+    }
+
+    std::optional src_n = src_interval.singleton();
+    if (!src_n || !src_n->fits_cast_to()) {
+        return {};
+    }
+    uint64_t src_int_value = src_n.value().cast_to();
+    if (!is64) {
+        src_int_value = gsl::narrow_cast(src_int_value);
+    }
+
+    bool result;
+    switch (op) {
+    case 
Op::SET: result = (dst_n.value().cast_to() & src_int_value) != 0; break;
+    case Op::NSET: result = (dst_n.value().cast_to() & src_int_value) == 0; break;
+    default: throw std::exception();
+    }
+
+    return {result ? linear_constraint_t::true_const() : linear_constraint_t::false_const()};
+}
+
+// Constraints for a 64-bit `left == right`: both the signed and the unsigned
+// view of left are tied to right. When the right interval is non-negative and
+// not a singleton, svalue == uvalue is added as well.
+static std::vector assume_signed_64bit_eq(const NumAbsDomain& inv, const variable_t left_svalue,
+                                                               const variable_t left_uvalue,
+                                                               const interval_t& right_interval,
+                                                               const linear_expression_t& right_svalue,
+                                                               const linear_expression_t& right_uvalue) {
+    using namespace crab::dsl_syntax;
+    if (right_interval <= interval_t::nonnegative(64) && !right_interval.is_singleton()) {
+        return {(left_svalue == right_svalue), (left_uvalue == right_uvalue), eq(left_svalue, left_uvalue)};
+    } else {
+        return {(left_svalue == right_svalue), (left_uvalue == right_uvalue)};
+    }
+}
+
+// Constraints for a 32-bit `left == right` where right is a singleton and the
+// signed interval of left is finite: left is narrowed to the range spanned by
+// the lowest and highest 64-bit values inside its interval whose low 32 bits
+// equal the singleton. Returns a false constraint when no such value exists,
+// and no constraints when right is not a singleton or left is unbounded.
+static std::vector assume_signed_32bit_eq(const NumAbsDomain& inv, const variable_t left_svalue,
+                                                               const variable_t left_uvalue,
+                                                               const interval_t& right_interval) {
+    using namespace crab::dsl_syntax;
+
+    if (const auto rn = right_interval.singleton()) {
+        const auto left_svalue_interval = inv.eval_interval(left_svalue);
+        if (left_svalue_interval.finite_size()) {
+            // Distance between consecutive 64-bit values that share the same low 32 bits.
+            constexpr int64_t step = 0x100000000;
+
+            // Find the lowest 64-bit svalue whose low 32 bits match the singleton.
+
+            // Get lower bound as a 64-bit value.
+            const int64_t lb = left_svalue_interval.lb().number()->cast_to();
+
+            // Use the high 32-bits from the left lower bound and the low 32-bits from the right singleton.
+            // The result might be lower than the lower bound.
+            int64_t lb_match = (lb & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF);
+            if (lb_match < lb) {
+                if (lb_match > std::numeric_limits<int64_t>::max() - step) {
+                    // No representable value at or above the lower bound has the required low 32 bits.
+                    return {linear_constraint_t::false_const()};
+                }
+                // The result is lower than the left interval, so try the next higher matching 64-bit value.
+                // It's ok if this goes higher than the left upper bound.
+                lb_match += step;
+            }
+
+            // Find the highest 64-bit svalue whose low 32 bits match the singleton.
+
+            // Get upper bound as a 64-bit value.
+            const int64_t ub = left_svalue_interval.ub().number()->cast_to();
+
+            // Use the high 32-bits from the left upper bound and the low 32-bits from the right singleton.
+            // The result might be higher than the upper bound.
+            int64_t ub_match = (ub & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF);
+            if (ub_match > ub) {
+                if (ub_match < std::numeric_limits<int64_t>::min() + step) {
+                    // No representable value at or below the upper bound has the required low 32 bits.
+                    return {linear_constraint_t::false_const()};
+                }
+                // The result is higher than the left interval, so try the next lower matching 64-bit value.
+                // It's ok if this goes lower than the left lower bound.
+                ub_match -= step;
+            }
+
+            if (lb_match > ub_match) {
+                // No value inside the left interval has the required low 32 bits.
+                return {linear_constraint_t::false_const()};
+            }
+
+            if (to_unsigned(lb_match) <= to_unsigned(ub_match)) {
+                // The interval is also valid when cast to a uvalue, meaning
+                // both bounds are positive or both are negative.
+                return {left_svalue >= lb_match, left_svalue <= ub_match, left_uvalue >= to_unsigned(lb_match),
+                        left_uvalue <= to_unsigned(ub_match)};
+            } else {
+                // The interval can only be represented as an svalue.
+                return {left_svalue >= lb_match, left_svalue <= ub_match};
+            }
+        }
+    }
+    return {};
+}
+
+// Given left and right values, get the left and right intervals, and also split
+// the left interval into separate negative and positive intervals.
+static void get_signed_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue,
+                                 const variable_t left_uvalue, const linear_expression_t& right_svalue,
+                                 interval_t& left_interval, interval_t& right_interval,
+                                 interval_t& left_interval_positive, interval_t& left_interval_negative) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    // Get intervals as 32-bit or 64-bit as appropriate.
+    left_interval = inv.eval_interval(left_svalue);
+    right_interval = inv.eval_interval(right_svalue);
+    if (!is64) {
+        if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) ||
+            (left_interval <= interval_t::negative(32) && right_interval <= interval_t::negative(32))) {
+            // NOTE(review): is64 is a by-value parameter and is not read again in this
+            // function, so this assignment is not visible to the caller; confirm intent.
+            is64 = true;
+            // fallthrough as 64bit, including deduction of relational information
+        } else {
+            left_interval = left_interval.truncate_to();
+            right_interval = right_interval.truncate_to();
+            // continue as 32bit
+        }
+    }
+
+    if (!left_interval.is_top()) {
+        // Split the signed interval at zero.
+        left_interval_positive = left_interval & interval_t::nonnegative(64);
+        left_interval_negative = left_interval & interval_t::negative(64);
+    } else {
+        // Nothing is known about the signed value; fall back to the unsigned one.
+        left_interval = inv.eval_interval(left_uvalue);
+        if (!left_interval.is_top()) {
+            // The interval is TOP as a signed interval but is represented precisely as an unsigned interval,
+            // so split into two signed intervals that can be treated separately.
+            left_interval_positive = left_interval & interval_t::nonnegative(64);
+            // An unbounded upper end is treated as -1.
+            const number_t lih_ub =
+                left_interval.ub().number() ? left_interval.ub().number()->truncate_to() : -1;
+            left_interval_negative = interval_t{std::numeric_limits::min(), lih_ub};
+        } else {
+            // Neither view is known: use the full non-negative and negative ranges.
+            left_interval_positive = interval_t::nonnegative(64);
+            left_interval_negative = interval_t::negative(64);
+        }
+    }
+
+    left_interval = left_interval.truncate_to();
+    right_interval = right_interval.truncate_to();
+}
+
+// Given left and right values, get the left and right intervals, and also split
+// the left interval into separate low and high intervals.
+// All four interval_t reference parameters are outputs. "Low" is the part of the
+// left interval inside interval_t::nonnegative(64) and "high" the part inside
+// interval_t::unsigned_high(64).
+static void get_unsigned_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue,
+                                   const variable_t left_uvalue, const linear_expression_t& right_uvalue,
+                                   interval_t& left_interval, interval_t& right_interval, interval_t& left_interval_low,
+                                   interval_t& left_interval_high) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    // Get intervals as 32-bit or 64-bit as appropriate.
+    left_interval = inv.eval_interval(left_uvalue);
+    right_interval = inv.eval_interval(right_uvalue);
+    if (!is64) {
+        if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) ||
+            (left_interval <= interval_t::unsigned_high(32) && right_interval <= interval_t::unsigned_high(32))) {
+            // NOTE(review): is64 is a by-value parameter and is not read again in this
+            // function, so this assignment is not visible to the caller; confirm intent.
+            is64 = true;
+            // fallthrough as 64bit, including deduction of relational information
+        } else {
+            left_interval = left_interval.truncate_to();
+            right_interval = right_interval.truncate_to();
+            // continue as 32bit
+        }
+    }
+
+    if (!left_interval.is_top()) {
+        left_interval_low = left_interval & interval_t::nonnegative(64);
+        left_interval_high = left_interval & interval_t::unsigned_high(64);
+    } else {
+        // Nothing is known about the unsigned value; fall back to the signed one.
+        left_interval = inv.eval_interval(left_svalue);
+        if (!left_interval.is_top()) {
+            // The interval is TOP as an unsigned interval but is represented precisely as a signed interval,
+            // so split into two unsigned intervals that can be treated separately.
+            left_interval_low = interval_t(0, left_interval.ub()).truncate_to();
+            left_interval_high = interval_t(left_interval.lb(), -1).truncate_to();
+        } else {
+            // Neither view is known: use the full low and high ranges.
+            left_interval_low = interval_t::nonnegative(64);
+            left_interval_high = interval_t::unsigned_high(64);
+        }
+    }
+
+    left_interval = left_interval.truncate_to();
+    right_interval = right_interval.truncate_to();
+}
+
+// Constraints for a signed 64-bit `left < right` (strict) or `left <= right`.
+// The svalue comparison is always emitted; uvalue constraints are added only
+// when the operands are known to lie on one side of zero.
+static std::vector
+assume_signed_64bit_lt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue,
+                       const interval_t& left_interval_positive, const interval_t& left_interval_negative,
+                       const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue,
+                       const interval_t& right_interval) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    if (right_interval <= interval_t::negative(64)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1].
+        return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue,
+                strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue};
+    } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(64) &&
+               right_interval <= interval_t::nonnegative(64)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX].
+        return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue,
+                strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue};
+    } else {
+        // Interval can only be represented as an svalue.
+        return {strict ? 
left_svalue < right_svalue : left_svalue <= right_svalue};
+    }
+}
+
+// Constraints for a signed 32-bit `left < right` (strict) or `left <= right`.
+// Returns no constraints when neither operand is known to fit in a signed
+// 32-bit range.
+static std::vector
+assume_signed_32bit_lt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue,
+                       const variable_t left_uvalue, const interval_t& left_interval_positive,
+                       const interval_t& left_interval_negative, const linear_expression_t& right_svalue,
+                       const linear_expression_t& right_uvalue, const interval_t& right_interval) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    if (right_interval <= interval_t::negative(32)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1],
+        // aka [INT_MAX+1, UINT_MAX].
+        return {std::numeric_limits::max() < left_uvalue,
+                strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue,
+                strict ? left_svalue < right_svalue : left_svalue <= right_svalue};
+    } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(32) &&
+               right_interval <= interval_t::nonnegative(32)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]
+        // NOTE(review): lpub.number() is dereferenced below without a check; presumably
+        // the truncated upper bound is always finite here - confirm.
+        const auto lpub = left_interval_positive.truncate_to().ub();
+        return {left_svalue >= 0,
+                strict ? left_svalue < right_svalue : left_svalue <= right_svalue,
+                left_svalue <= left_uvalue,
+                left_svalue >= left_uvalue,
+                left_uvalue >= 0,
+                strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue,
+                left_uvalue <= *lpub.number()};
+    } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) &&
+               inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) {
+        // Interval can only be represented as an svalue.
+        return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue};
+    } else {
+        // We can't directly compare the svalues since they may differ in high order bits.
+        return {};
+    }
+}
+
+// Constraints for a signed 64-bit `left > right` (strict) or `left >= right`.
+// The svalue comparison is always emitted; uvalue constraints are added only
+// when the operands are known to lie on one side of zero.
+static std::vector
+assume_signed_64bit_gt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue,
+                       const interval_t& left_interval_positive, const interval_t& left_interval_negative,
+                       const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue,
+                       const interval_t& right_interval) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    if (right_interval <= interval_t::nonnegative(64)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX].
+        // NOTE(review): lpub.number() is dereferenced below without a check; presumably
+        // the truncated upper bound is always finite here - confirm.
+        const auto lpub = left_interval_positive.truncate_to().ub();
+        return {left_svalue >= 0,
+                strict ? left_svalue > right_svalue : left_svalue >= right_svalue,
+                left_svalue <= left_uvalue,
+                left_svalue >= left_uvalue,
+                left_uvalue >= 0,
+                strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue,
+                left_uvalue <= *lpub.number()};
+    } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(64) &&
+               right_interval <= interval_t::negative(64)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1],
+        // aka [INT_MAX+1, UINT_MAX].
+        return {std::numeric_limits::max() < left_uvalue,
+                strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue,
+                strict ? left_svalue > right_svalue : left_svalue >= right_svalue};
+    } else {
+        // Interval can only be represented as an svalue.
+        return {strict ? left_svalue > right_svalue : left_svalue >= right_svalue};
+    }
+}
+
+// Constraints for a signed 32-bit `left > right` (strict) or `left >= right`.
+// Returns no constraints when neither operand is known to fit in a signed
+// 32-bit range.
+static std::vector
+assume_signed_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue,
+                       const variable_t left_uvalue, const interval_t& left_interval_positive,
+                       const interval_t& left_interval_negative, const linear_expression_t& right_svalue,
+                       const linear_expression_t& right_uvalue, const interval_t& right_interval) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    if (right_interval <= interval_t::nonnegative(32)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX].
+        // NOTE(review): lpub.number() is dereferenced below without a check; presumably
+        // the truncated upper bound is always finite here - confirm.
+        const auto lpub = left_interval_positive.truncate_to().ub();
+        return {left_svalue >= 0,
+                strict ? left_svalue > right_svalue : left_svalue >= right_svalue,
+                left_svalue <= left_uvalue,
+                left_svalue >= left_uvalue,
+                left_uvalue >= 0,
+                strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue,
+                left_uvalue <= *lpub.number()};
+    } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(32) &&
+               right_interval <= interval_t::negative(32)) {
+        // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1],
+        // aka [INT_MAX+1, UINT_MAX].
+        return {left_uvalue >= number_t{std::numeric_limits::max()} + 1,
+                strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue,
+                strict ? left_svalue > right_svalue : left_svalue >= right_svalue};
+    } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) &&
+               inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) {
+        // Interval can only be represented as an svalue.
+        return {strict ? left_svalue > right_svalue : left_svalue >= right_svalue};
+    } else {
+        // We can't directly compare the svalues since they may differ in high order bits.
+        return {};
+    }
+}
+
+static std::vector assume_signed_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64,
+                                                                   variable_t left_svalue, variable_t left_uvalue,
+                                                                   const linear_expression_t& right_svalue,
+                                                                   const linear_expression_t& right_uvalue) {
+    using crab::interval_t;
+    using namespace crab::dsl_syntax;
+
+    interval_t left_interval = interval_t::bottom();
+    interval_t right_interval = interval_t::bottom();
+    interval_t left_interval_positive = interval_t::bottom();
+    interval_t left_interval_negative = interval_t::bottom();
+    get_signed_intervals(inv, is64, left_svalue, left_uvalue, right_svalue, left_interval, right_interval,
+                         left_interval_positive, left_interval_negative);
+
+    if (op == Condition::Op::EQ) {
+        // Handle svalue == right.
+        if (is64) {
+            return assume_signed_64bit_eq(inv, left_svalue, left_uvalue, right_interval, right_svalue, right_uvalue);
+        } else {
+            return assume_signed_32bit_eq(inv, left_svalue, left_uvalue, right_interval);
+        }
+    }
+
+    const bool is_lt = op == Condition::Op::SLT || op == Condition::Op::SLE;
+    bool strict = op == Condition::Op::SLT || op == Condition::Op::SGT;
+
+    auto llb = left_interval.lb();
+    auto lub = left_interval.ub();
+    auto rlb = right_interval.lb();
+    auto rub = right_interval.ub();
+    if (!is_lt && (strict ? lub <= rlb : lub < rlb)) {
+        // Left signed interval is lower than right signed interval.
+        return {linear_constraint_t::false_const()};
+    } else if (is_lt && (strict ? llb >= rub : llb > rub)) {
+        // Left signed interval is higher than right signed interval.
+        return {linear_constraint_t::false_const()};
+    }
+    if (is_lt && (strict ? lub < rlb : lub <= rlb)) {
+        // Left signed interval is lower than right signed interval.
+        return {linear_constraint_t::true_const()};
+    } else if (!is_lt && (strict ? llb > rub : llb >= rub)) {
+        // Left signed interval is higher than right signed interval.
+ return {linear_constraint_t::true_const()}; + } + + if (is64) { + if (is_lt) { + return assume_signed_64bit_lt(strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } else { + return assume_signed_64bit_gt(strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } + } else { + // 32-bit compare. + if (is_lt) { + return assume_signed_32bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } else { + return assume_signed_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } + } + return {}; +} + +static std::vector +assume_unsigned_64bit_lt(const NumAbsDomain& inv, bool strict, variable_t left_svalue, variable_t left_uvalue, + const interval_t& left_interval_low, const interval_t& left_interval_high, + const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, + const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + auto rub = right_interval.ub(); + auto lllb = left_interval_low.truncate_to().lb(); + if (right_interval <= interval_t::nonnegative(64) && (strict ? lllb >= rub : lllb > rub)) { + // The high interval is out of range. + if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { + return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), + left_uvalue <= *lsubn, left_svalue >= 0}; + } else { + return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), + left_svalue >= 0}; + } + } + auto lhlb = left_interval_high.truncate_to().lb(); + if (right_interval <= interval_t::unsigned_high(64) && (strict ? lhlb >= rub : lhlb > rub)) { + // The high interval is out of range. 
+ if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { + return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), + left_uvalue <= *lsubn, left_svalue >= 0}; + } else { + return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), + left_svalue >= 0}; + } + } + if (right_interval <= interval_t::signed_int(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + auto llub = left_interval_low.truncate_to().ub(); + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + left_uvalue <= *llub.number(), 0 <= left_svalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (left_interval_low.is_bottom() && right_interval <= interval_t::unsigned_high(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if ((left_interval_low | left_interval_high) == interval_t::unsigned_int(64)) { + // Interval can only be represented as a uvalue, and was TOP before. + return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? 
left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } +} + +static std::vector assume_unsigned_32bit_lt(const NumAbsDomain& inv, const bool strict, + const variable_t left_svalue, + const variable_t left_uvalue, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (inv.eval_interval(left_uvalue) <= interval_t::nonnegative(32) && + inv.eval_interval(right_uvalue) <= interval_t::nonnegative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT32_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (inv.eval_interval(left_svalue) <= interval_t::negative(32) && + inv.eval_interval(right_svalue) <= interval_t::negative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT32_MIN, -1]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && + inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else { + // We can't directly compare the uvalues since they may differ in high order bits. 
+ return {}; + } +} + +static std::vector +assume_unsigned_64bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_low, + const interval_t& left_interval_high, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + const auto rlb = right_interval.lb(); + const auto llub = left_interval_low.truncate_to().ub(); + const auto lhlb = left_interval_high.truncate_to().lb(); + + if (right_interval <= interval_t::nonnegative(64) && (strict ? llub <= rlb : llub < rlb)) { + // The low interval is out of range. + return {strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + *lhlb.number() == std::numeric_limits::max() ? left_uvalue == *lhlb.number() + : left_uvalue >= *lhlb.number(), + left_svalue < 0}; + } else if (right_interval <= interval_t::unsigned_high(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else if ((left_interval_low | left_interval_high) <= interval_t::nonnegative(64) && + right_interval <= interval_t::nonnegative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } +} + +static std::vector +assume_unsigned_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_low, + const interval_t& left_interval_high, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::unsigned_high(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && + inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } else { + // We can't directly compare the uvalues since they may differ in high order bits. + return {}; + }; +} + +static std::vector assume_unsigned_cst_interval(const NumAbsDomain& inv, Condition::Op op, + bool is64, variable_t left_svalue, + variable_t left_uvalue, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + interval_t left_interval = interval_t::bottom(); + interval_t right_interval = interval_t::bottom(); + interval_t left_interval_low = interval_t::bottom(); + interval_t left_interval_high = interval_t::bottom(); + get_unsigned_intervals(inv, is64, left_svalue, left_uvalue, right_uvalue, left_interval, right_interval, + left_interval_low, left_interval_high); + + // Handle uvalue != right. 
+ if (op == Condition::Op::NE) { + if (auto rn = right_interval.singleton()) { + if (rn == left_interval.truncate_to_uint(is64 ? 64 : 32).lb().number()) { + // "NE lower bound" is equivalent to "GT lower bound". + op = Condition::Op::GT; + right_interval = interval_t{left_interval.lb()}; + } else if (rn == left_interval.ub().number()) { + // "NE upper bound" is equivalent to "LT upper bound". + op = Condition::Op::LT; + right_interval = interval_t{left_interval.ub()}; + } else { + return {}; + } + } else { + return {}; + } + } + + const bool is_lt = op == Condition::Op::LT || op == Condition::Op::LE; + bool strict = op == Condition::Op::LT || op == Condition::Op::GT; + + auto [llb, lub] = left_interval.pair(); + auto [rlb, rub] = right_interval.pair(); + if (is_lt ? (strict ? llb >= rub : llb > rub) : (strict ? lub <= rlb : lub < rlb)) { + // Left unsigned interval is lower than right unsigned interval. + return {linear_constraint_t::false_const()}; + } + if (is_lt && (strict ? lub < rlb : lub <= rlb)) { + // Left unsigned interval is lower than right unsigned interval. We still add a + // relationship for use when widening, such as is used in the prime conformance test. + if (is64) { + return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } + return {}; + } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { + // Left unsigned interval is higher than right unsigned interval. We still add a + // relationship for use when widening, such as is used in the prime conformance test. + if (is64) { + return {strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } else { + return {}; + } + } + + if (is64) { + if (is_lt) { + return assume_unsigned_64bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } else { + return assume_unsigned_64bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } + } else { + if (is_lt) { + return assume_unsigned_32bit_lt(inv, strict, left_svalue, left_uvalue, right_svalue, right_uvalue); + } else { + return assume_unsigned_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } + } +} + +/** Linear constraints for a comparison with a constant. + */ +static std::vector assume_cst_imm(const NumAbsDomain& inv, const Condition::Op op, const bool is64, + const variable_t dst_svalue, const variable_t dst_uvalue, + const int64_t imm) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + switch (op) { + case Op::EQ: + case Op::SGE: + case Op::SLE: + case Op::SGT: + case Op::SLT: + return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, gsl::narrow_cast(imm)); + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, interval_t{imm}); + case Op::NE: + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: + return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, + gsl::narrow_cast(imm)); + } + return {}; +} + +/** Linear constraint for a numerical comparison between registers. 
+ */ +static std::vector assume_cst_reg(const NumAbsDomain& inv, const Condition::Op op, const bool is64, + const variable_t dst_svalue, const variable_t dst_uvalue, + const variable_t src_svalue, const variable_t src_uvalue) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + if (is64) { + switch (op) { + case Op::EQ: { + const interval_t src_interval = inv.eval_interval(src_svalue); + if (!src_interval.is_singleton() && src_interval <= interval_t::nonnegative(64)) { + return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue), eq(dst_svalue, dst_uvalue)}; + } else { + return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue)}; + } + } + case Op::NE: return {neq(dst_svalue, src_svalue)}; + case Op::SGE: return {dst_svalue >= src_svalue}; + case Op::SLE: return {dst_svalue <= src_svalue}; + case Op::SGT: return {dst_svalue > src_svalue}; + // Note: reverse the test as a workaround strange lookup: + case Op::SLT: return {src_svalue > dst_svalue}; + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + } + } else { + switch (op) { + case Op::EQ: + case Op::SGE: + case Op::SLE: + case Op::SGT: + case Op::SLT: return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); + case Op::NE: + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + } + } + assert(false); + throw std::exception(); +} + +void ebpf_transformer::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } +void ebpf_transformer::assign(const variable_t x, const int64_t 
e) { m_inv.set(x, interval_t(e)); } + +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const number_t& k, + const int finite_width) { + m_inv.apply(op, x, y, k, finite_width); +} + +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { + std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); +} + +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { + std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); +} + +static void havoc_offsets(NumAbsDomain& inv, const Reg& reg) { + const reg_pack_t r = reg_pack(reg); + inv -= r.ctx_offset; + inv -= r.map_fd; + inv -= r.packet_offset; + inv -= r.shared_offset; + inv -= r.shared_region_size; + inv -= r.stack_offset; + inv -= r.stack_numeric_size; +} +static void havoc_register(NumAbsDomain& inv, const Reg& reg) { + const reg_pack_t r = reg_pack(reg); + havoc_offsets(inv, reg); + inv -= r.svalue; + inv -= r.uvalue; +} + +void ebpf_transformer::scratch_caller_saved_registers() { + for (int i = R1_ARG; i <= R5_ARG; i++) { + Reg r{gsl::narrow(i)}; + havoc_register(m_inv, r); + type_inv.havoc_type(m_inv, r); + } +} + +void ebpf_transformer::save_callee_saved_registers(const std::string& prefix) { + // Create variables specific to the new call stack frame that store + // copies of the states of 
r6 through r9. + for (int r = R6; r <= R9; r++) { + for (const data_kind_t kind : iterate_kinds()) { + const variable_t src_var = variable_t::reg(kind, r); + if (!m_inv[src_var].is_top()) { + assign(variable_t::stack_frame_var(kind, r, prefix), src_var); + } + } + } +} + +void ebpf_transformer::restore_callee_saved_registers(const std::string& prefix) { + for (int r = R6; r <= R9; r++) { + for (const data_kind_t kind : iterate_kinds()) { + const variable_t src_var = variable_t::stack_frame_var(kind, r, prefix); + if (!m_inv[src_var].is_top()) { + assign(variable_t::reg(kind, r), src_var); + } else { + havoc(variable_t::reg(kind, r)); + } + havoc(src_var); + } + } +} + +void ebpf_transformer::havoc_subprogram_stack(const std::string& prefix) { + const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; + const auto intv = m_inv.eval_interval(r10_stack_offset); + if (!intv.is_singleton()) { + return; + } + const int64_t stack_start = intv.singleton()->cast_to() - EBPF_SUBPROGRAM_STACK_SIZE; + for (const data_kind_t kind : iterate_kinds()) { + stack.havoc(m_inv, kind, stack_start, EBPF_SUBPROGRAM_STACK_SIZE); + } +} + +void ebpf_transformer::forget_packet_pointers() { + using namespace crab::dsl_syntax; + + for (const variable_t type_variable : variable_t::get_type_variables()) { + if (type_inv.has_type(m_inv, type_variable, T_PACKET)) { + havoc(variable_t::kind_var(data_kind_t::types, type_variable)); + havoc(variable_t::kind_var(data_kind_t::packet_offsets, type_variable)); + havoc(variable_t::kind_var(data_kind_t::svalues, type_variable)); + havoc(variable_t::kind_var(data_kind_t::uvalues, type_variable)); + } + } + + dom.initialize_packet(); +} + +static void overflow_bounds(NumAbsDomain& inv, variable_t lhs, number_t span, int finite_width, bool issigned) { + using namespace crab::dsl_syntax; + auto interval = inv[lhs]; + if (interval.ub() - interval.lb() >= span) { + // Interval covers the full space. 
+ inv -= lhs; + return; + } + if (interval.is_bottom()) { + inv -= lhs; + return; + } + number_t lb_value = interval.lb().number().value(); + number_t ub_value = interval.ub().number().value(); + + // Compute the interval, taking overflow into account. + // For a signed result, we need to ensure the signed and unsigned results match + // so for a 32-bit operation, 0x80000000 should be a positive 64-bit number not + // a sign extended negative one. + number_t lb = lb_value.truncate_to_uint(finite_width); + number_t ub = ub_value.truncate_to_uint(finite_width); + if (issigned) { + lb = lb.truncate_to(); + ub = ub.truncate_to(); + } + if (lb > ub) { + // Range wraps in the middle, so we cannot represent as an unsigned interval. + inv -= lhs; + return; + } + auto new_interval = interval_t{lb, ub}; + if (new_interval != interval) { + // Update the variable, which will lose any relationships to other variables. + inv.set(lhs, new_interval); + } +} + +static void overflow_signed(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { + const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} + : finite_width == 32 ? number_t{std::numeric_limits::max()} + : throw std::exception()}; + overflow_bounds(inv, lhs, span, finite_width, true); +} + +static void overflow_unsigned(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { + const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} + : finite_width == 32 ? 
number_t{std::numeric_limits::max()} + : throw std::exception()}; + overflow_bounds(inv, lhs, span, finite_width, false); +} +static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const number_t& z, const int finite_width) { + inv.apply(op, xs, y, z, finite_width); + if (finite_width) { + inv.assign(xu, xs); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const number_t& z, const int finite_width) { + inv.apply(op, xu, y, z, finite_width); + if (finite_width) { + inv.assign(xs, xu); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const variable_t z, const int finite_width) { + inv.apply(op, xs, y, z, finite_width); + if (finite_width) { + inv.assign(xu, xs); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const variable_t z, const int finite_width) { + inv.apply(op, xu, y, z, finite_width); + if (finite_width) { + inv.assign(xs, xu); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +void ebpf_transformer::add(const variable_t lhs, const variable_t op2) { + apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); +} +void ebpf_transformer::add(const variable_t lhs, const number_t& op2) { + apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); +} +void ebpf_transformer::sub(const variable_t lhs, const variable_t op2) { + apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); +} +void 
ebpf_transformer::sub(const variable_t lhs, const number_t& op2) { + apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); +} + +// Add/subtract with overflow are both signed and unsigned. We can use either one of the two to compute the +// result before adjusting for overflow, though if one is top we want to use the other to retain precision. +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? 
lhss : lhsu, op2, + finite_width); +} + +void ebpf_transformer::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, -1, finite_width); +} +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int 
finite_width) { + apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); +} + +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + // Use finite width 64 to make the svalue be set as well as the uvalue. + apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { + apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); +} + +static void assume(NumAbsDomain& inv, const 
linear_constraint_t& cst) { inv += cst; } +void ebpf_transformer::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } + +/// Forget everything we know about the value of a variable. +void ebpf_transformer::havoc(const variable_t v) { m_inv -= v; } +void ebpf_transformer::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } + +void ebpf_transformer::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } + +static linear_constraint_t type_is_pointer(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type >= T_CTX; +} + +static linear_constraint_t type_is_number(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type == T_NUM; +} + +static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } + +static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type != T_STACK; +} + +void ebpf_transformer::operator()(const Assume& s) { + const Condition cond = s.cond; + const auto dst = reg_pack(cond.left); + if (const auto psrc_reg = std::get_if(&cond.right)) { + const auto src_reg = *psrc_reg; + const auto src = reg_pack(src_reg); + if (type_inv.same_type(m_inv, cond.left, std::get(cond.right))) { + m_inv = type_inv.join_over_types(m_inv, cond.left, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_NUM) { + for (const linear_constraint_t& cst : + assume_cst_reg(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, src.svalue, src.uvalue)) { + inv += cst; + } + } else { + // Either pointers to a singleton region, + // or an equality comparison on map descriptors/pointers to non-singleton locations + if (const auto dst_offset = dom.get_type_offset_variable(cond.left, type)) { + if (const auto src_offset = dom.get_type_offset_variable(src_reg, type)) { + inv += assume_cst_offsets_reg(cond.op, dst_offset.value(), src_offset.value()); + } + } + } + }); + } else { + // We should only 
reach here if `--assume-assert` is off + assert(!thread_local_options.assume_assertions || dom.is_bottom()); + // be sound in any case, it happens to flush out bugs: + m_inv.set_to_top(); + } + } else { + const int64_t imm = gsl::narrow_cast(std::get(cond.right).v); + for (const linear_constraint_t& cst : assume_cst_imm(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, imm)) { + assume(cst); + } + } +} + +void ebpf_transformer::operator()(const Undefined& a) {} + +// Simple truncation function usable with swap_endianness(). +template +constexpr T truncate(T x) noexcept { + return x; +} + +void ebpf_transformer::operator()(const Un& stmt) { + const auto dst = reg_pack(stmt.dst); + auto swap_endianness = [&](const variable_t v, auto be_or_le) { + if (m_inv.entail(type_is_number(stmt.dst))) { + if (const auto n = m_inv.eval_interval(v).singleton()) { + if (n->fits_cast_to()) { + m_inv.set(v, interval_t{be_or_le(n->cast_to())}); + return; + } + } + } + havoc(v); + havoc_offsets(stmt.dst); + }; + // Swap bytes if needed. For 64-bit types we need the weights to fit in a + // signed int64, but for smaller types we don't want sign extension, + // so we use unsigned which still fits in a signed int64. 
+ switch (stmt.op) { + case Un::Op::BE16: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::BE32: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::BE64: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } + break; + case Un::Op::LE16: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::LE32: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::LE64: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } + break; + case Un::Op::SWAP16: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::SWAP32: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::SWAP64: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + 
swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::NEG: + neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32); + havoc_offsets(stmt.dst); + break; + } +} + +void ebpf_transformer::operator()(const Exit& a) { + // Clean up any state for the current stack frame. + const std::string prefix = a.stack_frame_prefix; + if (prefix.empty()) { + return; + } + havoc_subprogram_stack(prefix); + restore_callee_saved_registers(prefix); + + // Restore r10. + constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, EBPF_SUBPROGRAM_STACK_SIZE, 64); +} + +void ebpf_transformer::operator()(const Jmp&) const { + // This is a NOP. It only exists to hold the jump preconditions. +} + +void ebpf_transformer::operator()(const Packet& a) { + const auto reg = reg_pack(R0_RETURN_VALUE); + constexpr Reg r0_reg{R0_RETURN_VALUE}; + type_inv.assign_type(m_inv, r0_reg, T_NUM); + havoc_offsets(r0_reg); + havoc(reg.svalue); + havoc(reg.uvalue); + scratch_caller_saved_registers(); +} + +void ebpf_transformer::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, + const int width, const Reg& src_reg) { + type_inv.assign_type(inv, target_reg, stack.load(inv, data_kind_t::types, addr, width)); + using namespace crab::dsl_syntax; + if (inv.entail(width <= reg_pack(src_reg).stack_numeric_size)) { + type_inv.assign_type(inv, target_reg, T_NUM); + } + + const reg_pack_t& target = reg_pack(target_reg); + if (width == 1 || width == 2 || width == 4 || width == 8) { + // Use the addr before we havoc the destination register since we might be getting the + // addr from that same register. 
+ const std::optional sresult = stack.load(inv, data_kind_t::svalues, addr, width); + const std::optional uresult = stack.load(inv, data_kind_t::uvalues, addr, width); + havoc_register(inv, target_reg); + inv.assign(target.svalue, sresult); + inv.assign(target.uvalue, uresult); + + if (type_inv.has_type(inv, target.type, T_CTX)) { + inv.assign(target.ctx_offset, stack.load(inv, data_kind_t::ctx_offsets, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_MAP) || type_inv.has_type(inv, target.type, T_MAP_PROGRAMS)) { + inv.assign(target.map_fd, stack.load(inv, data_kind_t::map_fds, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_PACKET)) { + inv.assign(target.packet_offset, stack.load(inv, data_kind_t::packet_offsets, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_SHARED)) { + inv.assign(target.shared_offset, stack.load(inv, data_kind_t::shared_offsets, addr, width)); + inv.assign(target.shared_region_size, stack.load(inv, data_kind_t::shared_region_sizes, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_STACK)) { + inv.assign(target.stack_offset, stack.load(inv, data_kind_t::stack_offsets, addr, width)); + inv.assign(target.stack_numeric_size, stack.load(inv, data_kind_t::stack_numeric_sizes, addr, width)); + } + } else { + havoc_register(inv, target_reg); + } +} + +void ebpf_transformer::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, + const int width) { + using namespace crab::dsl_syntax; + if (inv.is_bottom()) { + return; + } + + const ebpf_context_descriptor_t* desc = global_program_info->type.context_descriptor; + + const reg_pack_t& target = reg_pack(target_reg); + + if (desc->end < 0) { + havoc_register(inv, target_reg); + type_inv.assign_type(inv, target_reg, T_NUM); + return; + } + + const interval_t interval = inv.eval_interval(addr_vague); + const std::optional maybe_addr = interval.singleton(); + havoc_register(inv, target_reg); + + const bool 
may_touch_ptr = + interval.contains(desc->data) || interval.contains(desc->meta) || interval.contains(desc->end); + + if (!maybe_addr) { + if (may_touch_ptr) { + type_inv.havoc_type(inv, target_reg); + } else { + type_inv.assign_type(inv, target_reg, T_NUM); + } + return; + } + + const number_t addr = *maybe_addr; + + // We use offsets for packet data, data_end, and meta during verification, + // but at runtime they will be 64-bit pointers. We can use the offset values + // for verification like we use map_fd's as a proxy for maps which + // at runtime are actually 64-bit memory pointers. + const int offset_width = desc->end - desc->data; + if (addr == desc->data) { + if (width == offset_width) { + inv.assign(target.packet_offset, 0); + } + } else if (addr == desc->end) { + if (width == offset_width) { + inv.assign(target.packet_offset, variable_t::packet_size()); + } + } else if (addr == desc->meta) { + if (width == offset_width) { + inv.assign(target.packet_offset, variable_t::meta_offset()); + } + } else { + if (may_touch_ptr) { + type_inv.havoc_type(inv, target_reg); + } else { + type_inv.assign_type(inv, target_reg, T_NUM); + } + return; + } + if (width == offset_width) { + type_inv.assign_type(inv, target_reg, T_PACKET); + inv += 4098 <= target.svalue; + inv += target.svalue <= PTR_MAX; + } +} + +void ebpf_transformer::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, + const linear_expression_t& addr, const int width) { + if (inv.is_bottom()) { + return; + } + const reg_pack_t& target = reg_pack(target_reg); + + type_inv.assign_type(inv, target_reg, T_NUM); + havoc_register(inv, target_reg); + + // A 1 or 2 byte copy results in a limited range of values that may be used as array indices. 
+ if (width == 1) { + const interval_t full = interval_t::full(); + inv.set(target.svalue, full); + inv.set(target.uvalue, full); + } else if (width == 2) { + const interval_t full = interval_t::full(); + inv.set(target.svalue, full); + inv.set(target.uvalue, full); + } +} + +void ebpf_transformer::do_load(const Mem& b, const Reg& target_reg) { + using namespace crab::dsl_syntax; + + const auto mem_reg = reg_pack(b.access.basereg); + const int width = b.access.width; + const int offset = b.access.offset; + + if (b.access.basereg.v == R10_STACK_POINTER) { + const linear_expression_t addr = mem_reg.stack_offset + offset; + do_load_stack(m_inv, target_reg, addr, width, b.access.basereg); + return; + } + + m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, type_encoding_t type) { + switch (type) { + case T_UNINIT: return; + case T_MAP: return; + case T_MAP_PROGRAMS: return; + case T_NUM: return; + case T_CTX: { + linear_expression_t addr = mem_reg.ctx_offset + offset; + do_load_ctx(inv, target_reg, addr, width); + break; + } + case T_STACK: { + linear_expression_t addr = mem_reg.stack_offset + offset; + do_load_stack(inv, target_reg, addr, width, b.access.basereg); + break; + } + case T_PACKET: { + linear_expression_t addr = mem_reg.packet_offset + offset; + do_load_packet_or_shared(inv, target_reg, addr, width); + break; + } + default: { + linear_expression_t addr = mem_reg.shared_offset + offset; + do_load_packet_or_shared(inv, target_reg, addr, width); + break; + } + } + }); +} + +void ebpf_transformer::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, + const linear_expression_t& val_type, const linear_expression_t& val_svalue, + const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { + { + const std::optional var = stack.store_type(inv, addr, width, val_type); + type_inv.assign_type(inv, var, val_type); + } + if (width == 8) { + inv.assign(stack.store(inv, data_kind_t::svalues, 
addr, width, val_svalue), val_svalue); + inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_CTX)) { + inv.assign(stack.store(inv, data_kind_t::ctx_offsets, addr, width, opt_val_reg->ctx_offset), + opt_val_reg->ctx_offset); + } else { + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + } + + if (opt_val_reg && + (type_inv.has_type(m_inv, val_type, T_MAP) || type_inv.has_type(m_inv, val_type, T_MAP_PROGRAMS))) { + inv.assign(stack.store(inv, data_kind_t::map_fds, addr, width, opt_val_reg->map_fd), opt_val_reg->map_fd); + } else { + stack.havoc(inv, data_kind_t::map_fds, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_PACKET)) { + inv.assign(stack.store(inv, data_kind_t::packet_offsets, addr, width, opt_val_reg->packet_offset), + opt_val_reg->packet_offset); + } else { + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_SHARED)) { + inv.assign(stack.store(inv, data_kind_t::shared_offsets, addr, width, opt_val_reg->shared_offset), + opt_val_reg->shared_offset); + inv.assign(stack.store(inv, data_kind_t::shared_region_sizes, addr, width, opt_val_reg->shared_region_size), + opt_val_reg->shared_region_size); + } else { + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_STACK)) { + inv.assign(stack.store(inv, data_kind_t::stack_offsets, addr, width, opt_val_reg->stack_offset), + opt_val_reg->stack_offset); + inv.assign(stack.store(inv, data_kind_t::stack_numeric_sizes, addr, width, opt_val_reg->stack_numeric_size), + opt_val_reg->stack_numeric_size); + } else { + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); + } + } else { + if ((width == 1 || 
width == 2 || width == 4) && type_inv.get_type(m_inv, val_type) == T_NUM) { + // Keep track of numbers on the stack that might be used as array indices. + inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); + inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); + } else { + stack.havoc(inv, data_kind_t::svalues, addr, width); + stack.havoc(inv, data_kind_t::uvalues, addr, width); + } + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + stack.havoc(inv, data_kind_t::map_fds, addr, width); + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); + } + + // Update stack_numeric_size for any stack type variables. + // stack_numeric_size holds the number of continuous bytes starting from stack_offset that are known to be numeric. + auto updated_lb = m_inv.eval_interval(addr).lb(); + auto updated_ub = m_inv.eval_interval(addr).ub() + width; + for (const variable_t type_variable : variable_t::get_type_variables()) { + if (!type_inv.has_type(inv, type_variable, T_STACK)) { + continue; + } + const variable_t stack_offset_variable = variable_t::kind_var(data_kind_t::stack_offsets, type_variable); + const variable_t stack_numeric_size_variable = + variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); + + using namespace crab::dsl_syntax; + // See if the variable's numeric interval overlaps with changed bytes. 
+ if (m_inv.intersect(dsl_syntax::operator<=(addr, stack_offset_variable + stack_numeric_size_variable)) && + m_inv.intersect(operator>=(addr + width, stack_offset_variable))) { + havoc(stack_numeric_size_variable); + recompute_stack_numeric_size(m_inv, type_variable); + } + } +} + +void ebpf_transformer::operator()(const Mem& b) { + if (m_inv.is_bottom()) { + return; + } + if (const auto preg = std::get_if(&b.value)) { + if (b.is_load) { + do_load(b, *preg); + } else { + const auto data_reg = reg_pack(*preg); + do_mem_store(b, data_reg.type, data_reg.svalue, data_reg.uvalue, data_reg); + } + } else { + const uint64_t imm = std::get(b.value).v; + do_mem_store(b, T_NUM, to_signed(imm), imm, {}); + } +} + +void ebpf_transformer::do_mem_store(const Mem& b, const linear_expression_t& val_type, + const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { + if (m_inv.is_bottom()) { + return; + } + const int width = b.access.width; + const number_t offset{b.access.offset}; + if (b.access.basereg.v == R10_STACK_POINTER) { + const auto r10_stack_offset = reg_pack(b.access.basereg).stack_offset; + const auto r10_interval = m_inv.eval_interval(r10_stack_offset); + if (r10_interval.is_singleton()) { + const int32_t stack_offset = r10_interval.singleton()->cast_to(); + const number_t base_addr{stack_offset}; + do_store_stack(m_inv, base_addr + offset, width, val_type, val_svalue, val_uvalue, opt_val_reg); + } + return; + } + m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_STACK) { + const auto base_addr = linear_expression_t(dom.get_type_offset_variable(b.access.basereg, type).value()); + do_store_stack(inv, dsl_syntax::operator+(base_addr, offset), width, val_type, val_svalue, val_uvalue, + opt_val_reg); + } + // do nothing for any other type + }); +} + +// Construct a Bin operation that does the main operation that a given Atomic operation 
does atomically. +static Bin atomic_to_bin(const Atomic& a) { + Bin bin{.dst = Reg{R11_ATOMIC_SCRATCH}, .v = a.valreg, .is64 = a.access.width == sizeof(uint64_t), .lddw = false}; + switch (a.op) { + case Atomic::Op::ADD: bin.op = Bin::Op::ADD; break; + case Atomic::Op::OR: bin.op = Bin::Op::OR; break; + case Atomic::Op::AND: bin.op = Bin::Op::AND; break; + case Atomic::Op::XOR: bin.op = Bin::Op::XOR; break; + case Atomic::Op::XCHG: + case Atomic::Op::CMPXCHG: bin.op = Bin::Op::MOV; break; + default: throw std::exception(); + } + return bin; +} + +void ebpf_transformer::operator()(const Atomic& a) { + if (m_inv.is_bottom()) { + return; + } + if (!m_inv.entail(type_is_pointer(reg_pack(a.access.basereg))) || + !m_inv.entail(type_is_number(reg_pack(a.valreg)))) { + return; + } + if (m_inv.entail(type_is_not_stack(reg_pack(a.access.basereg)))) { + // Shared memory regions are volatile so we can just havoc + // any register that will be updated. + if (a.op == Atomic::Op::CMPXCHG) { + havoc_register(m_inv, Reg{R0_RETURN_VALUE}); + } else if (a.fetch) { + havoc_register(m_inv, a.valreg); + } + return; + } + + // Fetch the current value into the R11 pseudo-register. + constexpr Reg r11{R11_ATOMIC_SCRATCH}; + (*this)(Mem{.access = a.access, .value = r11, .is_load = true}); + + // Compute the new value in R11. + (*this)(atomic_to_bin(a)); + + if (a.op == Atomic::Op::CMPXCHG) { + // For CMPXCHG, store the original value in r0. + (*this)(Mem{.access = a.access, .value = Reg{R0_RETURN_VALUE}, .is_load = true}); + + // For the destination, there are 3 possibilities: + // 1) dst.value == r0.value : set R11 to valreg + // 2) dst.value != r0.value : don't modify R11 + // 3) dst.value may or may not == r0.value : set R11 to the union of R11 and valreg + // For now we just havoc the value of R11. + havoc_register(m_inv, r11); + } else if (a.fetch) { + // For other FETCH operations, store the original value in the src register. 
+ (*this)(Mem{.access = a.access, .value = a.valreg, .is_load = true}); + } + + // Store the new value back in the original shared memory location. + // Note that do_mem_store() currently doesn't track shared memory values, + // but stack memory values are tracked and are legal here. + (*this)(Mem{.access = a.access, .value = r11, .is_load = false}); + + // Clear the R11 pseudo-register. + havoc_register(m_inv, r11); + type_inv.havoc_type(m_inv, r11); +} + +void ebpf_transformer::operator()(const Call& call) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + std::optional maybe_fd_reg{}; + for (ArgSingle param : call.singles) { + switch (param.kind) { + case ArgSingle::Kind::MAP_FD: maybe_fd_reg = param.reg; break; + case ArgSingle::Kind::ANYTHING: + case ArgSingle::Kind::MAP_FD_PROGRAMS: + case ArgSingle::Kind::PTR_TO_MAP_KEY: + case ArgSingle::Kind::PTR_TO_MAP_VALUE: + case ArgSingle::Kind::PTR_TO_CTX: + // Do nothing. We don't track the content of relevant memory regions + break; + } + } + for (ArgPair param : call.pairs) { + switch (param.kind) { + case ArgPair::Kind::PTR_TO_READABLE_MEM_OR_NULL: + case ArgPair::Kind::PTR_TO_READABLE_MEM: + // Do nothing. No side effect allowed. + break; + + case ArgPair::Kind::PTR_TO_WRITABLE_MEM: { + bool store_numbers = true; + auto variable = dom.get_type_offset_variable(param.mem); + if (!variable.has_value()) { + // checked by the checker + break; + } + variable_t addr = variable.value(); + variable_t width = reg_pack(param.size).svalue; + + m_inv = type_inv.join_over_types(m_inv, param.mem, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_STACK) { + // Pointer to a memory region that the called function may change, + // so we must havoc. 
+ stack.havoc(inv, data_kind_t::types, addr, width); + stack.havoc(inv, data_kind_t::svalues, addr, width); + stack.havoc(inv, data_kind_t::uvalues, addr, width); + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + stack.havoc(inv, data_kind_t::map_fds, addr, width); + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + } else { + store_numbers = false; + } + }); + if (store_numbers) { + // Functions are not allowed to write sensitive data, + // and initialization is guaranteed + stack.store_numbers(m_inv, addr, width); + } + } + } + } + + constexpr Reg r0_reg{R0_RETURN_VALUE}; + const auto r0_pack = reg_pack(r0_reg); + havoc(r0_pack.stack_numeric_size); + if (call.is_map_lookup) { + // This is the only way to get a null pointer + if (maybe_fd_reg) { + if (const auto map_type = dom.get_map_type(*maybe_fd_reg)) { + if (global_program_info->platform->get_map_type(*map_type).value_type == EbpfMapValueType::MAP) { + if (const auto inner_map_fd = dom.get_map_inner_map_fd(*maybe_fd_reg)) { + do_load_mapfd(r0_reg, to_signed(*inner_map_fd), true); + goto out; + } + } else { + assign_valid_ptr(r0_reg, true); + assign(r0_pack.shared_offset, 0); + m_inv.set(r0_pack.shared_region_size, dom.get_map_value_size(*maybe_fd_reg)); + type_inv.assign_type(m_inv, r0_reg, T_SHARED); + } + } + } + assign_valid_ptr(r0_reg, true); + assign(r0_pack.shared_offset, 0); + type_inv.assign_type(m_inv, r0_reg, T_SHARED); + } else { + havoc(r0_pack.svalue); + havoc(r0_pack.uvalue); + havoc_offsets(r0_reg); + type_inv.assign_type(m_inv, r0_reg, T_NUM); + // assume(r0_pack.value < 0); for INTEGER_OR_NO_RETURN_IF_SUCCEED. 
+ } +out: + scratch_caller_saved_registers(); + if (call.reallocate_packet) { + forget_packet_pointers(); + } +} + +void ebpf_transformer::operator()(const CallLocal& call) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + save_callee_saved_registers(call.stack_frame_prefix); + + // Update r10. + constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, -EBPF_SUBPROGRAM_STACK_SIZE, 64); +} + +void ebpf_transformer::operator()(const Callx& callx) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + + // Look up the helper function id. + const reg_pack_t& reg = reg_pack(callx.func); + const auto src_interval = m_inv.eval_interval(reg.svalue); + if (const auto sn = src_interval.singleton()) { + if (sn->fits()) { + // We can now process it as if the id was immediate. + const int32_t imm = sn->cast_to(); + if (!global_program_info->platform->is_helper_usable(imm)) { + return; + } + const Call call = make_call(imm, *global_program_info->platform); + (*this)(call); + } + } +} + +void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const bool maybe_null) { + const EbpfMapDescriptor& desc = global_program_info->platform->get_map_descriptor(mapfd); + const EbpfMapType& type = global_program_info->platform->get_map_type(desc.type); + if (type.value_type == EbpfMapValueType::PROGRAM) { + type_inv.assign_type(m_inv, dst_reg, T_MAP_PROGRAMS); + } else { + type_inv.assign_type(m_inv, dst_reg, T_MAP); + } + const reg_pack_t& dst = reg_pack(dst_reg); + assign(dst.map_fd, mapfd); + assign_valid_ptr(dst_reg, maybe_null); +} + +void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } + +void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { + using namespace crab::dsl_syntax; + const reg_pack_t& reg = reg_pack(dst_reg); + havoc(reg.svalue); + havoc(reg.uvalue); + if (maybe_null) { + m_inv += 0 <= reg.svalue; + } else { + m_inv += 0 < 
reg.svalue; + } + m_inv += reg.svalue <= PTR_MAX; + assign(reg.uvalue, reg.svalue); +} + +// If nothing is known of the stack_numeric_size, +// try to recompute the stack_numeric_size. +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { + const variable_t stack_numeric_size_variable = + variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); + + if (!inv.eval_interval(stack_numeric_size_variable).is_top()) { + return; + } + + if (type_inv.has_type(inv, type_variable, T_STACK)) { + const int numeric_size = + stack.min_all_num_size(inv, variable_t::kind_var(data_kind_t::stack_offsets, type_variable)); + if (numeric_size > 0) { + inv.assign(stack_numeric_size_variable, numeric_size); + } + } +} + +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { + recompute_stack_numeric_size(inv, reg_pack(reg).type); +} + +void ebpf_transformer::add(const Reg& reg, const int imm, const int finite_width) { + const auto dst = reg_pack(reg); + const auto offset = dom.get_type_offset_variable(reg); + add_overflow(dst.svalue, dst.uvalue, imm, finite_width); + if (offset.has_value()) { + add(offset.value(), imm); + if (imm > 0) { + // Since the start offset is increasing but + // the end offset is not, the numeric size decreases. + sub(dst.stack_numeric_size, imm); + } else if (imm < 0) { + havoc(dst.stack_numeric_size); + } + recompute_stack_numeric_size(m_inv, reg); + } +} + +void ebpf_transformer::shl(const Reg& dst_reg, int imm, const int finite_width) { + const reg_pack_t dst = reg_pack(dst_reg); + + // The BPF ISA requires masking the imm. 
+ imm &= finite_width - 1; + + if (m_inv.entail(type_is_number(dst))) { + const auto interval = m_inv.eval_interval(dst.uvalue); + if (interval.finite_size()) { + const number_t lb = interval.lb().number().value(); + const number_t ub = interval.ub().number().value(); + uint64_t lb_n = lb.cast_to(); + uint64_t ub_n = ub.cast_to(); + const uint64_t uint_max = finite_width == 64 ? uint64_t{std::numeric_limits::max()} + : uint64_t{std::numeric_limits::max()}; + if (lb_n >> (finite_width - imm) != ub_n >> (finite_width - imm)) { + // The bits that will be shifted out to the left are different, + // which means all combinations of remaining bits are possible. + lb_n = 0; + ub_n = uint_max << imm & uint_max; + } else { + // The bits that will be shifted out to the left are identical + // for all values in the interval, so we can safely shift left + // to get a new interval. + lb_n = lb_n << imm & uint_max; + ub_n = ub_n << imm & uint_max; + } + m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.svalue, dst.uvalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + } + shl_overflow(dst.svalue, dst.uvalue, imm); + havoc_offsets(dst_reg); +} + +void ebpf_transformer::lshr(const Reg& dst_reg, int imm, int finite_width) { + reg_pack_t dst = reg_pack(dst_reg); + + // The BPF ISA requires masking the imm. 
+ imm &= finite_width - 1; + + if (m_inv.entail(type_is_number(dst))) { + auto interval = m_inv.eval_interval(dst.uvalue); + number_t lb_n{0}; + number_t ub_n{std::numeric_limits::max() >> imm}; + if (interval.finite_size()) { + number_t lb = interval.lb().number().value(); + number_t ub = interval.ub().number().value(); + if (finite_width == 64) { + lb_n = lb.cast_to() >> imm; + ub_n = ub.cast_to() >> imm; + } else { + number_t lb_w = lb.cast_to_sint(finite_width); + number_t ub_w = ub.cast_to_sint(finite_width); + lb_n = lb_w.cast_to() >> imm; + ub_n = ub_w.cast_to() >> imm; + + // The interval must be valid since a signed range crossing 0 + // was earlier converted to a full unsigned range. + assert(lb_n <= ub_n); + } + } + m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.svalue, dst.uvalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(dst_reg); +} + +static int _movsx_bits(const Bin::Op op) { + switch (op) { + case Bin::Op::MOVSX8: return 8; + case Bin::Op::MOVSX16: return 16; + case Bin::Op::MOVSX32: return 32; + default: throw std::exception(); + } +} + +void ebpf_transformer::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, + const Bin::Op op) { + using namespace crab; + + const int bits = _movsx_bits(op); + const reg_pack_t dst = reg_pack(dst_reg); + interval_t right_interval = m_inv.eval_interval(right_svalue); + type_inv.assign_type(m_inv, dst_reg, T_NUM); + havoc_offsets(dst_reg); + const int64_t span = 1ULL << bits; + if (right_interval.ub() - right_interval.lb() >= span) { + // Interval covers the full space. + if (bits == 64) { + havoc(dst.svalue); + return; + } + right_interval = interval_t::signed_int(bits); + } + const int64_t mask = 1ULL << (bits - 1); + + // Sign extend each bound. 
+ int64_t lb = right_interval.lb().number().value().cast_to(); + lb &= span - 1; + lb = (lb ^ mask) - mask; + int64_t ub = right_interval.ub().number().value().cast_to(); + ub &= span - 1; + ub = (ub ^ mask) - mask; + m_inv.set(dst.svalue, interval_t{lb, ub}); + + if (finite_width) { + m_inv.assign(dst.uvalue, dst.svalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + } +} + +void ebpf_transformer::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { + using namespace crab; + + reg_pack_t dst = reg_pack(dst_reg); + if (m_inv.entail(type_is_number(dst))) { + interval_t left_interval = interval_t::bottom(); + interval_t right_interval = interval_t::bottom(); + interval_t left_interval_positive = interval_t::bottom(); + interval_t left_interval_negative = interval_t::bottom(); + get_signed_intervals(m_inv, finite_width == 64, dst.svalue, dst.uvalue, right_svalue, left_interval, + right_interval, left_interval_positive, left_interval_negative); + if (auto sn = right_interval.singleton()) { + // The BPF ISA requires masking the imm. 
+ int64_t imm = sn->cast_to() & (finite_width - 1); + + int64_t lb_n = std::numeric_limits::min() >> imm; + int64_t ub_n = std::numeric_limits::max() >> imm; + if (left_interval.finite_size()) { + const auto [lb, ub] = left_interval.pair_number(); + if (finite_width == 64) { + lb_n = lb.cast_to() >> imm; + ub_n = ub.cast_to() >> imm; + } else { + number_t lb_w = lb.cast_to_sint(finite_width) >> gsl::narrow(imm); + number_t ub_w = ub.cast_to_sint(finite_width) >> gsl::narrow(imm); + if (lb_w.cast_to() <= ub_w.cast_to()) { + lb_n = lb_w.cast_to(); + ub_n = ub_w.cast_to(); + } + } + } + m_inv.set(dst.svalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.uvalue, dst.svalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(dst_reg); +} + +static void apply(NumAbsDomain& inv, const binop_t& op, const variable_t x, const variable_t y, const variable_t z) { + inv.apply(op, x, y, z, 0); +} + +void ebpf_transformer::operator()(const Bin& bin) { + using namespace crab::dsl_syntax; + + auto dst = reg_pack(bin.dst); + int finite_width = bin.is64 ? 64 : 32; + + if (auto pimm = std::get_if(&bin.v)) { + // dst += K + int64_t imm; + if (bin.is64) { + // Use the full signed value. + imm = to_signed(pimm->v); + } else { + // Use only the low 32 bits of the value. + imm = gsl::narrow_cast(pimm->v); + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + // If this is a 32-bit operation and the destination is not a number, forget everything about the register. + if (!type_inv.has_type(m_inv, bin.dst, T_NUM)) { + havoc_register(m_inv, bin.dst); + havoc_offsets(bin.dst); + havoc(dst.type); + } + } + switch (bin.op) { + case Bin::Op::MOV: + assign(dst.svalue, imm); + assign(dst.uvalue, imm); + overflow_unsigned(m_inv, dst.uvalue, bin.is64 ? 
64 : 32); + type_inv.assign_type(m_inv, bin.dst, T_NUM); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: CRAB_ERROR("Unsupported operation"); + case Bin::Op::ADD: + if (imm == 0) { + return; + } + add(bin.dst, gsl::narrow(imm), finite_width); + break; + case Bin::Op::SUB: + if (imm == 0) { + return; + } + add(bin.dst, gsl::narrow(-imm), finite_width); + break; + case Bin::Op::MUL: + mul(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UDIV: + udiv(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UMOD: + urem(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SDIV: + sdiv(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SMOD: + srem(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::OR: + bitwise_or(dst.svalue, dst.uvalue, imm); + havoc_offsets(bin.dst); + break; + case Bin::Op::AND: + // FIX: what to do with ptr&-8 as in counter/simple_loop_unrolled? + bitwise_and(dst.svalue, dst.uvalue, imm); + if (gsl::narrow(imm) > 0) { + // AND with immediate is only a 32-bit operation so svalue and uvalue are the same. 
+ assume(dst.svalue <= imm); + assume(dst.uvalue <= imm); + assume(0 <= dst.svalue); + assume(0 <= dst.uvalue); + } + havoc_offsets(bin.dst); + break; + case Bin::Op::LSH: shl(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::RSH: lshr(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::ARSH: ashr(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::XOR: + bitwise_xor(dst.svalue, dst.uvalue, imm); + havoc_offsets(bin.dst); + break; + } + } else { + // dst op= src + auto src_reg = std::get(bin.v); + auto src = reg_pack(src_reg); + switch (bin.op) { + case Bin::Op::ADD: { + if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { + // both must be numbers + add_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); + } else { + // Here we're not sure that lhs and rhs are the same type; they might be. + // But previous assertions should fail unless we know that exactly one of lhs or rhs is a pointer. + m_inv = + type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t dst_type) { + inv = type_inv.join_over_types( + inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t src_type) { + if (dst_type == T_NUM && src_type != T_NUM) { + // num += ptr + type_inv.assign_type(inv, bin.dst, src_type); + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, src_type)) { + crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst.svalue, + dom.get_type_offset_variable(src_reg, src_type).value()); + } + if (src_type == T_SHARED) { + inv.assign(dst.shared_region_size, src.shared_region_size); + } + } else if (dst_type != T_NUM && src_type == T_NUM) { + // ptr += num + type_inv.assign_type(inv, bin.dst, dst_type); + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, dst_type)) { + crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst_offset.value(), + src.svalue); + if (dst_type == T_STACK) { + // Reduce the numeric size. 
+ using namespace crab::dsl_syntax; + if (m_inv.intersect(src.svalue < 0)) { + inv -= dst.stack_numeric_size; + recompute_stack_numeric_size(inv, dst.type); + } else { + apply_signed(inv, arith_binop_t::SUB, dst.stack_numeric_size, + dst.stack_numeric_size, dst.stack_numeric_size, src.svalue, + 0); + } + } + } + } else if (dst_type == T_NUM && src_type == T_NUM) { + // dst and src don't necessarily have the same type, but among the possibilities + // enumerated is the case where they are both numbers. + apply_signed(inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, + src.svalue, finite_width); + } else { + // We ignore the cases here that do not match the assumption described + // above. Joining bottom with another results will leave the other + // results unchanged. + inv.set_to_bottom(); + } + }); + }); + // careful: change dst.value only after dealing with offset + apply_signed(m_inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, src.svalue, finite_width); + } + break; + } + case Bin::Op::SUB: { + if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { + // src and dest have the same type. + m_inv = type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t type) { + switch (type) { + case T_NUM: + // This is: sub_overflow(inv, dst.value, src.value, finite_width); + apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst.svalue, src.svalue, + finite_width); + type_inv.assign_type(inv, bin.dst, T_NUM); + crab::havoc_offsets(inv, bin.dst); + break; + default: + // ptr -= ptr + // Assertions should make sure we only perform this on non-shared pointers. 
+ if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, type)) { + apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst_offset.value(), + dom.get_type_offset_variable(src_reg, type).value(), finite_width); + inv -= dst_offset.value(); + } + crab::havoc_offsets(inv, bin.dst); + type_inv.assign_type(inv, bin.dst, T_NUM); + break; + } + }); + } else { + // We're not sure that lhs and rhs are the same type. + // Either they're different, or at least one is not a singleton. + if (type_inv.get_type(m_inv, std::get(bin.v)) != T_NUM) { + type_inv.havoc_type(m_inv, bin.dst); + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + } else { + sub_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); + if (auto dst_offset = dom.get_type_offset_variable(bin.dst)) { + sub(dst_offset.value(), src.svalue); + if (type_inv.has_type(m_inv, dst.type, T_STACK)) { + // Reduce the numeric size. + using namespace crab::dsl_syntax; + if (m_inv.intersect(src.svalue > 0)) { + m_inv -= dst.stack_numeric_size; + recompute_stack_numeric_size(m_inv, dst.type); + } else { + crab::apply(m_inv, arith_binop_t::ADD, dst.stack_numeric_size, dst.stack_numeric_size, + src.svalue); + } + } + } + } + } + break; + } + case Bin::Op::MUL: + mul(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UDIV: + udiv(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UMOD: + urem(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SDIV: + sdiv(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SMOD: + srem(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::OR: + bitwise_or(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::AND: + bitwise_and(dst.svalue, dst.uvalue, 
src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::LSH: + if (m_inv.entail(type_is_number(src_reg))) { + auto src_interval = m_inv.eval_interval(src.uvalue); + if (std::optional sn = src_interval.singleton()) { + // truncate to uint64? + uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); + if (imm <= std::numeric_limits::max()) { + if (!bin.is64) { + // Use only the low 32 bits of the value. + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } + shl(bin.dst, gsl::narrow_cast(imm), finite_width); + break; + } + } + } + shl_overflow(dst.svalue, dst.uvalue, src.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::RSH: + if (m_inv.entail(type_is_number(src_reg))) { + auto src_interval = m_inv.eval_interval(src.uvalue); + if (std::optional sn = src_interval.singleton()) { + uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); + if (imm <= std::numeric_limits::max()) { + if (!bin.is64) { + // Use only the low 32 bits of the value. + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } + lshr(bin.dst, gsl::narrow_cast(imm), finite_width); + break; + } + } + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::ARSH: + if (m_inv.entail(type_is_number(src_reg))) { + ashr(bin.dst, src.svalue, finite_width); + break; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::XOR: + bitwise_xor(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: + // Keep relational information if operation is a no-op. 
+ if (dst.svalue == src.svalue && + m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(_movsx_bits(bin.op))) { + return; + } + if (m_inv.entail(type_is_number(src_reg))) { + sign_extend(bin.dst, src.svalue, finite_width, bin.op); + break; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOV: + // Keep relational information if operation is a no-op. + if (dst.svalue == src.svalue && + m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(bin.is64 ? 64 : 32)) { + return; + } + assign(dst.svalue, src.svalue); + assign(dst.uvalue, src.uvalue); + havoc_offsets(bin.dst); + m_inv = type_inv.join_over_types(m_inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t type) { + switch (type) { + case T_CTX: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.ctx_offset, src.ctx_offset); + } + break; + case T_MAP: + case T_MAP_PROGRAMS: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.map_fd, src.map_fd); + } + break; + case T_PACKET: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.packet_offset, src.packet_offset); + } + break; + case T_SHARED: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.shared_region_size, src.shared_region_size); + inv.assign(dst.shared_offset, src.shared_offset); + } + break; + case T_STACK: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.stack_offset, src.stack_offset); + inv.assign(dst.stack_numeric_size, src.stack_numeric_size); + } + break; + default: inv.assign(dst.type, type); break; + } + }); + if (bin.is64) { + // Add dst.type=src.type invariant. + if (bin.dst.v != std::get(bin.v).v || type_inv.get_type(m_inv, dst.type) == T_UNINIT) { + // Only forget the destination type if we're copying from a different register, + // or from the same uninitialized register. 
+ havoc(dst.type); + } + type_inv.assign_type(m_inv, bin.dst, std::get(bin.v)); + } + break; + } + } + if (!bin.is64) { + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } +} + +void ebpf_transformer::initialize_loop_counter(const label_t& label) { + m_inv.assign(variable_t::loop_counter(to_string(label)), 0); +} + +void ebpf_transformer::operator()(const IncrementLoopCounter& ins) { + const auto counter = variable_t::loop_counter(to_string(ins.name)); + this->add(counter, 1); +} + +void ebpf_domain_initialize_loop_counter(ebpf_domain_t& dom, const label_t& label) { + ebpf_transformer{dom}.initialize_loop_counter(label); +} + +} // namespace crab diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index 9486f5012..5afa2e0a2 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -71,7 +71,13 @@ class interleaved_fwd_fixpoint_iterator_t final { const basic_block_t& bb = _cfg.get_node(label); for (const GuardedInstruction& ins : bb) { - std::visit(ebpf_transformer{pre}, ins.cmd); + if (thread_local_options.assume_assertions) { + for (const auto& assertion : ins.preconditions) { + // avoid redundant errors + ebpf_domain_assume(pre, assertion); + } + } + ebpf_domain_transform(pre, ins.cmd); }; _post[label] = std::move(pre); } @@ -133,7 +139,7 @@ std::pair run_forward_analyzer(const cfg_t // during program verification. // TODO: Consider making this an instruction instead of an explicit call. 
analyzer._wto.for_each_loop_head( - [&](const label_t& label) { ebpf_transformer{entry_inv}.initialize_loop_counter(label); }); + [&](const label_t& label) { ebpf_domain_initialize_loop_counter(entry_inv, label); }); } analyzer.set_pre(cfg.entry_label(), entry_inv); for (const auto& component : analyzer._wto) { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 43e7afb43..6680c884b 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -61,38 +61,17 @@ static checks_db generate_report(const cfg_t& cfg, const crab::invariant_table_t checks_db m_db; for (const label_t& label : cfg.sorted_labels()) { const basic_block_t& bb = cfg.get_node(label); - ebpf_domain_t from_inv(pre_invariants.at(label)); + ebpf_domain_t from_inv{pre_invariants.at(label)}; const bool pre_bot = from_inv.is_bottom(); - crab::ebpf_checker checker{from_inv}; - checker.set_require_check( - [&m_db, label](auto& inv, const crab::linear_constraint_t& cst, const std::string& s) { - if (inv.is_bottom()) { - return true; - } - if (cst.is_contradiction()) { - m_db.add_warning(label, s); - return false; - } - - if (inv.entail(cst)) { - // add_redundant(s); - return true; - } else if (inv.intersect(cst)) { - // TODO: add_error() if imply negation - m_db.add_warning(label, s); - return false; - } else { - m_db.add_warning(label, s); - return false; - } - }); for (const GuardedInstruction& ins : bb) { for (const Assertion& assertion : ins.preconditions) { - checker(assertion); + for (const auto& warning : ebpf_domain_check(from_inv, label, assertion)) { + m_db.add_warning(label, warning); + } } - std::visit(crab::ebpf_transformer{from_inv}, ins.cmd); - }; + ebpf_domain_transform(from_inv, ins.cmd); + } if (!pre_bot && from_inv.is_bottom()) { m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(bb.label())); @@ -177,7 +156,7 @@ static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info try { // Get dictionaries of pre-invariants and 
post-invariants for each basic block. - ebpf_domain_t entry_dom = crab::ebpf_transformer::setup_entry(true); + ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { diff --git a/test-data/jump.yaml b/test-data/jump.yaml index 772b6c9ec..2ff819504 100644 --- a/test-data/jump.yaml +++ b/test-data/jump.yaml @@ -760,10 +760,7 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0:1: Code is unreachable after 0:1" @@ -785,10 +782,7 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "2: Code is unreachable after 2" @@ -809,10 +803,7 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "2: Code is unreachable after 2" @@ -887,11 +878,7 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx - - r1.uvalue=[1, +oo] +post: [] messages: - "0:1: Code is unreachable after 0:1" @@ -913,11 +900,7 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx - - r1.uvalue=[0, +oo] +post: [] messages: - "0:1: Code is unreachable after 0:1" @@ -939,17 +922,10 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - - "0:2: Code is unreachable after 0:2" - "0: Invalid type (r1.type == number)" --- @@ -967,18 +943,11 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:2: Code is unreachable after 0:2" --- 
test-case: JSGT with imm 0 and pointer @@ -995,17 +964,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JSGE with imm 0 and pointer @@ -1022,17 +985,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JEQ32 with imm 0 and pointer @@ -1049,18 +1006,11 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:2: Code is unreachable after 0:2" --- test-case: JNE32 with imm 0 and pointer @@ -1077,17 +1027,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JSET32 with imm 0 and pointer @@ -1104,16 +1048,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JNSET32 with imm 0 and pointer @@ -1130,16 +1069,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: 
Invalid type (r1.type == number)" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JLT32 with imm 0 and pointer @@ -1156,18 +1090,11 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:2: Code is unreachable after 0:2" --- test-case: JLE32 with imm 0 and pointer @@ -1184,16 +1111,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JGT32 with imm 0 and pointer @@ -1210,16 +1132,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JGE32 with imm 0 and pointer @@ -1236,17 +1153,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JSLT32 with imm 0 and pointer @@ -1263,18 +1174,11 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:2: Code is unreachable after 0:2" --- test-case: JSLE32 with imm 0 and pointer @@ -1291,18 +1195,11 @@ code: : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - 
r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:2: Code is unreachable after 0:2" --- test-case: JSGT32 with imm 0 and pointer @@ -1319,17 +1216,11 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" --- test-case: JSGE32 with imm 0 and pointer @@ -1346,14 +1237,8 @@ code: : | exit -post: - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx +post: [] messages: - "0: Code is unreachable after 0" - "0: Invalid type (r1.type == number)" - - "0:1: Code is unreachable after 0:1" - - "2: Code is unreachable after 2" - - "2: Invalid type (r0.type == number)" diff --git a/test-data/packet.yaml b/test-data/packet.yaml index 682665dee..fd7e46bee 100644 --- a/test-data/packet.yaml +++ b/test-data/packet.yaml @@ -1,5 +1,34 @@ # Copyright (c) Prevail Verifier contributors. 
# SPDX-License-Identifier: MIT +--- +test-case: read 8bit is unsigned + +pre: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]"] + +code: + : | + r2 = *(u8 *)(r1 + 0) + +post: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]", + "r2.type=number", "r2.svalue=[0, 255]", "r2.uvalue=[0, 255]"] +messages: [] +--- +test-case: read 16bit is unsigned + +pre: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]"] + +code: + : | + r2 = *(u16 *)(r1 + 0) + +post: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]", + "r2.type=number", "r2.svalue=[0, 65535]", "r2.uvalue=[0, 65535]"] +messages: [] + --- test-case: simple invalid write From c34b769978786a70797c544cba5961732109dbb2 Mon Sep 17 00:00:00 2001 From: Elazar Gershuni Date: Sun, 10 Nov 2024 23:15:52 +0200 Subject: [PATCH 7/8] Use GuardedInstruction for cfg_t node, instead of basic_block (#789) cfg node is GuardedInstruction, not basic_block_t. basic_block is destined to be a view of the CFG, not an essential part of it. The new node is called value_t since node_t is taken. We should find a better name though. The checker still performs transfer after checking the assertions, but it's only needed in order to find out locations where the invariant becomes bottom, and there are better ways to do it. Avoiding this should improve performance significantly. There are "Undefined" instructions instead of a NOP on entry and exit. * simplify no longer affects the cfg itself. It should be relevant mainly for printing, and possibly as an optimization hint regarding where to keep the results of the fixpoint computation. 
* Assume instructions get special nodes * Loop counters get special nodes * label_t is moved to a dedicated header. Added a special_label field, allowing distinguishing nodes for loop counters (and potentially jumps to fallthrough). Also, using the default "spaceship" comparison, and exit is now INT_MAX instead of -2. * cfg_rev_t is removed. Signed-off-by: Elazar Gershuni --- src/asm_cfg.cpp | 291 ++++++++---------- src/asm_marshal.cpp | 3 +- src/asm_ostream.cpp | 214 +++++++------- src/asm_ostream.hpp | 58 ---- src/asm_parse.cpp | 8 +- src/asm_syntax.hpp | 106 ++----- src/assertions.cpp | 16 +- src/config.hpp | 5 +- src/crab/cfg.hpp | 534 +++++++++++++--------------------- src/crab/ebpf_checker.cpp | 2 +- src/crab/ebpf_domain.cpp | 1 - src/crab/ebpf_transformer.cpp | 1 - src/crab/fwd_analyzer.cpp | 17 +- src/crab/interval.hpp | 3 +- src/crab/label.hpp | 94 ++++++ src/crab/split_dbm.cpp | 10 +- src/crab/thresholds.cpp | 9 +- src/crab/thresholds.hpp | 2 +- src/crab/var_factory.cpp | 2 +- src/crab_utils/debug.hpp | 15 +- src/crab_verifier.cpp | 14 +- src/main/check.cpp | 2 +- src/spec_type_descriptors.hpp | 6 +- src/test/ebpf_yaml.cpp | 10 +- src/test/test_conformance.cpp | 2 +- src/test/test_marshal.cpp | 1 - src/test/test_print.cpp | 1 - src/test/test_wto.cpp | 60 ++-- test-data/jump.yaml | 18 ++ test-data/loop.yaml | 24 +- test-data/uninit.yaml | 4 +- 31 files changed, 690 insertions(+), 843 deletions(-) delete mode 100644 src/asm_ostream.hpp create mode 100644 src/crab/label.hpp diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index d1fb4a7b8..cd1ade2a1 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -18,13 +18,36 @@ using std::string; using std::to_string; using std::vector; -static optional get_jump(Instruction ins) { - if (const auto pins = std::get_if(&ins)) { - return pins->target; +/// Get the inverse of a given comparison operation. 
+static Condition::Op reverse(const Condition::Op op) { + switch (op) { + case Condition::Op::EQ: return Condition::Op::NE; + case Condition::Op::NE: return Condition::Op::EQ; + + case Condition::Op::GE: return Condition::Op::LT; + case Condition::Op::LT: return Condition::Op::GE; + + case Condition::Op::SGE: return Condition::Op::SLT; + case Condition::Op::SLT: return Condition::Op::SGE; + + case Condition::Op::LE: return Condition::Op::GT; + case Condition::Op::GT: return Condition::Op::LE; + + case Condition::Op::SLE: return Condition::Op::SGT; + case Condition::Op::SGT: return Condition::Op::SLE; + + case Condition::Op::SET: return Condition::Op::NSET; + case Condition::Op::NSET: return Condition::Op::SET; } + assert(false); return {}; } +/// Get the inverse of a given comparison condition. +static Condition reverse(const Condition& cond) { + return {.op = reverse(cond.op), .left = cond.left, .right = cond.right, .is64 = cond.is64}; +} + static bool has_fall(const Instruction& ins) { if (std::holds_alternative(ins)) { return false; @@ -44,17 +67,15 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t bool first = true; // Get the label of the node to go to on returning from the macro. - basic_block_t& exit_to_node = cfg.get_node(cfg.next_nodes(caller_label).front()); + crab::value_t& exit_to_node = cfg.get_node(cfg.next_nodes(caller_label).front()); // Construct the variable prefix to use for the new stack frame, // and store a copy in the CallLocal instruction since the instruction-specific // labels may only exist until the CFG is simplified. 
- basic_block_t& caller_node = cfg.get_node(caller_label); + crab::value_t& caller_node = cfg.get_node(caller_label); const std::string stack_frame_prefix = to_string(caller_label); - for (auto& inst : caller_node) { - if (const auto pcall = std::get_if(&inst.cmd)) { - pcall->stack_frame_prefix = stack_frame_prefix; - } + if (const auto pcall = std::get_if(&caller_node.instruction().cmd)) { + pcall->stack_frame_prefix = stack_frame_prefix; } // Walk the transitive closure of CFG nodes starting at entry_label and ending at @@ -71,15 +92,13 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t // Clone the macro block into a new block with the new stack frame prefix. const label_t label{macro_label.from, macro_label.to, stack_frame_prefix}; - auto& bb = cfg.insert(label); - for (auto inst : cfg.get_node(macro_label)) { - if (const auto pexit = std::get_if(&inst.cmd)) { - pexit->stack_frame_prefix = label.stack_frame_prefix; - } else if (const auto pcall = std::get_if(&inst.cmd)) { - pcall->stack_frame_prefix = label.stack_frame_prefix; - } - bb.insert(inst); + auto inst = cfg.at(macro_label); + if (const auto pexit = std::get_if(&inst.cmd)) { + pexit->stack_frame_prefix = label.stack_frame_prefix; + } else if (const auto pcall = std::get_if(&inst.cmd)) { + pcall->stack_frame_prefix = label.stack_frame_prefix; } + crab::value_t& bb = cfg.insert(label, inst.cmd); if (first) { // Add an edge from the caller to the new block. 
@@ -121,14 +140,12 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t string caller_label_str = to_string(caller_label); const long stack_frame_depth = std::ranges::count(caller_label_str, STACK_FRAME_DELIMITER) + 2; for (const auto& macro_label : seen_labels) { - for (const label_t label(macro_label.from, macro_label.to, caller_label_str); - const auto& inst : cfg.get_node(label)) { - if (const auto pins = std::get_if(&inst.cmd)) { - if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { - throw std::runtime_error{"too many call stack frames"}; - } - add_cfg_nodes(cfg, label, pins->target); + const label_t label(macro_label.from, macro_label.to, caller_label_str); + if (const auto pins = std::get_if(&cfg.at(label).cmd)) { + if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { + throw std::runtime_error{"too many call stack frames"}; } + add_cfg_nodes(cfg, label, pins->target); } } } @@ -136,44 +153,67 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t /// Convert an instruction sequence to a control-flow graph (CFG). static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must_have_exit) { cfg_t cfg; - std::optional falling_from = {}; - bool first = true; - // Do a first pass ignoring all function macro calls. + // First add all instructions to the CFG without connecting for (const auto& [label, inst, _] : insts) { - if (std::holds_alternative(inst)) { continue; } + cfg.insert(label, inst); + } - auto& bb = cfg.insert(label); + if (insts.size() == 0) { + throw std::invalid_argument{"empty instruction sequence"}; + } else { + const auto& [label, inst, _0] = insts[0]; + cfg.get_node(cfg.entry_label()) >> cfg.get_node(label); + } - if (first) { - first = false; - cfg.get_node(cfg.entry_label()) >> bb; - } + // Do a first pass ignoring all function macro calls. 
+ for (size_t i = 0; i < insts.size(); i++) { + const auto& [label, inst, _0] = insts[i]; - bb.insert({.cmd = inst}); - if (falling_from) { - cfg.get_node(*falling_from) >> bb; - falling_from = {}; - } - if (has_fall(inst)) { - falling_from = label; - } - if (auto jump_target = get_jump(inst)) { - bb >> cfg.insert(*jump_target); + if (std::holds_alternative(inst)) { + continue; } + auto& value = cfg.get_node(label); - if (std::holds_alternative(inst)) { - bb >> cfg.get_node(cfg.exit_label()); + label_t fallthrough{cfg.exit_label()}; + if (i + 1 < insts.size()) { + fallthrough = std::get<0>(insts[i + 1]); + } else { + if (has_fall(inst) && must_have_exit) { + throw std::invalid_argument{"fallthrough in last instruction"}; + } } - } - if (falling_from) { - if (must_have_exit) { - throw std::invalid_argument{"fallthrough in last instruction"}; + if (const auto jmp = std::get_if(&inst)) { + if (const auto cond = jmp->cond) { + label_t target_label = jmp->target; + if (target_label == fallthrough) { + value >> cfg.get_node(fallthrough); + continue; + } + + vector> jumps{ + {target_label, *cond}, + {fallthrough, reverse(*cond)}, + }; + for (const auto& [next_label, cond1] : jumps) { + label_t jump_label = label_t::make_jump(label, next_label); + crab::value_t& jump_node = cfg.insert(jump_label, Assume{.cond = cond1, .is_explicit = false}); + value >> jump_node; + jump_node >> cfg.get_node(next_label); + } + } else { + value >> cfg.get_node(jmp->target); + } } else { - cfg.get_node(*falling_from) >> cfg.get_node(cfg.exit_label()); + if (has_fall(inst)) { + value >> cfg.get_node(fallthrough); + } + } + if (std::holds_alternative(inst)) { + value >> cfg.get_node(cfg.exit_label()); } } @@ -189,34 +229,24 @@ static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must return cfg; } -/// Get the inverse of a given comparison operation. 
-static Condition::Op reverse(const Condition::Op op) { - switch (op) { - case Condition::Op::EQ: return Condition::Op::NE; - case Condition::Op::NE: return Condition::Op::EQ; - - case Condition::Op::GE: return Condition::Op::LT; - case Condition::Op::LT: return Condition::Op::GE; - - case Condition::Op::SGE: return Condition::Op::SLT; - case Condition::Op::SLT: return Condition::Op::SGE; - - case Condition::Op::LE: return Condition::Op::GT; - case Condition::Op::GT: return Condition::Op::LE; - - case Condition::Op::SLE: return Condition::Op::SGT; - case Condition::Op::SGT: return Condition::Op::SLE; +cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { + // Convert the instruction sequence to a deterministic control-flow graph. + cfg_t cfg = instruction_seq_to_cfg(prog, options.must_have_exit); - case Condition::Op::SET: return Condition::Op::NSET; - case Condition::Op::NSET: return Condition::Op::SET; + // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a + // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry + // points. These entry points serve as natural locations for loop counters that help verify program termination. + if (options.check_for_termination) { + const wto_t wto{cfg}; + wto.for_each_loop_head([&](const label_t& label) -> void { + cfg.insert_after(label, label_t::make_increment_counter(label), IncrementLoopCounter{label}); + }); } - assert(false); - return {}; -} -/// Get the inverse of a given comparison condition. -static Condition reverse(const Condition& cond) { - return {.op = reverse(cond.op), .left = cond.left, .right = cond.right, .is64 = cond.is64}; + // Annotate the CFG by adding in assertions before every memory instruction. 
+ explicate_assertions(cfg, info); + + return cfg; } template @@ -226,53 +256,6 @@ static vector unique(const std::pair& be) { return res; } -/// Get a non-deterministic version of a control-flow graph, -/// i.e., where instead of using if/else, both branches are taken -/// simultaneously, and are replaced by Assume instructions -/// immediately after the branch. -static cfg_t to_nondet(const cfg_t& cfg) { - cfg_t res; - for (const auto& [this_label, bb] : cfg) { - basic_block_t& newbb = res.insert(this_label); - - for (const auto& ins : bb) { - newbb.insert(ins); - } - - for (const label_t& prev_label : bb.prev_blocks_set()) { - bool is_one = cfg.get_node(prev_label).next_blocks_set().size() > 1; - basic_block_t& pbb = res.insert(is_one ? label_t::make_jump(prev_label, this_label) : prev_label); - pbb >> newbb; - } - // note the special case where we jump to fallthrough - auto nextlist = bb.next_blocks_set(); - if (nextlist.size() == 2) { - label_t mid_label = this_label; - auto jmp = std::get(bb.rbegin()->cmd); - - nextlist.erase(jmp.target); - label_t fallthrough = *nextlist.begin(); - - vector> jumps{ - {jmp.target, *jmp.cond}, - {fallthrough, reverse(*jmp.cond)}, - }; - for (const auto& [next_label, cond1] : jumps) { - label_t jump_label = label_t::make_jump(mid_label, next_label); - basic_block_t& jump_bb = res.insert(jump_label); - jump_bb.insert({.cmd = Assume{cond1}}); - newbb >> jump_bb; - jump_bb >> res.insert(next_label); - } - } else { - for (const auto& label : nextlist) { - newbb >> res.insert(label); - } - } - } - return res; -} - /// Get the type of given Instruction. /// Most of these type names are also statistics header labels. 
static std::string instype(Instruction ins) { @@ -329,62 +312,28 @@ std::map collect_stats(const cfg_t& cfg) { } for (const auto& this_label : cfg.labels()) { res["basic_blocks"]++; - basic_block_t const& bb = cfg.get_node(this_label); - - for (const auto& ins : bb) { - if (const auto pins = std::get_if(&ins.cmd)) { - if (pins->mapfd == -1) { - res["map_in_map"] = 1; - } + const crab::value_t& value = cfg.get_node(this_label); + const auto cmd = value.instruction().cmd; + if (const auto pins = std::get_if(&cmd)) { + if (pins->mapfd == -1) { + res["map_in_map"] = 1; } - if (const auto pins = std::get_if(&ins.cmd)) { - if (pins->reallocate_packet) { - res["reallocate"] = 1; - } - } - if (const auto pins = std::get_if(&ins.cmd)) { - res[pins->is64 ? "arith64" : "arith32"]++; + } + if (const auto pins = std::get_if(&cmd)) { + if (pins->reallocate_packet) { + res["reallocate"] = 1; } - res[instype(ins.cmd)]++; } - if (unique(bb.prev_blocks()).size() > 1) { + if (const auto pins = std::get_if(&cmd)) { + res[pins->is64 ? "arith64" : "arith32"]++; + } + res[instype(cmd)]++; + if (unique(value.prev_labels()).size() > 1) { res["joins"]++; } - if (unique(bb.prev_blocks()).size() > 1) { + if (unique(value.prev_labels()).size() > 1) { res["jumps"]++; } } return res; } - -cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { - // Convert the instruction sequence to a deterministic control-flow graph. - cfg_t det_cfg = instruction_seq_to_cfg(prog, options.must_have_exit); - - // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a - // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry - // points. These entry points serve as natural locations for loop counters that help verify program termination. 
- if (options.check_for_termination) { - const wto_t wto(det_cfg); - wto.for_each_loop_head( - [&](const label_t& label) { det_cfg.get_node(label).insert_front({.cmd = IncrementLoopCounter{label}}); }); - } - - // Annotate the CFG by adding in assertions before every memory instruction. - explicate_assertions(det_cfg, info); - - // Translate conditional jumps to non-deterministic jumps. - cfg_t cfg = to_nondet(det_cfg); - - // Except when debugging, combine chains of instructions into - // basic blocks where possible, i.e., into a range of instructions - // where there is a single entry point and a single exit point. - // An abstract interpreter will keep values at every basic block, - // so the fewer basic blocks we have, the less information it has to - // keep track of. - if (options.simplify) { - cfg.simplify(); - } - - return cfg; -} diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 9ce4d2f09..953435b70 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -6,7 +6,6 @@ #include #include "asm_marshal.hpp" -#include "asm_ostream.hpp" #include "crab_utils/num_safety.hpp" using std::vector; @@ -293,7 +292,7 @@ struct MarshalVisitor { }; vector marshal(const Instruction& ins, const pc_t pc) { - return std::visit(MarshalVisitor{label_to_offset16(pc), label_to_offset32(pc)}, ins); + return std::visit(MarshalVisitor{crab::label_to_offset16(pc), crab::label_to_offset32(pc)}, ins); } static int size(const Instruction& inst) { diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index f76e023e1..ddd7022c6 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -6,12 +6,12 @@ #include #include -#include "asm_ostream.hpp" #include "asm_syntax.hpp" #include "crab/cfg.hpp" #include "crab/interval.hpp" #include "crab/type_encoding.hpp" #include "crab/variable.hpp" +#include "crab_utils/num_big.hpp" #include "helpers.hpp" #include "platform.hpp" #include "spec_type_descriptors.hpp" @@ -21,6 +21,113 @@ using std::optional; using std::string; using 
std::vector; +namespace crab { + +std::string number_t::to_string() const { return _n.str(); } + +std::string interval_t::to_string() const { + std::ostringstream s; + s << *this; + return s.str(); +} + +std::ostream& operator<<(std::ostream& os, const label_t& label) { + if (label == label_t::entry) { + return os << "entry"; + } + if (label == label_t::exit) { + return os << "exit"; + } + if (!label.stack_frame_prefix.empty()) { + os << label.stack_frame_prefix << STACK_FRAME_DELIMITER; + } + os << label.from; + if (label.to != -1) { + os << ":" << label.to; + } + if (!label.special_label.empty()) { + os << " (" << label.special_label << ")"; + } + return os; +} + +string to_string(label_t const& label) { + std::stringstream str; + str << label; + return str.str(); +} + +void print_dot(const cfg_t& cfg, std::ostream& out) { + out << "digraph program {\n"; + out << " node [shape = rectangle];\n"; + for (const auto& label : cfg.labels()) { + out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; + + const auto& value = cfg.get_node(label); + const auto& ins = value.instruction(); + for (const auto& pre : ins.preconditions) { + out << "assert " << pre << "\\l"; + } + out << ins.cmd << "\\l"; + + out << "\"];\n"; + for (const label_t& next : value.next_labels_set()) { + out << " \"" << label << "\" -> \"" << next << "\";\n"; + } + out << "\n"; + } + out << "}\n"; +} + +void print_dot(const cfg_t& cfg, const std::string& outfile) { + std::ofstream out{outfile}; + if (out.fail()) { + throw std::runtime_error(std::string("Could not open file ") + outfile); + } + print_dot(cfg, out); +} + +std::ostream& operator<<(std::ostream& o, const value_t& value) { + o << value.label() << ":\n"; + const auto ins = value.instruction(); + for (const auto& pre : ins.preconditions) { + o << " " + << "assert " << pre << ";\n"; + } + o << " " << ins.cmd << ";\n"; + auto [it, et] = value.next_labels(); + if (it != et) { + o << " " + << "goto "; + while (it != et) { + o << *it; 
+ ++it; + if (it == et) { + o << ";"; + } else { + o << ","; + } + } + } + o << "\n"; + return o; +} + +std::ostream& operator<<(std::ostream& o, const cfg_t& cfg) { + for (const label_t& label : cfg.sorted_labels()) { + o << cfg.get_node(label); + o << "edges to:"; + for (const label_t& next_label : cfg.next_nodes(label)) { + o << " " << next_label; + } + o << "\n"; + } + return o; +} + +} // namespace crab + +namespace asm_syntax { std::ostream& operator<<(std::ostream& os, const ArgSingle::Kind kind) { switch (kind) { case ArgSingle::Kind::ANYTHING: return os << "uint64_t"; @@ -355,12 +462,6 @@ struct CommandPrinterVisitor { }; // ReSharper restore CppMemberFunctionMayBeConst -string to_string(label_t const& label) { - std::stringstream str; - str << label; - return str.str(); -} - std::ostream& operator<<(std::ostream& os, Instruction const& ins) { std::visit(CommandPrinterVisitor{os}, ins); return os; @@ -433,7 +534,7 @@ void print(const InstructionSeq& insts, std::ostream& out, const std::optional(&ins)) { if (!pc_of_label.contains(jmp->target)) { - throw std::runtime_error(string("Cannot find label ") + to_string(jmp->target)); + throw std::runtime_error(string("Cannot find label ") + crab::to_string(jmp->target)); } const pc_t target_pc = pc_of_label.at(jmp->target); visitor(*jmp, target_pc - static_cast(pc) - 1); @@ -446,6 +547,8 @@ void print(const InstructionSeq& insts, std::ostream& out, const std::optional& descriptors, st } } -void print_dot(const cfg_t& cfg, std::ostream& out) { - out << "digraph program {\n"; - out << " node [shape = rectangle];\n"; - for (const auto& label : cfg.labels()) { - out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; - - const auto& bb = cfg.get_node(label); - for (const auto& ins : bb) { - for (const auto& pre : ins.preconditions) { - out << "assert " << pre << "\\l"; - } - out << ins.cmd << "\\l"; - } - - out << "\"];\n"; - for (const label_t& next : bb.next_blocks_set()) { - out << " \"" << label << "\" 
-> \"" << next << "\";\n"; - } - out << "\n"; - } - out << "}\n"; -} - -void print_dot(const cfg_t& cfg, const std::string& outfile) { - std::ofstream out{outfile}; - if (out.fail()) { - throw std::runtime_error(std::string("Could not open file ") + outfile); - } - print_dot(cfg, out); -} - -std::ostream& operator<<(std::ostream& o, const basic_block_t& bb) { - o << bb.label() << ":\n"; - for (const auto& s : bb) { - for (const auto& pre : s.preconditions) { - o << " " - << "assert " << pre << ";\n"; - } - o << " " << s.cmd << ";\n"; - } - auto [it, et] = bb.next_blocks(); - if (it != et) { - o << " " - << "goto "; - while (it != et) { - o << *it; - ++it; - if (it == et) { - o << ";"; - } else { - o << ","; - } - } - } - o << "\n"; - return o; -} - -std::ostream& operator<<(std::ostream& o, const crab::basic_block_rev_t& bb) { - o << bb.label() << ":\n"; - for (const auto& s : bb) { - for (const auto& pre : s.preconditions) { - o << " " - << "assert " << pre << ";\n"; - } - o << " " << s.cmd << ";\n"; - } - o << "--> ["; - for (const label_t& label : bb.next_blocks_set()) { - o << label << ";"; - } - o << "]\n"; - return o; -} - -std::ostream& operator<<(std::ostream& o, const cfg_t& cfg) { - for (const label_t& label : cfg.sorted_labels()) { - o << cfg.get_node(label); - o << "edges to:"; - for (const label_t& next_label : cfg.next_nodes(label)) { - o << " " << next_label; - } - o << "\n"; - } - return o; -} - std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info) { os << "; " << line_info.file_name << ":" << line_info.line_number << "\n"; os << "; " << line_info.source_line << "\n"; return os; } - -std::string crab::number_t::to_string() const { return _n.str(); } - -std::string crab::interval_t::to_string() const { - std::ostringstream s; - s << *this; - return s.str(); -} diff --git a/src/asm_ostream.hpp b/src/asm_ostream.hpp deleted file mode 100644 index 5f9d0e190..000000000 --- a/src/asm_ostream.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// 
Copyright (c) Prevail Verifier contributors. -// SPDX-License-Identifier: MIT -#pragma once - -#include -#include -#include - -#include - -#include "asm_syntax.hpp" -#include "crab_utils/num_safety.hpp" - -// We use a 16-bit offset whenever it fits in 16 bits. -inline std::function label_to_offset16(const pc_t pc) { - return [=](const label_t& label) { - const int64_t offset = label.from - gsl::narrow(pc) - 1; - const bool is16 = - std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); - return gsl::narrow(is16 ? offset : 0); - }; -} - -// We use the JA32 opcode with the offset in 'imm' when the offset -// of an unconditional jump doesn't fit in an int16_t. -inline std::function label_to_offset32(const pc_t pc) { - return [=](const label_t& label) { - const int64_t offset = label.from - gsl::narrow(pc) - 1; - const bool is16 = - std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); - return is16 ? 0 : gsl::narrow(offset); - }; -} - -std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info); - -void print(const InstructionSeq& insts, std::ostream& out, const std::optional& label_to_print, - bool print_line_info = false); - -std::string to_string(label_t const& label); - -std::ostream& operator<<(std::ostream& os, Instruction const& ins); -std::string to_string(Instruction const& ins); - -std::ostream& operator<<(std::ostream& os, Bin::Op op); -std::ostream& operator<<(std::ostream& os, Condition::Op op); - -inline std::ostream& operator<<(std::ostream& os, const Imm imm) { return os << crab::to_signed(imm.v); } -inline std::ostream& operator<<(std::ostream& os, Reg const& a) { return os << "r" << gsl::narrow(a.v); } -inline std::ostream& operator<<(std::ostream& os, Value const& a) { - if (const auto pa = std::get_if(&a)) { - return os << *pa; - } - return os << std::get(a); -} - -std::ostream& operator<<(std::ostream& os, const Assertion& a); -std::string to_string(const Assertion& constraint); diff 
--git a/src/asm_parse.cpp b/src/asm_parse.cpp index 792d6ef43..50c99efdc 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -208,8 +208,12 @@ Instruction parse_instruction(const std::string& line, const std::map +#include #include #include #include #include #include -#include "crab/variable.hpp" +#include "crab/label.hpp" +#include "crab/type_encoding.hpp" +#include "crab_utils/num_safety.hpp" #include "spec_type_descriptors.hpp" -constexpr char STACK_FRAME_DELIMITER = '/'; - -namespace crab { -struct label_t { - int from; ///< Jump source, or simply index of instruction - int to; ///< Jump target or -1 - std::string stack_frame_prefix; ///< Variable prefix when calling this label. - - explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept - : from(index), to(to), stack_frame_prefix(std::move(stack_frame_prefix)) {} - - static label_t make_jump(const label_t& src_label, const label_t& target_label) { - return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; - } - - bool operator==(const label_t& other) const noexcept = default; - - constexpr bool operator<(const label_t& other) const { - if (this == &other) { - return false; - } - if (*this == label_t::exit) { - return false; - } - if (other == label_t::exit) { - return true; - } - return (stack_frame_prefix < other.stack_frame_prefix || - (stack_frame_prefix == other.stack_frame_prefix && - (from < other.from || (from == other.from && to < other.to)))); - } - - // no hash; intended for use in ordered containers. - - [[nodiscard]] - constexpr bool isjump() const { - return to != -1; - } - - [[nodiscard]] - int call_stack_depth() const { - // The call stack depth is the number of '/' separated components in the label, - // which is one more than the number of '/' separated components in the prefix, - // hence two more than the number of '/' in the prefix, if any. 
- if (stack_frame_prefix.empty()) { - return 1; - } - return gsl::narrow(2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER)); - } - - friend std::ostream& operator<<(std::ostream& os, const label_t& label) { - if (label == entry) { - return os << "entry"; - } - if (label == exit) { - return os << "exit"; - } - if (!label.stack_frame_prefix.empty()) { - os << label.stack_frame_prefix << STACK_FRAME_DELIMITER; - } - if (label.to == -1) { - return os << label.from; - } - return os << label.from << ":" << label.to; - } - - static const label_t entry; - static const label_t exit; -}; - -inline const label_t label_t::entry{-1}; -inline const label_t label_t::exit{-2}; - -} // namespace crab using crab::label_t; // Assembly syntax. @@ -307,6 +236,10 @@ struct Undefined { /// the branch and before each jump target. struct Assume { Condition cond; + + // True if the condition is explicitly written in the program (for tests). + bool is_explicit{}; + constexpr bool operator==(const Assume&) const = default; }; @@ -426,12 +359,31 @@ struct GuardedInstruction { bool operator==(const GuardedInstruction&) const = default; }; -// cpu=v4 supports 32-bit PC offsets so we need a large enough type. 
-using pc_t = uint32_t; +std::ostream& operator<<(std::ostream& os, Instruction const& ins); +std::string to_string(Instruction const& ins); + +std::ostream& operator<<(std::ostream& os, Bin::Op op); +std::ostream& operator<<(std::ostream& os, Condition::Op op); + +inline std::ostream& operator<<(std::ostream& os, const Imm imm) { return os << crab::to_signed(imm.v); } +inline std::ostream& operator<<(std::ostream& os, Reg const& a) { return os << "r" << gsl::narrow(a.v); } +inline std::ostream& operator<<(std::ostream& os, Value const& a) { + if (const auto pa = std::get_if(&a)) { + return os << *pa; + } + return os << std::get(a); +} + +std::ostream& operator<<(std::ostream& os, const Assertion& a); +std::string to_string(const Assertion& constraint); + +void print(const InstructionSeq& insts, std::ostream& out, const std::optional& label_to_print, + bool print_line_info = false); } // namespace asm_syntax using namespace asm_syntax; +using crab::pc_t; template struct overloaded : Ts... { diff --git a/src/assertions.cpp b/src/assertions.cpp index 96ce593e5..faebc9579 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -38,7 +38,7 @@ class AssertExtractor { : info{std::move(info)}, current_label(label) {} vector operator()(const Undefined&) const { - assert(false); + // assert(false); return {}; } @@ -176,7 +176,12 @@ class AssertExtractor { return res; } - vector operator()(const Assume& ins) const { return explicate(ins.cond); } + vector operator()(const Assume& ins) const { + if (ins.is_explicit) { + return explicate(ins.cond); + } + return {}; + } vector operator()(const Jmp& ins) const { if (!ins.cond) { @@ -298,10 +303,9 @@ vector get_assertions(Instruction ins, const program_info& info, cons /// regions. The verifier will use these assertions to treat the program as /// unsafe unless it can prove that the assertions can never fail. 
void explicate_assertions(cfg_t& cfg, const program_info& info) { - for (auto& [label, bb] : cfg) { + for (auto& [label, value] : cfg) { (void)label; // unused - for (auto& ins : bb) { - ins.preconditions = get_assertions(ins.cmd, info, bb.label()); - } + auto& ins = value.instruction(); + ins.preconditions = get_assertions(ins.cmd, info, value.label()); } } diff --git a/src/config.hpp b/src/config.hpp index b109a35c8..09d5d919e 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -8,6 +8,9 @@ struct ebpf_verifier_options_t { // Options that control how the control flow graph is built. prepare_cfg_options cfg_opts; + /// When true, simplifies the control flow graph by merging basic blocks. + bool simplify = true; + // True to assume prior failed assertions are true and continue verification. bool assume_assertions = false; @@ -20,7 +23,7 @@ struct ebpf_verifier_options_t { // True to allow division by zero and assume BPF ISA defined semantics. bool allow_division_by_zero = true; - // Setup the entry constraints for a BPF program. + // Set up the entry constraints for a BPF program. bool setup_constraints = true; // True if the ELF file is built on a big endian system. diff --git a/src/crab/cfg.hpp b/src/crab/cfg.hpp index 8dd6e9ba0..678d1312e 100644 --- a/src/crab/cfg.hpp +++ b/src/crab/cfg.hpp @@ -3,14 +3,7 @@ #pragma once /* - * Build a CFG to interface with the abstract domains and fixpoint - * iterators. - * - * All the CFG statements are strongly typed. However, only variables - * need to be typed. The types of constants can be inferred from the - * context since they always appear together with at least one - * variable. - * + * Build a CFG to interface with the abstract domains and fixpoint iterators. 
*/ #include #include @@ -22,7 +15,6 @@ #include #include -#include "asm_ostream.hpp" #include "asm_syntax.hpp" #include "crab_utils/debug.hpp" #include "crab_utils/num_big.hpp" @@ -32,103 +24,68 @@ namespace crab { class cfg_t; -class basic_block_t final { +// Node type for the CFG +class value_t final { friend class cfg_t; public: - basic_block_t(const basic_block_t&) = delete; + value_t(const value_t&) = delete; using label_vec_t = std::set; - using stmt_list_t = std::vector; using neighbour_const_iterator = label_vec_t::const_iterator; using neighbour_const_reverse_iterator = label_vec_t::const_reverse_iterator; - using iterator = stmt_list_t::iterator; - using const_iterator = stmt_list_t::const_iterator; - using reverse_iterator = stmt_list_t::reverse_iterator; - using const_reverse_iterator = stmt_list_t::const_reverse_iterator; private: label_t m_label; - stmt_list_t m_ts; + GuardedInstruction m_instruction{.cmd = Undefined{}}; label_vec_t m_prev, m_next; public: - void insert(const GuardedInstruction& arg) { - assert(label() != label_t::entry); - assert(label() != label_t::exit); - m_ts.push_back(arg); - } + explicit value_t(label_t _label) : m_label{std::move(_label)} {} - /// Insert a GuardedInstruction at the front of the basic block. - /// @note Cannot modify entry or exit blocks. 
- void insert_front(const GuardedInstruction& arg) { - assert(label() != label_t::entry); - assert(label() != label_t::exit); - m_ts.insert(m_ts.begin(), arg); - } - - explicit basic_block_t(label_t _label) : m_label(std::move(_label)) {} - - ~basic_block_t() = default; + ~value_t() = default; [[nodiscard]] label_t label() const { return m_label; } - iterator begin() { return (m_ts.begin()); } - iterator end() { return (m_ts.end()); } [[nodiscard]] - const_iterator begin() const { - return (m_ts.begin()); - } - [[nodiscard]] - const_iterator end() const { - return (m_ts.end()); + GuardedInstruction& instruction() { + return m_instruction; } - reverse_iterator rbegin() { return (m_ts.rbegin()); } - reverse_iterator rend() { return (m_ts.rend()); } [[nodiscard]] - const_reverse_iterator rbegin() const { - return (m_ts.rbegin()); - } - [[nodiscard]] - const_reverse_iterator rend() const { - return (m_ts.rend()); - } - - [[nodiscard]] - size_t size() const { - return gsl::narrow(std::distance(begin(), end())); + const GuardedInstruction& instruction() const { + return m_instruction; } [[nodiscard]] - std::pair next_blocks() const { + std::pair next_labels() const { return std::make_pair(m_next.begin(), m_next.end()); } [[nodiscard]] - std::pair next_blocks_reversed() const { + std::pair next_labels_reversed() const { return std::make_pair(m_next.rbegin(), m_next.rend()); } [[nodiscard]] - std::pair prev_blocks() const { + std::pair prev_labels() const { return std::make_pair(m_prev.begin(), m_prev.end()); } [[nodiscard]] - const label_vec_t& next_blocks_set() const { + const label_vec_t& next_labels_set() const { return m_next; } [[nodiscard]] - const label_vec_t& prev_blocks_set() const { + const label_vec_t& prev_labels_set() const { return m_prev; } // Add a cfg_t edge from *this to b - void operator>>(basic_block_t& b) { + void operator>>(value_t& b) { assert(b.label() != label_t::entry); assert(this->label() != label_t::exit); m_next.insert(b.m_label); @@ -136,17 
+93,11 @@ class basic_block_t final { } // Remove a cfg_t edge from *this to b - void operator-=(basic_block_t& b) { + void operator-=(value_t& b) { m_next.erase(b.m_label); b.m_prev.erase(m_label); } - // insert all statements of other at the back - void move_back(basic_block_t& other) { - m_ts.reserve(m_ts.size() + other.m_ts.size()); - std::ranges::move(other.m_ts, std::back_inserter(m_ts)); - } - [[nodiscard]] size_t in_degree() const { return m_prev.size(); @@ -156,108 +107,47 @@ class basic_block_t final { size_t out_degree() const { return m_next.size(); } - - void swap_instructions(stmt_list_t& ts) { std::swap(m_ts, ts); } -}; - -// Viewing basic_block_t with all statements reversed. Useful for -// backward analysis. -class basic_block_rev_t final { - public: - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; - - using iterator = basic_block_t::reverse_iterator; - using const_iterator = basic_block_t::const_reverse_iterator; - - public: - basic_block_t& _bb; - - explicit basic_block_rev_t(basic_block_t& bb) : _bb(bb) {} - - [[nodiscard]] - label_t label() const { - return _bb.label(); - } - - iterator begin() { return _bb.rbegin(); } - - iterator end() { return _bb.rend(); } - - [[nodiscard]] - const_iterator begin() const { - return _bb.rbegin(); - } - - [[nodiscard]] - const_iterator end() const { - return _bb.rend(); - } - - [[nodiscard]] - std::size_t size() const { - return gsl::narrow(std::distance(begin(), end())); - } - - [[nodiscard]] - std::pair next_blocks() const { - return _bb.prev_blocks(); - } - - [[nodiscard]] - std::pair prev_blocks() const { - return _bb.next_blocks(); - } - - [[nodiscard]] - const basic_block_t::label_vec_t& next_blocks_set() const { - return _bb.prev_blocks_set(); - } - - [[nodiscard]] - const basic_block_t::label_vec_t& prev_blocks_set() const { - return _bb.next_blocks_set(); - } }; -/// Control-Flow Graph. 
+/// Control-Flow Graph class cfg_t final { public: using node_t = label_t; // for Bgl graphs - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; - using neighbour_const_reverse_iterator = basic_block_t::neighbour_const_reverse_iterator; + using neighbour_const_iterator = value_t::neighbour_const_iterator; + using neighbour_const_reverse_iterator = value_t::neighbour_const_reverse_iterator; using neighbour_const_range = boost::iterator_range; using neighbour_const_reverse_range = boost::iterator_range; private: - using basic_block_map_t = std::map; - using binding_t = basic_block_map_t::value_type; + using map_t = std::map; + using binding_t = map_t::value_type; struct get_label { label_t operator()(const binding_t& p) const { return p.second.label(); } }; public: - using iterator = basic_block_map_t::iterator; - using const_iterator = basic_block_map_t::const_iterator; - using label_iterator = boost::transform_iterator; - using const_label_iterator = boost::transform_iterator; + using iterator = map_t::iterator; + using const_iterator = map_t::const_iterator; + using label_iterator = boost::transform_iterator; + using const_label_iterator = boost::transform_iterator; private: - basic_block_map_t m_blocks; + map_t m_map; using visited_t = std::set; public: cfg_t() { - m_blocks.emplace(entry_label(), entry_label()); - m_blocks.emplace(exit_label(), exit_label()); + m_map.emplace(entry_label(), entry_label()); + m_map.emplace(exit_label(), exit_label()); } cfg_t(const cfg_t&) = delete; - cfg_t(cfg_t&& o) noexcept : m_blocks(std::move(o.m_blocks)) {} + cfg_t(cfg_t&& o) noexcept : m_map(std::move(o.m_map)) {} ~cfg_t() = default; @@ -275,45 +165,89 @@ class cfg_t final { [[nodiscard]] neighbour_const_range next_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_blocks()); + return boost::make_iterator_range(get_node(_label).next_labels()); } + [[nodiscard]] neighbour_const_reverse_range 
next_nodes_reversed(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_blocks_reversed()); + return boost::make_iterator_range(get_node(_label).next_labels_reversed()); } [[nodiscard]] neighbour_const_range prev_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).prev_blocks()); + return boost::make_iterator_range(get_node(_label).prev_labels()); } - basic_block_t& get_node(const label_t& _label) { - auto it = m_blocks.find(_label); - if (it == m_blocks.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); + value_t& get_node(const label_t& _label) { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } - [[nodiscard]] - const basic_block_t& get_node(const label_t& _label) const { - auto it = m_blocks.find(_label); - if (it == m_blocks.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); + const value_t& get_node(const label_t& _label) const { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } + GuardedInstruction& at(const label_t& _label) { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + } + return it->second.instruction(); + } + + [[nodiscard]] + const GuardedInstruction& at(const label_t& _label) const { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + } + return it->second.instruction(); + } + // --- End ikos fixpoint API - basic_block_t& insert(const label_t& _label) { - auto it = m_blocks.find(_label); - if (it != m_blocks.end()) { + value_t& insert_after(const label_t& prev_label, const label_t& new_label, const Instruction& _ins) { + 
value_t& res = insert(new_label, GuardedInstruction{.cmd = _ins}); + value_t& prev = get_node(prev_label); + std::vector nexts; + for (const label_t& next : prev.next_labels_set()) { + nexts.push_back(next); + } + prev.m_next.clear(); + + std::vector prevs; + for (const label_t& next_label : nexts) { + get_node(next_label).m_prev.erase(prev_label); + } + + for (const label_t& next : nexts) { + get_node(prev_label) >> res; + res >> get_node(next); + } + return res; + } + + value_t& insert(const label_t& _label, const Instruction& _ins) { + return insert(_label, GuardedInstruction{.cmd = _ins}); + } + + value_t& insert(const label_t& _label, GuardedInstruction&& _ins) { + const auto it = m_map.find(_label); + if (it != m_map.end()) { return it->second; } - m_blocks.emplace(_label, _label); - return get_node(_label); + m_map.emplace(_label, _label); + value_t& v = get_node(_label); + v.m_instruction = std::move(_ins); + return v; } void remove(const label_t& _label) { @@ -325,16 +259,16 @@ class cfg_t final { CRAB_ERROR("Cannot remove exit block"); } - std::vector> dead_edges; + std::vector> dead_edges; auto& bb = get_node(_label); - for (const auto& id : boost::make_iterator_range(bb.prev_blocks())) { + for (const auto& id : boost::make_iterator_range(bb.prev_labels())) { if (_label != id) { dead_edges.emplace_back(&get_node(id), &bb); } } - for (const auto& id : boost::make_iterator_range(bb.next_blocks())) { + for (const auto& id : boost::make_iterator_range(bb.next_labels())) { if (_label != id) { dead_edges.emplace_back(&bb, &get_node(id)); } @@ -344,37 +278,37 @@ class cfg_t final { *p.first -= *p.second; } - m_blocks.erase(_label); + m_map.erase(_label); } //! return a begin iterator of basic_block_t's - iterator begin() { return m_blocks.begin(); } + iterator begin() { return m_map.begin(); } //! 
return an end iterator of basic_block_t's - iterator end() { return m_blocks.end(); } + iterator end() { return m_map.end(); } [[nodiscard]] const_iterator begin() const { - return m_blocks.begin(); + return m_map.begin(); } [[nodiscard]] const_iterator end() const { - return m_blocks.end(); + return m_map.end(); } //! return a begin iterator of label_t's - const_label_iterator label_begin() const { return boost::make_transform_iterator(m_blocks.begin(), get_label()); } + const_label_iterator label_begin() const { return boost::make_transform_iterator(m_map.begin(), get_label()); } //! return an end iterator of label_t's - const_label_iterator label_end() const { return boost::make_transform_iterator(m_blocks.end(), get_label()); } + const_label_iterator label_end() const { return boost::make_transform_iterator(m_map.end(), get_label()); } //! return a begin iterator of label_t's [[nodiscard]] std::vector labels() const { std::vector res; - res.reserve(m_blocks.size()); - for (const auto& p : m_blocks) { + res.reserve(m_map.size()); + for (const auto& p : m_map) { res.push_back(p.first); } return res; @@ -385,41 +319,6 @@ class cfg_t final { return gsl::narrow(std::distance(begin(), end())); } - void simplify() { - std::set worklist(this->label_begin(), this->label_end()); - while (!worklist.empty()) { - label_t label = *worklist.begin(); - worklist.erase(label); - - basic_block_t& bb = get_node(label); - if (bb.in_degree() == 1 && get_parent(label).out_degree() == 1) { - continue; - } - while (bb.out_degree() == 1) { - basic_block_t& next_bb = get_child(label); - - if (&next_bb == &bb || next_bb.in_degree() != 1) { - break; - } - if (next_bb.label() == exit_label()) { - break; - } - worklist.erase(next_bb.label()); - - bb.move_back(next_bb); - bb -= next_bb; - auto children = next_bb.m_next; - for (const label_t& next_next_label : children) { - basic_block_t& next_next_bb = get_node(next_next_label); - bb >> next_next_bb; - } - - // delete next_bb entirely - 
remove(next_bb.label()); - } - } - } - [[nodiscard]] std::vector sorted_labels() const { std::vector labels = this->labels(); @@ -427,30 +326,30 @@ class cfg_t final { return labels; } + value_t& get_child(const label_t& b) { + assert(has_one_child(b)); + const auto rng = next_nodes(b); + return get_node(*rng.begin()); + } + + value_t& get_parent(const label_t& b) { + assert(has_one_parent(b)); + const auto rng = prev_nodes(b); + return get_node(*rng.begin()); + } + private: // Helpers [[nodiscard]] bool has_one_child(const label_t& b) const { - auto rng = next_nodes(b); - return (std::distance(rng.begin(), rng.end()) == 1); + const auto rng = next_nodes(b); + return std::distance(rng.begin(), rng.end()) == 1; } [[nodiscard]] bool has_one_parent(const label_t& b) const { - auto rng = prev_nodes(b); - return (std::distance(rng.begin(), rng.end()) == 1); - } - - basic_block_t& get_child(const label_t& b) { - assert(has_one_child(b)); - auto rng = next_nodes(b); - return get_node(*(rng.begin())); - } - - basic_block_t& get_parent(const label_t& b) { - assert(has_one_parent(b)); - auto rng = prev_nodes(b); - return get_node(*(rng.begin())); + const auto rng = prev_nodes(b); + return std::distance(rng.begin(), rng.end()) == 1; } // mark reachable blocks from curId @@ -466,142 +365,130 @@ class cfg_t final { } void remove_unreachable_blocks(); - - // remove blocks that cannot reach the exit block - void remove_useless_blocks(); }; -// Viewing a cfg_t with all edges and block statements reversed. Useful for backward analysis. 
-class cfg_rev_t final { - public: - using node_t = label_t; // for Bgl graphs - - using neighbour_const_range = cfg_t::neighbour_const_range; +class basic_block_t final { + friend class cfg_t; - // For BGL - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; + public: + basic_block_t(const basic_block_t&) = delete; - using basic_block_rev_map_t = std::map; - using iterator = basic_block_rev_map_t::iterator; - using const_iterator = basic_block_rev_map_t::const_iterator; - using label_iterator = cfg_t::label_iterator; - using const_label_iterator = cfg_t::const_label_iterator; + using label_vec_t = std::set; + using stmt_list_t = std::vector; + using iterator = stmt_list_t::iterator; + using const_iterator = stmt_list_t::const_iterator; + using reverse_iterator = stmt_list_t::reverse_iterator; + using const_reverse_iterator = stmt_list_t::const_reverse_iterator; private: - cfg_t& _cfg; - basic_block_rev_map_t _rev_bbs; + label_t m_label; + stmt_list_t m_ts; public: - explicit cfg_rev_t(cfg_t& cfg) : _cfg(cfg) { - // Create basic_block_rev_t from basic_block_t objects - // Note that basic_block_rev_t is also a view of basic_block_t so it - // doesn't modify basic_block_t objects. 
- for (auto& [label, bb] : cfg) { - _rev_bbs.emplace(label, bb); - } - } + static std::map collect_basic_blocks(cfg_t& cfg) { + std::map res; - cfg_rev_t(const cfg_rev_t& o) = default; + std::set worklist(cfg.label_begin(), cfg.label_end()); + std::set seen; + while (!worklist.empty()) { + label_t label = *worklist.begin(); + worklist.erase(label); + if (seen.contains(label)) { + continue; + } + seen.insert(label); - cfg_rev_t(cfg_rev_t&& o) noexcept : _cfg(o._cfg), _rev_bbs(std::move(o._rev_bbs)) {} + const value_t& value = cfg.get_node(label); + if (value.in_degree() == 1 && cfg.get_parent(label).out_degree() == 1) { + continue; + } + res.emplace(label, label); + basic_block_t& bb = res.at(label); + while (value.out_degree() == 1) { + value_t& next_value = cfg.get_child(label); - [[nodiscard]] - label_t entry_label() const { - return _cfg.exit_label(); - } + if (&next_value == &value || next_value.in_degree() != 1) { + break; + } + if (next_value.label() == cfg.exit_label()) { + break; + } + worklist.erase(next_value.label()); - [[nodiscard]] - neighbour_const_range next_nodes(const label_t& bb) const { - return _cfg.prev_nodes(bb); + bb.m_ts.push_back(&next_value.instruction()); + + // delete next_bb entirely + // remove(next_value.label()); + seen.insert(next_value.label()); + } + } + return res; } - [[nodiscard]] - neighbour_const_range prev_nodes(const label_t& bb) const { - return _cfg.next_nodes(bb); + void insert(GuardedInstruction* arg) { + assert(label() != label_t::entry); + assert(label() != label_t::exit); + m_ts.push_back(arg); } - neighbour_const_range next_nodes(const label_t& bb) { return _cfg.prev_nodes(bb); } + /// Insert a GuardedInstruction at the front of the basic block. + /// @note Cannot modify entry or exit blocks. 
+ void insert_front(GuardedInstruction* arg) { + assert(label() != label_t::entry); + assert(label() != label_t::exit); + m_ts.insert(m_ts.begin(), arg); + } - neighbour_const_range prev_nodes(const label_t& bb) { return _cfg.next_nodes(bb); } + explicit basic_block_t(label_t _label) : m_label(std::move(_label)) {} - basic_block_rev_t& get_node(const label_t& _label) { - auto it = _rev_bbs.find(_label); - if (it == _rev_bbs.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); - } - return it->second; - } + ~basic_block_t() = default; [[nodiscard]] - const basic_block_rev_t& get_node(const label_t& _label) const { - auto it = _rev_bbs.find(_label); - if (it == _rev_bbs.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); - } - return it->second; + label_t label() const { + return m_label; } - iterator begin() { return _rev_bbs.begin(); } - - iterator end() { return _rev_bbs.end(); } - + iterator begin() { return (m_ts.begin()); } + iterator end() { return (m_ts.end()); } [[nodiscard]] const_iterator begin() const { - return _rev_bbs.begin(); + return m_ts.begin(); } - [[nodiscard]] const_iterator end() const { - return _rev_bbs.end(); + return m_ts.end(); } - const_label_iterator label_begin() const { return _cfg.label_begin(); } - - const_label_iterator label_end() const { return _cfg.label_end(); } - + reverse_iterator rbegin() { return (m_ts.rbegin()); } + reverse_iterator rend() { return (m_ts.rend()); } [[nodiscard]] - label_t exit_label() const { - return _cfg.entry_label(); + const_reverse_iterator rbegin() const { + return m_ts.rbegin(); } -}; - -inline void cfg_t::remove_useless_blocks() { - cfg_rev_t rev_cfg(*this); - - visited_t useful, useless; - mark_alive_blocks(rev_cfg.entry_label(), rev_cfg, useful); - - if (!useful.contains(exit_label())) { - CRAB_ERROR("Exit block must be reachable"); + [[nodiscard]] + const_reverse_iterator rend() const { + return m_ts.rend(); } - for (const auto& label 
: labels()) { - if (!useful.contains(label)) { - useless.insert(label); - } + + [[nodiscard]] + size_t size() const { + return gsl::narrow(std::distance(begin(), end())); } - for (const auto& _label : useless) { - remove(_label); + // insert all statements of other at the back + void move_back(basic_block_t& other) { + m_ts.reserve(m_ts.size() + other.m_ts.size()); + std::ranges::move(other.m_ts, std::back_inserter(m_ts)); } -} -inline void cfg_t::remove_unreachable_blocks() { - visited_t alive, dead; - mark_alive_blocks(entry_label(), *this, alive); + void swap_instructions(stmt_list_t& ts) { std::swap(m_ts, ts); } +}; - for (const auto& label : labels()) { - if (!alive.contains(label)) { - dead.insert(label); - } - } +void print_dot(const cfg_t& cfg, std::ostream& out); +void print_dot(const cfg_t& cfg, const std::string& outfile); - if (dead.contains(exit_label())) { - CRAB_ERROR("Exit block must be reachable"); - } - for (const auto& _label : dead) { - remove(_label); - } -} +std::ostream& operator<<(std::ostream& o, const value_t& value); +std::ostream& operator<<(std::ostream& o, const cfg_t& cfg); } // end namespace crab @@ -613,8 +500,6 @@ std::vector stats_headers(); std::map collect_stats(const cfg_t&); struct prepare_cfg_options { - /// When true, simplifies the control flow graph by merging basic blocks. - bool simplify = true; /// When true, verifies that the program terminates. bool check_for_termination = false; /// When true, ensures the program has a valid exit block. 
@@ -625,10 +510,3 @@ cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const pr void explicate_assertions(cfg_t& cfg, const program_info& info); std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); - -void print_dot(const cfg_t& cfg, std::ostream& out); -void print_dot(const cfg_t& cfg, const std::string& outfile); - -std::ostream& operator<<(std::ostream& o, const basic_block_t& bb); -std::ostream& operator<<(std::ostream& o, const crab::basic_block_rev_t& bb); -std::ostream& operator<<(std::ostream& o, const cfg_t& cfg); diff --git a/src/crab/ebpf_checker.cpp b/src/crab/ebpf_checker.cpp index 3ec0ed85f..4d734780d 100644 --- a/src/crab/ebpf_checker.cpp +++ b/src/crab/ebpf_checker.cpp @@ -7,7 +7,7 @@ #include #include -#include "asm_ostream.hpp" +#include "asm_syntax.hpp" #include "asm_unmarshal.hpp" #include "config.hpp" #include "crab/array_domain.hpp" diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 8d3ef4f48..cb4ca0b6c 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -9,7 +9,6 @@ #include "boost/endian/conversion.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" #include "config.hpp" #include "crab/array_domain.hpp" diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp index 7936600c8..743427ad0 100644 --- a/src/crab/ebpf_transformer.cpp +++ b/src/crab/ebpf_transformer.cpp @@ -10,7 +10,6 @@ #include "boost/endian/conversion.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" #include "config.hpp" #include "crab/array_domain.hpp" diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index 5afa2e0a2..f77e8e90a 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -68,17 +68,16 @@ class interleaved_fwd_fixpoint_iterator_t final { void set_pre(const label_t& label, const ebpf_domain_t& v) { _pre[label] = v; } void transform_to_post(const label_t& label, ebpf_domain_t pre) { - 
const basic_block_t& bb = _cfg.get_node(label); + const GuardedInstruction& ins = _cfg.at(label); - for (const GuardedInstruction& ins : bb) { - if (thread_local_options.assume_assertions) { - for (const auto& assertion : ins.preconditions) { - // avoid redundant errors - ebpf_domain_assume(pre, assertion); - } + if (thread_local_options.assume_assertions) { + for (const auto& assertion : ins.preconditions) { + // avoid redundant errors + ebpf_domain_assume(pre, assertion); } - ebpf_domain_transform(pre, ins.cmd); - }; + } + ebpf_domain_transform(pre, ins.cmd); + _post[label] = std::move(pre); } diff --git a/src/crab/interval.hpp b/src/crab/interval.hpp index c7de6dda6..c4a2c0a7d 100644 --- a/src/crab/interval.hpp +++ b/src/crab/interval.hpp @@ -104,7 +104,8 @@ class interval_t final { template [[nodiscard]] std::tuple bound(T elb, T eub) const { - auto [lb, ub] = bound(static_cast>(elb), static_cast>(eub)); + using C = std::underlying_type_t; + auto [lb, ub] = bound(static_cast(elb), static_cast(eub)); return {static_cast(lb), static_cast(ub)}; } diff --git a/src/crab/label.hpp b/src/crab/label.hpp new file mode 100644 index 000000000..db8c55ea5 --- /dev/null +++ b/src/crab/label.hpp @@ -0,0 +1,94 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "crab_utils/num_safety.hpp" + +constexpr char STACK_FRAME_DELIMITER = '/'; + +namespace crab { +struct label_t { + std::string stack_frame_prefix; ///< Variable prefix when calling this label. + int from{}; ///< Jump source, or simply index of instruction + int to{}; ///< Jump target or -1 + std::string special_label; ///< Special label for special instructions. 
+ + explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept + : stack_frame_prefix(std::move(stack_frame_prefix)), from(index), to(to) {} + + static label_t make_jump(const label_t& src_label, const label_t& target_label) { + return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; + } + + static label_t make_increment_counter(const label_t& label) { + // XXX: This is a hack to increment the loop counter. + label_t res{label.from, label.to, label.stack_frame_prefix}; + res.special_label = "counter"; + return res; + } + + std::strong_ordering operator<=>(const label_t& other) const = default; + + // no hash; intended for use in ordered containers. + + [[nodiscard]] + constexpr bool isjump() const { + return to != -1; + } + + [[nodiscard]] + int call_stack_depth() const { + // The call stack depth is the number of '/' separated components in the label, + // which is one more than the number of '/' separated components in the prefix, + // hence two more than the number of '/' in the prefix, if any. + if (stack_frame_prefix.empty()) { + return 1; + } + return gsl::narrow(2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER)); + } + + static const label_t entry; + static const label_t exit; +}; + +inline const label_t label_t::entry{-1}; +inline const label_t label_t::exit{INT_MAX}; + +std::ostream& operator<<(std::ostream& os, const label_t& label); +std::string to_string(label_t const& label); + +// cpu=v4 supports 32-bit PC offsets so we need a large enough type. +using pc_t = uint32_t; + +// We use a 16-bit offset whenever it fits in 16 bits. +inline std::function label_to_offset16(const pc_t pc) { + return [=](const label_t& label) { + const int64_t offset = label.from - gsl::narrow(pc) - 1; + const bool is16 = + std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); + return gsl::narrow(is16 ? 
offset : 0); + }; +} + +// We use the JA32 opcode with the offset in 'imm' when the offset +// of an unconditional jump doesn't fit in an int16_t. +inline std::function label_to_offset32(const pc_t pc) { + return [=](const label_t& label) { + const int64_t offset = label.from - gsl::narrow(pc) - 1; + const bool is16 = + std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); + return is16 ? 0 : gsl::narrow(offset); + }; +} + +} // namespace crab diff --git a/src/crab/split_dbm.cpp b/src/crab/split_dbm.cpp index 1c8620718..b8cf2ce50 100644 --- a/src/crab/split_dbm.cpp +++ b/src/crab/split_dbm.cpp @@ -1143,9 +1143,8 @@ string_invariant SplitDBM::to_set() const { if (!this->g.elem(0, v) && !this->g.elem(v, 0)) { continue; } - interval_t v_out = - interval_t(this->g.elem(v, 0) ? -number_t(this->g.edge_val(v, 0)) : extended_number::minus_infinity(), - this->g.elem(0, v) ? number_t(this->g.edge_val(0, v)) : extended_number::plus_infinity()); + interval_t v_out{this->g.elem(v, 0) ? -number_t(this->g.edge_val(v, 0)) : extended_number::minus_infinity(), + this->g.elem(0, v) ? 
number_t(this->g.edge_val(0, v)) : extended_number::plus_infinity()}; assert(!v_out.is_bottom()); variable_t variable = *this->rev_map[v]; @@ -1153,7 +1152,7 @@ string_invariant SplitDBM::to_set() const { std::stringstream elem; elem << variable; if (variable.is_type()) { - auto [lb, ub] = v_out.bound(T_MIN, T_MAX); + auto [lb, ub] = v_out.bound(T_UNINIT, T_MAX); if (lb == ub) { if (variable.is_in_stack() && lb == T_NUM) { // no need to show this @@ -1206,8 +1205,7 @@ string_invariant SplitDBM::to_set() const { std::ostream& operator<<(std::ostream& o, const SplitDBM& dom) { return o << dom.to_set(); } bool SplitDBM::eval_expression_overflow(const linear_expression_t& e, Weight& out) const { - [[maybe_unused]] - const bool overflow = convert_NtoW_overflow(e.constant_term(), out); + [[maybe_unused]] const bool overflow = convert_NtoW_overflow(e.constant_term(), out); assert(!overflow); for (const auto& [variable, coefficient] : e.variable_terms()) { Weight coef; diff --git a/src/crab/thresholds.cpp b/src/crab/thresholds.cpp index 88e3e689e..5cf436d30 100644 --- a/src/crab/thresholds.cpp +++ b/src/crab/thresholds.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "crab/thresholds.hpp" #include "crab/cfg.hpp" +#include "crab/label.hpp" namespace crab { @@ -48,7 +49,7 @@ std::ostream& operator<<(std::ostream& o, const thresholds_t& t) { return o; } -void wto_thresholds_t::get_thresholds(const basic_block_t& bb, thresholds_t& thresholds) const {} +void wto_thresholds_t::get_thresholds(const value_t& bb, thresholds_t& thresholds) const {} void wto_thresholds_t::operator()(const label_t& vertex) { if (m_stack.empty()) { @@ -59,7 +60,7 @@ void wto_thresholds_t::operator()(const label_t& vertex) { const auto it = m_head_to_thresholds.find(head); if (it != m_head_to_thresholds.end()) { thresholds_t& thresholds = it->second; - const basic_block_t& bb = m_cfg.get_node(vertex); + const value_t& bb = m_cfg.get_node(vertex); get_thresholds(bb, thresholds); } else 
{ CRAB_ERROR("No head found while gathering thresholds"); @@ -73,7 +74,7 @@ void wto_thresholds_t::operator()(const std::shared_ptr& cycle) { // XXX: if we want to consider constants from loop // initializations - for (const auto& pre : boost::make_iterator_range(bb.prev_blocks())) { + for (const auto& pre : boost::make_iterator_range(bb.prev_labels())) { if (pre != cycle->head()) { auto& pred_bb = m_cfg.get_node(pre); get_thresholds(pred_bb, thresholds); @@ -90,7 +91,7 @@ void wto_thresholds_t::operator()(const std::shared_ptr& cycle) { std::ostream& operator<<(std::ostream& o, const wto_thresholds_t& t) { for (const auto& [label, th] : t.m_head_to_thresholds) { - o << label << "=" << th << "\n"; + o << to_string(label) << "=" << th << "\n"; } return o; } diff --git a/src/crab/thresholds.hpp b/src/crab/thresholds.hpp index fe5a2e079..1e3d67635 100644 --- a/src/crab/thresholds.hpp +++ b/src/crab/thresholds.hpp @@ -58,7 +58,7 @@ class wto_thresholds_t final { // the top of the stack is the current wto head std::vector m_stack; - void get_thresholds(const basic_block_t& bb, thresholds_t& thresholds) const; + void get_thresholds(const value_t& bb, thresholds_t& thresholds) const; public: wto_thresholds_t(cfg_t& cfg, const size_t max_size) : m_cfg(cfg), m_max_size(max_size) {} diff --git a/src/crab/var_factory.cpp b/src/crab/var_factory.cpp index f4c243229..00ace7328 100644 --- a/src/crab/var_factory.cpp +++ b/src/crab/var_factory.cpp @@ -4,7 +4,7 @@ * Factories for variable names. */ -#include "asm_syntax.hpp" +#include "crab/label.hpp" #include "crab/variable.hpp" #include "crab_utils/lazy_allocator.hpp" diff --git a/src/crab_utils/debug.hpp b/src/crab_utils/debug.hpp index e70d15c54..e71baa424 100644 --- a/src/crab_utils/debug.hpp +++ b/src/crab_utils/debug.hpp @@ -43,13 +43,14 @@ void ___print___(std::ostream& os, ArgTypes... args) { (void)expand_variadic_pack{0, ((os << args), void(), 0)...}; } -#define CRAB_ERROR(...) 
\ - do { \ - std::ostringstream os; \ - os << "CRAB ERROR: "; \ - crab::___print___(os, __VA_ARGS__); \ - os << "\n"; \ - throw std::runtime_error(os.str()); \ +#define CRAB_ERROR(...) \ + do { \ + std::ostringstream os; \ + os << "CRAB ERROR: "; \ + crab::___print___(os, __VA_ARGS__); \ + crab::___print___(os, "; function ", __func__, ", line ", __LINE__); \ + os << "\n"; \ + throw std::runtime_error(os.str()); \ } while (0) extern bool CrabWarningFlag; diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 6680c884b..00695c18e 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -60,21 +60,19 @@ static checks_db generate_report(const cfg_t& cfg, const crab::invariant_table_t const crab::invariant_table_t& post_invariants) { checks_db m_db; for (const label_t& label : cfg.sorted_labels()) { - const basic_block_t& bb = cfg.get_node(label); ebpf_domain_t from_inv{pre_invariants.at(label)}; const bool pre_bot = from_inv.is_bottom(); - for (const GuardedInstruction& ins : bb) { - for (const Assertion& assertion : ins.preconditions) { - for (const auto& warning : ebpf_domain_check(from_inv, label, assertion)) { - m_db.add_warning(label, warning); - } + const GuardedInstruction& instruction = cfg.at(label); + for (const Assertion& assertion : instruction.preconditions) { + for (const auto& warning : ebpf_domain_check(from_inv, label, assertion)) { + m_db.add_warning(label, warning); } - ebpf_domain_transform(from_inv, ins.cmd); } + ebpf_domain_transform(from_inv, instruction.cmd); if (!pre_bot && from_inv.is_bottom()) { - m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(bb.label())); + m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(label)); } } diff --git a/src/main/check.cpp b/src/main/check.cpp index 81d224de1..f6874a99a 100644 --- a/src/main/check.cpp +++ b/src/main/check.cpp @@ -124,7 +124,7 @@ int main(int argc, char** argv) { ->expected(0, _conformance_groups.size()) 
->check(CLI::IsMember(_get_conformance_group_names())); - app.add_flag("--simplify,!--no-simplify", ebpf_verifier_options.cfg_opts.simplify, + app.add_flag("--simplify,!--no-simplify", ebpf_verifier_options.simplify, "Simplify the CFG before analysis by merging chains of instructions into a single basic block. " "Default: enabled") ->group("Verbosity"); diff --git a/src/spec_type_descriptors.hpp b/src/spec_type_descriptors.hpp index 3da18504e..89a7d537f 100644 --- a/src/spec_type_descriptors.hpp +++ b/src/spec_type_descriptors.hpp @@ -38,8 +38,6 @@ struct EbpfProgramType { bool is_privileged{}; }; -void print_map_descriptors(const std::vector& descriptors, std::ostream& o); - // Represents the key characteristics that determine equivalence between eBPF maps. // Used to cache and compare map configurations across the program. struct EquivalenceKey { @@ -74,4 +72,8 @@ struct raw_program { std::vector line_info{}; }; +void print_map_descriptors(const std::vector& descriptors, std::ostream& o); + +std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info); + extern thread_local crab::lazy_allocator global_program_info; diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index 017d17806..ec02500c4 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -11,8 +11,8 @@ #include -#include "asm_ostream.hpp" #include "asm_parse.hpp" +#include "asm_syntax.hpp" #include "ebpf_verifier.hpp" #include "ebpf_yaml.hpp" #include "string_constraints.hpp" @@ -171,7 +171,7 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set& ra ebpf_verifier_options_t options{}; // Use ~simplify for YAML tests unless otherwise specified. - options.cfg_opts.simplify = false; + options.simplify = false; // All YAML tests use !setup_constraints. 
options.setup_constraints = false; @@ -193,7 +193,7 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set& ra } else if (name == "strict") { options.strict = true; } else if (name == "simplify") { - options.cfg_opts.simplify = true; + options.simplify = true; } else if (name == "big_endian") { options.big_endian = true; } else if (name == "!big_endian") { @@ -251,7 +251,7 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { if (debug) { test_case.options.print_failures = true; test_case.options.print_invariants = true; - test_case.options.cfg_opts.simplify = false; + test_case.options.simplify = false; } ebpf_context_descriptor_t context_descriptor{64, 0, 4, -1}; @@ -360,7 +360,7 @@ ConformanceTestResult run_conformance_test_case(const std::vector& me print(prog, std::cout, {}); options.print_failures = true; options.print_invariants = true; - options.cfg_opts.simplify = false; + options.simplify = false; } try { diff --git a/src/test/test_conformance.cpp b/src/test/test_conformance.cpp index 074bae979..9d5eebb3a 100644 --- a/src/test/test_conformance.cpp +++ b/src/test/test_conformance.cpp @@ -6,7 +6,7 @@ #define CONFORMANCE_TEST_PATH "external/bpf_conformance/tests/" -static void test_conformance(const std::string& filename, bpf_conformance_test_result_t expected_result, +static void test_conformance(const std::string& filename, const bpf_conformance_test_result_t& expected_result, const std::string& expected_reason) { std::vector test_files = {CONFORMANCE_TEST_PATH + filename}; boost::filesystem::path test_path = boost::dll::program_location(); diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 230d045c9..82756292c 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -3,7 +3,6 @@ #include #include "asm_marshal.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" // Below we define a tample of instruction templates that specify diff --git a/src/test/test_print.cpp 
b/src/test/test_print.cpp index bcf1675f1..515c92ef6 100644 --- a/src/test/test_print.cpp +++ b/src/test/test_print.cpp @@ -12,7 +12,6 @@ #endif #include "asm_files.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" #define TEST_OBJECT_FILE_DIRECTORY "ebpf-samples/build/" diff --git a/src/test/test_wto.cpp b/src/test/test_wto.cpp index 4e5ac738a..091efebc3 100644 --- a/src/test/test_wto.cpp +++ b/src/test/test_wto.cpp @@ -12,22 +12,22 @@ TEST_CASE("wto figure 1", "[wto]") { // Add nodes. for (int i = 1; i <= 8; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(7)); - cfg.get_node(label_t(5)) >> cfg.get_node(label_t(6)); - cfg.get_node(label_t(6)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(6)) >> cfg.get_node(label_t(7)); - cfg.get_node(label_t(7)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(7)) >> cfg.get_node(label_t(8)); - cfg.get_node(label_t(8)) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{7}); + cfg.get_node(label_t{5}) >> cfg.get_node(label_t{6}); + cfg.get_node(label_t{6}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{6}) >> cfg.get_node(label_t{7}); + cfg.get_node(label_t{7}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{7}) >> cfg.get_node(label_t{8}); + cfg.get_node(label_t{8}) >> cfg.get_node(label_t::exit); const wto_t wto(cfg); @@ 
-44,18 +44,18 @@ TEST_CASE("wto figure 2a", "[wto]") { // Add nodes. for (int i = 1; i <= 5; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(5)) >> cfg.get_node(label_t(4)); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{5}) >> cfg.get_node(label_t{4}); const wto_t wto(cfg); @@ -72,17 +72,17 @@ TEST_CASE("wto figure 2b", "[wto]") { // Add nodes. for (int i = 1; i <= 4; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. 
- cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(3)); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); const wto_t wto(cfg); diff --git a/test-data/jump.yaml b/test-data/jump.yaml index 2ff819504..decbd2910 100644 --- a/test-data/jump.yaml +++ b/test-data/jump.yaml @@ -1,5 +1,23 @@ # Copyright (c) Prevail Verifier contributors. 
# SPDX-License-Identifier: MIT +--- +test-case: jump to fallthrough + +pre: [] + +code: + : | + r0 = 0 + if r0 != 0 goto + : | + exit + +post: + - r0.type=number + - r0.svalue=0 + - r0.uvalue=0 +messages: [] + --- test-case: simple conditional jump forward diff --git a/test-data/loop.yaml b/test-data/loop.yaml index a0f3d939a..e47f1a6e0 100644 --- a/test-data/loop.yaml +++ b/test-data/loop.yaml @@ -186,7 +186,7 @@ post: - "pc[1]=[1, +oo]" messages: - - "1: Loop counter is too large (pc[1] < 100000)" + - "1 (counter): Loop counter is too large (pc[1] < 100000)" --- test-case: realistic forward loop @@ -207,8 +207,8 @@ code: r2 -= r1 r3 = 0 r0 = 0 -# r2 <<= 32; this fails with "11: Upper bound must be at most packet_size (valid_access(r4.offset, width=1) for read)" -# r2 >>= 32 + # r2 <<= 32; this fails with "11: Upper bound must be at most packet_size (valid_access(r4.offset, width=1) for read)" + # r2 >>= 32 : | r4 = r1 r4 += r3 @@ -283,7 +283,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, less than or equal options: ["termination"] @@ -300,7 +300,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, equal @@ -318,7 +318,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, greater than @@ -336,7 +336,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, greater than or equal @@ -354,7 +354,7 @@ post: [] messages: - "1:2: Code is 
unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: infinite loop with multiple exits options: ["termination"] @@ -371,7 +371,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - "3:4: Code is unreachable after 3:4" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- # Note: This test case terminates after 1000001 iterations, but the verifier assumes that the loop is infinite @@ -394,7 +394,7 @@ post: - "r0.type=number" - "r0.uvalue=1000001" messages: - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: possible infinite loop @@ -413,7 +413,7 @@ post: - "r0.type=number" messages: - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- # Check for case where there are backwards jumps that don't form a loop. 
@@ -461,5 +461,5 @@ post: [] messages: - "1:3: Code is unreachable after 1:3" - - "2: Loop counter is too large (pc[2] < 100000)" + - "2 (counter): Loop counter is too large (pc[2] < 100000)" - "2:3: Code is unreachable after 2:3" diff --git a/test-data/uninit.yaml b/test-data/uninit.yaml index 9dcb18a44..aada2a9d1 100644 --- a/test-data/uninit.yaml +++ b/test-data/uninit.yaml @@ -12,11 +12,11 @@ code: : | r0 += r3 -post: [] +post: + - "r0.type=uninit" messages: - "0: Invalid type (r3.type in {number, ctx, stack, packet, shared})" - - "CRAB ERROR: Cannot convert bottom to tuple" --- test-case: subtraction of a number and an uninitialized register From 455555ba018c7dd06b37fac6e278edde948ecf48 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Mon, 11 Nov 2024 15:18:58 -0800 Subject: [PATCH 8/8] Put stats collection under static variable (#790) Disable by default unused code Signed-off-by: Alan Jowett --- src/crab_utils/stats.cpp | 4 ---- src/crab_utils/stats.hpp | 27 +++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/crab_utils/stats.cpp b/src/crab_utils/stats.cpp index f257819ae..8e9b24002 100644 --- a/src/crab_utils/stats.cpp +++ b/src/crab_utils/stats.cpp @@ -94,7 +94,6 @@ void CrabStats::reset() { sw.clear(); } -void CrabStats::count(const std::string& name) { ++(*counters)[name]; } void CrabStats::count_max(const std::string& name, const unsigned v) { (*counters)[name] = std::max((*counters)[name], v); } @@ -102,9 +101,6 @@ void CrabStats::count_max(const std::string& name, const unsigned v) { unsigned CrabStats::uset(const std::string& n, const unsigned v) { return (*counters)[n] = v; } unsigned CrabStats::get(const std::string& n) { return (*counters)[n]; } -void CrabStats::start(const std::string& name) { (*sw)[name].start(); } -void CrabStats::stop(const std::string& name) { (*sw)[name].stop(); } -void CrabStats::resume(const std::string& name) { (*sw)[name].resume(); } /** Outputs all statistics to std output */ 
void CrabStats::Print(std::ostream& OS) { diff --git a/src/crab_utils/stats.hpp b/src/crab_utils/stats.hpp index f402318f3..d60a1c49f 100644 --- a/src/crab_utils/stats.hpp +++ b/src/crab_utils/stats.hpp @@ -33,6 +33,9 @@ inline std::ostream& operator<<(std::ostream& OS, const Stopwatch& sw) { } class CrabStats { + /// Controls whether statistics collection is active. + /// When false, all statistics methods become no-ops for better performance. + static constexpr bool enabled = false; static thread_local lazy_allocator> counters; static thread_local lazy_allocator> sw; @@ -44,13 +47,29 @@ class CrabStats { /* counters */ static unsigned get(const std::string& n); static unsigned uset(const std::string& n, unsigned v); - static void count(const std::string& name); + static void count(const std::string& name) { + if constexpr (enabled) { + ++(*counters)[name]; + } + } static void count_max(const std::string& name, unsigned v); /* stop watch */ - static void start(const std::string& name); - static void stop(const std::string& name); - static void resume(const std::string& name); + static void start(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].start(); + } + } + static void stop(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].stop(); + } + } + static void resume(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].resume(); + } + } /** Outputs all statistics to std output */ static void Print(std::ostream& OS);