diff --git a/README.md b/README.md index fa67f563a..449606798 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,11 @@ cmake --build build #### Dependencies * Install [git](https://git-scm.com/download/win) -* Install [Visual Studio Build Tools 2022](https://aka.ms/vs/17/release/vs_buildtools.exe) and choose the "C++ build tools" workload (Visual Studio Build Tools 2022 has support for CMake Version 3.25). +* Install [Visual Studio Build Tools 2022](https://aka.ms/vs/17/release/vs_buildtools.exe) and: + * Choose the "C++ build tools" workload (Visual Studio Build Tools 2022 has support for CMake Version 3.25) + * Under Individual Components, select: + * "C++ Clang Compiler" + * "MSBuild support for LLVM" * Install [nuget.exe](https://www.nuget.org/downloads) #### Make on Windows (which uses a multi-configuration generator) diff --git a/ebpf-samples b/ebpf-samples index 325cce1bc..33f01fafa 160000 --- a/ebpf-samples +++ b/ebpf-samples @@ -1 +1 @@ -Subproject commit 325cce1bc528a8b70e02ea914d407b4e4f89731d +Subproject commit 33f01fafa414520ad56ab219db343422bb655b32 diff --git a/external/bpf_conformance b/external/bpf_conformance index 1ce286105..b54453984 160000 --- a/external/bpf_conformance +++ b/external/bpf_conformance @@ -1 +1 @@ -Subproject commit 1ce286105edf3577ef380bd8a498cfa2314d6dfa +Subproject commit b544539840db58bcae75b153d3c0368820a15828 diff --git a/external/libbtf b/external/libbtf index 69e02688e..2de28d748 160000 --- a/external/libbtf +++ b/external/libbtf @@ -1 +1 @@ -Subproject commit 69e02688e89352d49c72707c2819448515d856df +Subproject commit 2de28d74884f051a1b129a4535d46841fbf6d6de diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index d1fb4a7b8..cd1ade2a1 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -18,13 +18,36 @@ using std::string; using std::to_string; using std::vector; -static optional get_jump(Instruction ins) { - if (const auto pins = std::get_if(&ins)) { - return pins->target; +/// Get the inverse of a given comparison 
operation. +static Condition::Op reverse(const Condition::Op op) { + switch (op) { + case Condition::Op::EQ: return Condition::Op::NE; + case Condition::Op::NE: return Condition::Op::EQ; + + case Condition::Op::GE: return Condition::Op::LT; + case Condition::Op::LT: return Condition::Op::GE; + + case Condition::Op::SGE: return Condition::Op::SLT; + case Condition::Op::SLT: return Condition::Op::SGE; + + case Condition::Op::LE: return Condition::Op::GT; + case Condition::Op::GT: return Condition::Op::LE; + + case Condition::Op::SLE: return Condition::Op::SGT; + case Condition::Op::SGT: return Condition::Op::SLE; + + case Condition::Op::SET: return Condition::Op::NSET; + case Condition::Op::NSET: return Condition::Op::SET; } + assert(false); return {}; } +/// Get the inverse of a given comparison condition. +static Condition reverse(const Condition& cond) { + return {.op = reverse(cond.op), .left = cond.left, .right = cond.right, .is64 = cond.is64}; +} + static bool has_fall(const Instruction& ins) { if (std::holds_alternative(ins)) { return false; @@ -44,17 +67,15 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t bool first = true; // Get the label of the node to go to on returning from the macro. - basic_block_t& exit_to_node = cfg.get_node(cfg.next_nodes(caller_label).front()); + crab::value_t& exit_to_node = cfg.get_node(cfg.next_nodes(caller_label).front()); // Construct the variable prefix to use for the new stack frame, // and store a copy in the CallLocal instruction since the instruction-specific // labels may only exist until the CFG is simplified. 
- basic_block_t& caller_node = cfg.get_node(caller_label); + crab::value_t& caller_node = cfg.get_node(caller_label); const std::string stack_frame_prefix = to_string(caller_label); - for (auto& inst : caller_node) { - if (const auto pcall = std::get_if(&inst.cmd)) { - pcall->stack_frame_prefix = stack_frame_prefix; - } + if (const auto pcall = std::get_if(&caller_node.instruction().cmd)) { + pcall->stack_frame_prefix = stack_frame_prefix; } // Walk the transitive closure of CFG nodes starting at entry_label and ending at @@ -71,15 +92,13 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t // Clone the macro block into a new block with the new stack frame prefix. const label_t label{macro_label.from, macro_label.to, stack_frame_prefix}; - auto& bb = cfg.insert(label); - for (auto inst : cfg.get_node(macro_label)) { - if (const auto pexit = std::get_if(&inst.cmd)) { - pexit->stack_frame_prefix = label.stack_frame_prefix; - } else if (const auto pcall = std::get_if(&inst.cmd)) { - pcall->stack_frame_prefix = label.stack_frame_prefix; - } - bb.insert(inst); + auto inst = cfg.at(macro_label); + if (const auto pexit = std::get_if(&inst.cmd)) { + pexit->stack_frame_prefix = label.stack_frame_prefix; + } else if (const auto pcall = std::get_if(&inst.cmd)) { + pcall->stack_frame_prefix = label.stack_frame_prefix; } + crab::value_t& bb = cfg.insert(label, inst.cmd); if (first) { // Add an edge from the caller to the new block. 
@@ -121,14 +140,12 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t string caller_label_str = to_string(caller_label); const long stack_frame_depth = std::ranges::count(caller_label_str, STACK_FRAME_DELIMITER) + 2; for (const auto& macro_label : seen_labels) { - for (const label_t label(macro_label.from, macro_label.to, caller_label_str); - const auto& inst : cfg.get_node(label)) { - if (const auto pins = std::get_if(&inst.cmd)) { - if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { - throw std::runtime_error{"too many call stack frames"}; - } - add_cfg_nodes(cfg, label, pins->target); + const label_t label(macro_label.from, macro_label.to, caller_label_str); + if (const auto pins = std::get_if(&cfg.at(label).cmd)) { + if (stack_frame_depth >= MAX_CALL_STACK_FRAMES) { + throw std::runtime_error{"too many call stack frames"}; } + add_cfg_nodes(cfg, label, pins->target); } } } @@ -136,44 +153,67 @@ static void add_cfg_nodes(cfg_t& cfg, const label_t& caller_label, const label_t /// Convert an instruction sequence to a control-flow graph (CFG). static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must_have_exit) { cfg_t cfg; - std::optional falling_from = {}; - bool first = true; - // Do a first pass ignoring all function macro calls. + // First add all instructions to the CFG without connecting for (const auto& [label, inst, _] : insts) { - if (std::holds_alternative(inst)) { continue; } + cfg.insert(label, inst); + } - auto& bb = cfg.insert(label); + if (insts.size() == 0) { + throw std::invalid_argument{"empty instruction sequence"}; + } else { + const auto& [label, inst, _0] = insts[0]; + cfg.get_node(cfg.entry_label()) >> cfg.get_node(label); + } - if (first) { - first = false; - cfg.get_node(cfg.entry_label()) >> bb; - } + // Do a first pass ignoring all function macro calls. 
+ for (size_t i = 0; i < insts.size(); i++) { + const auto& [label, inst, _0] = insts[i]; - bb.insert({.cmd = inst}); - if (falling_from) { - cfg.get_node(*falling_from) >> bb; - falling_from = {}; - } - if (has_fall(inst)) { - falling_from = label; - } - if (auto jump_target = get_jump(inst)) { - bb >> cfg.insert(*jump_target); + if (std::holds_alternative(inst)) { + continue; } + auto& value = cfg.get_node(label); - if (std::holds_alternative(inst)) { - bb >> cfg.get_node(cfg.exit_label()); + label_t fallthrough{cfg.exit_label()}; + if (i + 1 < insts.size()) { + fallthrough = std::get<0>(insts[i + 1]); + } else { + if (has_fall(inst) && must_have_exit) { + throw std::invalid_argument{"fallthrough in last instruction"}; + } } - } - if (falling_from) { - if (must_have_exit) { - throw std::invalid_argument{"fallthrough in last instruction"}; + if (const auto jmp = std::get_if(&inst)) { + if (const auto cond = jmp->cond) { + label_t target_label = jmp->target; + if (target_label == fallthrough) { + value >> cfg.get_node(fallthrough); + continue; + } + + vector> jumps{ + {target_label, *cond}, + {fallthrough, reverse(*cond)}, + }; + for (const auto& [next_label, cond1] : jumps) { + label_t jump_label = label_t::make_jump(label, next_label); + crab::value_t& jump_node = cfg.insert(jump_label, Assume{.cond = cond1, .is_explicit = false}); + value >> jump_node; + jump_node >> cfg.get_node(next_label); + } + } else { + value >> cfg.get_node(jmp->target); + } } else { - cfg.get_node(*falling_from) >> cfg.get_node(cfg.exit_label()); + if (has_fall(inst)) { + value >> cfg.get_node(fallthrough); + } + } + if (std::holds_alternative(inst)) { + value >> cfg.get_node(cfg.exit_label()); } } @@ -189,34 +229,24 @@ static cfg_t instruction_seq_to_cfg(const InstructionSeq& insts, const bool must return cfg; } -/// Get the inverse of a given comparison operation. 
-static Condition::Op reverse(const Condition::Op op) { - switch (op) { - case Condition::Op::EQ: return Condition::Op::NE; - case Condition::Op::NE: return Condition::Op::EQ; - - case Condition::Op::GE: return Condition::Op::LT; - case Condition::Op::LT: return Condition::Op::GE; - - case Condition::Op::SGE: return Condition::Op::SLT; - case Condition::Op::SLT: return Condition::Op::SGE; - - case Condition::Op::LE: return Condition::Op::GT; - case Condition::Op::GT: return Condition::Op::LE; - - case Condition::Op::SLE: return Condition::Op::SGT; - case Condition::Op::SGT: return Condition::Op::SLE; +cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { + // Convert the instruction sequence to a deterministic control-flow graph. + cfg_t cfg = instruction_seq_to_cfg(prog, options.must_have_exit); - case Condition::Op::SET: return Condition::Op::NSET; - case Condition::Op::NSET: return Condition::Op::SET; + // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a + // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry + // points. These entry points serve as natural locations for loop counters that help verify program termination. + if (options.check_for_termination) { + const wto_t wto{cfg}; + wto.for_each_loop_head([&](const label_t& label) -> void { + cfg.insert_after(label, label_t::make_increment_counter(label), IncrementLoopCounter{label}); + }); } - assert(false); - return {}; -} -/// Get the inverse of a given comparison condition. -static Condition reverse(const Condition& cond) { - return {.op = reverse(cond.op), .left = cond.left, .right = cond.right, .is64 = cond.is64}; + // Annotate the CFG by adding in assertions before every memory instruction. 
+ explicate_assertions(cfg, info); + + return cfg; } template @@ -226,53 +256,6 @@ static vector unique(const std::pair& be) { return res; } -/// Get a non-deterministic version of a control-flow graph, -/// i.e., where instead of using if/else, both branches are taken -/// simultaneously, and are replaced by Assume instructions -/// immediately after the branch. -static cfg_t to_nondet(const cfg_t& cfg) { - cfg_t res; - for (const auto& [this_label, bb] : cfg) { - basic_block_t& newbb = res.insert(this_label); - - for (const auto& ins : bb) { - newbb.insert(ins); - } - - for (const label_t& prev_label : bb.prev_blocks_set()) { - bool is_one = cfg.get_node(prev_label).next_blocks_set().size() > 1; - basic_block_t& pbb = res.insert(is_one ? label_t::make_jump(prev_label, this_label) : prev_label); - pbb >> newbb; - } - // note the special case where we jump to fallthrough - auto nextlist = bb.next_blocks_set(); - if (nextlist.size() == 2) { - label_t mid_label = this_label; - auto jmp = std::get(bb.rbegin()->cmd); - - nextlist.erase(jmp.target); - label_t fallthrough = *nextlist.begin(); - - vector> jumps{ - {jmp.target, *jmp.cond}, - {fallthrough, reverse(*jmp.cond)}, - }; - for (const auto& [next_label, cond1] : jumps) { - label_t jump_label = label_t::make_jump(mid_label, next_label); - basic_block_t& jump_bb = res.insert(jump_label); - jump_bb.insert({.cmd = Assume{cond1}}); - newbb >> jump_bb; - jump_bb >> res.insert(next_label); - } - } else { - for (const auto& label : nextlist) { - newbb >> res.insert(label); - } - } - } - return res; -} - /// Get the type of given Instruction. /// Most of these type names are also statistics header labels. 
static std::string instype(Instruction ins) { @@ -329,62 +312,28 @@ std::map collect_stats(const cfg_t& cfg) { } for (const auto& this_label : cfg.labels()) { res["basic_blocks"]++; - basic_block_t const& bb = cfg.get_node(this_label); - - for (const auto& ins : bb) { - if (const auto pins = std::get_if(&ins.cmd)) { - if (pins->mapfd == -1) { - res["map_in_map"] = 1; - } + const crab::value_t& value = cfg.get_node(this_label); + const auto cmd = value.instruction().cmd; + if (const auto pins = std::get_if(&cmd)) { + if (pins->mapfd == -1) { + res["map_in_map"] = 1; } - if (const auto pins = std::get_if(&ins.cmd)) { - if (pins->reallocate_packet) { - res["reallocate"] = 1; - } - } - if (const auto pins = std::get_if(&ins.cmd)) { - res[pins->is64 ? "arith64" : "arith32"]++; + } + if (const auto pins = std::get_if(&cmd)) { + if (pins->reallocate_packet) { + res["reallocate"] = 1; } - res[instype(ins.cmd)]++; } - if (unique(bb.prev_blocks()).size() > 1) { + if (const auto pins = std::get_if(&cmd)) { + res[pins->is64 ? "arith64" : "arith32"]++; + } + res[instype(cmd)]++; + if (unique(value.prev_labels()).size() > 1) { res["joins"]++; } - if (unique(bb.prev_blocks()).size() > 1) { + if (unique(value.prev_labels()).size() > 1) { res["jumps"]++; } } return res; } - -cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const prepare_cfg_options& options) { - // Convert the instruction sequence to a deterministic control-flow graph. - cfg_t det_cfg = instruction_seq_to_cfg(prog, options.must_have_exit); - - // Detect loops using Weak Topological Ordering (WTO) and insert counters at loop entry points. WTO provides a - // hierarchical decomposition of the CFG that identifies all strongly connected components (cycles) and their entry - // points. These entry points serve as natural locations for loop counters that help verify program termination. 
- if (options.check_for_termination) { - const wto_t wto(det_cfg); - wto.for_each_loop_head( - [&](const label_t& label) { det_cfg.get_node(label).insert_front({.cmd = IncrementLoopCounter{label}}); }); - } - - // Annotate the CFG by adding in assertions before every memory instruction. - explicate_assertions(det_cfg, info); - - // Translate conditional jumps to non-deterministic jumps. - cfg_t cfg = to_nondet(det_cfg); - - // Except when debugging, combine chains of instructions into - // basic blocks where possible, i.e., into a range of instructions - // where there is a single entry point and a single exit point. - // An abstract interpreter will keep values at every basic block, - // so the fewer basic blocks we have, the less information it has to - // keep track of. - if (options.simplify) { - cfg.simplify(); - } - - return cfg; -} diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 9ce4d2f09..953435b70 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -6,7 +6,6 @@ #include #include "asm_marshal.hpp" -#include "asm_ostream.hpp" #include "crab_utils/num_safety.hpp" using std::vector; @@ -293,7 +292,7 @@ struct MarshalVisitor { }; vector marshal(const Instruction& ins, const pc_t pc) { - return std::visit(MarshalVisitor{label_to_offset16(pc), label_to_offset32(pc)}, ins); + return std::visit(MarshalVisitor{crab::label_to_offset16(pc), crab::label_to_offset32(pc)}, ins); } static int size(const Instruction& inst) { diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index f76e023e1..ddd7022c6 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -6,12 +6,12 @@ #include #include -#include "asm_ostream.hpp" #include "asm_syntax.hpp" #include "crab/cfg.hpp" #include "crab/interval.hpp" #include "crab/type_encoding.hpp" #include "crab/variable.hpp" +#include "crab_utils/num_big.hpp" #include "helpers.hpp" #include "platform.hpp" #include "spec_type_descriptors.hpp" @@ -21,6 +21,113 @@ using std::optional; using std::string; using 
std::vector; +namespace crab { + +std::string number_t::to_string() const { return _n.str(); } + +std::string interval_t::to_string() const { + std::ostringstream s; + s << *this; + return s.str(); +} + +std::ostream& operator<<(std::ostream& os, const label_t& label) { + if (label == label_t::entry) { + return os << "entry"; + } + if (label == label_t::exit) { + return os << "exit"; + } + if (!label.stack_frame_prefix.empty()) { + os << label.stack_frame_prefix << STACK_FRAME_DELIMITER; + } + os << label.from; + if (label.to != -1) { + os << ":" << label.to; + } + if (!label.special_label.empty()) { + os << " (" << label.special_label << ")"; + } + return os; +} + +string to_string(label_t const& label) { + std::stringstream str; + str << label; + return str.str(); +} + +void print_dot(const cfg_t& cfg, std::ostream& out) { + out << "digraph program {\n"; + out << " node [shape = rectangle];\n"; + for (const auto& label : cfg.labels()) { + out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; + + const auto& value = cfg.get_node(label); + const auto& ins = value.instruction(); + for (const auto& pre : ins.preconditions) { + out << "assert " << pre << "\\l"; + } + out << ins.cmd << "\\l"; + + out << "\"];\n"; + for (const label_t& next : value.next_labels_set()) { + out << " \"" << label << "\" -> \"" << next << "\";\n"; + } + out << "\n"; + } + out << "}\n"; +} + +void print_dot(const cfg_t& cfg, const std::string& outfile) { + std::ofstream out{outfile}; + if (out.fail()) { + throw std::runtime_error(std::string("Could not open file ") + outfile); + } + print_dot(cfg, out); +} + +std::ostream& operator<<(std::ostream& o, const value_t& value) { + o << value.label() << ":\n"; + const auto ins = value.instruction(); + for (const auto& pre : ins.preconditions) { + o << " " + << "assert " << pre << ";\n"; + } + o << " " << ins.cmd << ";\n"; + auto [it, et] = value.next_labels(); + if (it != et) { + o << " " + << "goto "; + while (it != et) { + o << *it; 
+ ++it; + if (it == et) { + o << ";"; + } else { + o << ","; + } + } + } + o << "\n"; + return o; +} + +std::ostream& operator<<(std::ostream& o, const cfg_t& cfg) { + for (const label_t& label : cfg.sorted_labels()) { + o << cfg.get_node(label); + o << "edges to:"; + for (const label_t& next_label : cfg.next_nodes(label)) { + o << " " << next_label; + } + o << "\n"; + } + return o; +} + +} // namespace crab + +namespace asm_syntax { std::ostream& operator<<(std::ostream& os, const ArgSingle::Kind kind) { switch (kind) { case ArgSingle::Kind::ANYTHING: return os << "uint64_t"; @@ -355,12 +462,6 @@ struct CommandPrinterVisitor { }; // ReSharper restore CppMemberFunctionMayBeConst -string to_string(label_t const& label) { - std::stringstream str; - str << label; - return str.str(); -} - std::ostream& operator<<(std::ostream& os, Instruction const& ins) { std::visit(CommandPrinterVisitor{os}, ins); return os; @@ -433,7 +534,7 @@ void print(const InstructionSeq& insts, std::ostream& out, const std::optional(&ins)) { if (!pc_of_label.contains(jmp->target)) { - throw std::runtime_error(string("Cannot find label ") + to_string(jmp->target)); + throw std::runtime_error(string("Cannot find label ") + crab::to_string(jmp->target)); } const pc_t target_pc = pc_of_label.at(jmp->target); visitor(*jmp, target_pc - static_cast(pc) - 1); @@ -446,6 +547,8 @@ void print(const InstructionSeq& insts, std::ostream& out, const std::optional& descriptors, st } } -void print_dot(const cfg_t& cfg, std::ostream& out) { - out << "digraph program {\n"; - out << " node [shape = rectangle];\n"; - for (const auto& label : cfg.labels()) { - out << " \"" << label << "\"[xlabel=\"" << label << "\",label=\""; - - const auto& bb = cfg.get_node(label); - for (const auto& ins : bb) { - for (const auto& pre : ins.preconditions) { - out << "assert " << pre << "\\l"; - } - out << ins.cmd << "\\l"; - } - - out << "\"];\n"; - for (const label_t& next : bb.next_blocks_set()) { - out << " \"" << label << "\" 
-> \"" << next << "\";\n"; - } - out << "\n"; - } - out << "}\n"; -} - -void print_dot(const cfg_t& cfg, const std::string& outfile) { - std::ofstream out{outfile}; - if (out.fail()) { - throw std::runtime_error(std::string("Could not open file ") + outfile); - } - print_dot(cfg, out); -} - -std::ostream& operator<<(std::ostream& o, const basic_block_t& bb) { - o << bb.label() << ":\n"; - for (const auto& s : bb) { - for (const auto& pre : s.preconditions) { - o << " " - << "assert " << pre << ";\n"; - } - o << " " << s.cmd << ";\n"; - } - auto [it, et] = bb.next_blocks(); - if (it != et) { - o << " " - << "goto "; - while (it != et) { - o << *it; - ++it; - if (it == et) { - o << ";"; - } else { - o << ","; - } - } - } - o << "\n"; - return o; -} - -std::ostream& operator<<(std::ostream& o, const crab::basic_block_rev_t& bb) { - o << bb.label() << ":\n"; - for (const auto& s : bb) { - for (const auto& pre : s.preconditions) { - o << " " - << "assert " << pre << ";\n"; - } - o << " " << s.cmd << ";\n"; - } - o << "--> ["; - for (const label_t& label : bb.next_blocks_set()) { - o << label << ";"; - } - o << "]\n"; - return o; -} - -std::ostream& operator<<(std::ostream& o, const cfg_t& cfg) { - for (const label_t& label : cfg.sorted_labels()) { - o << cfg.get_node(label); - o << "edges to:"; - for (const label_t& next_label : cfg.next_nodes(label)) { - o << " " << next_label; - } - o << "\n"; - } - return o; -} - std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info) { os << "; " << line_info.file_name << ":" << line_info.line_number << "\n"; os << "; " << line_info.source_line << "\n"; return os; } - -std::string crab::number_t::to_string() const { return _n.str(); } - -std::string crab::interval_t::to_string() const { - std::ostringstream s; - s << *this; - return s.str(); -} diff --git a/src/asm_ostream.hpp b/src/asm_ostream.hpp deleted file mode 100644 index 83d210cd9..000000000 --- a/src/asm_ostream.hpp +++ /dev/null @@ -1,58 +0,0 @@ -// 
Copyright (c) Prevail Verifier contributors. -// SPDX-License-Identifier: MIT -#pragma once - -#include -#include -#include - -#include - -#include "asm_syntax.hpp" -#include "crab_utils/num_safety.hpp" - -// We use a 16-bit offset whenever it fits in 16 bits. -inline std::function label_to_offset16(const pc_t pc) { - return [=](const label_t& label) { - const int64_t offset = label.from - gsl::narrow(pc) - 1; - const bool is16 = - std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); - return is16 ? gsl::narrow(offset) : 0; - }; -} - -// We use the JA32 opcode with the offset in 'imm' when the offset -// of an unconditional jump doesn't fit in an int16_t. -inline std::function label_to_offset32(const pc_t pc) { - return [=](const label_t& label) { - const int64_t offset = label.from - gsl::narrow(pc) - 1; - const bool is16 = - std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); - return is16 ? 0 : gsl::narrow(offset); - }; -} - -std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info); - -void print(const InstructionSeq& insts, std::ostream& out, const std::optional& label_to_print, - bool print_line_info = false); - -std::string to_string(label_t const& label); - -std::ostream& operator<<(std::ostream& os, Instruction const& ins); -std::string to_string(Instruction const& ins); - -std::ostream& operator<<(std::ostream& os, Bin::Op op); -std::ostream& operator<<(std::ostream& os, Condition::Op op); - -inline std::ostream& operator<<(std::ostream& os, const Imm imm) { return os << crab::to_signed(imm.v); } -inline std::ostream& operator<<(std::ostream& os, Reg const& a) { return os << "r" << gsl::narrow(a.v); } -inline std::ostream& operator<<(std::ostream& os, Value const& a) { - if (const auto pa = std::get_if(&a)) { - return os << *pa; - } - return os << std::get(a); -} - -std::ostream& operator<<(std::ostream& os, const Assertion& a); -std::string to_string(const Assertion& constraint); diff 
--git a/src/asm_parse.cpp b/src/asm_parse.cpp index 792d6ef43..50c99efdc 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -208,8 +208,12 @@ Instruction parse_instruction(const std::string& line, const std::map +#include #include #include #include #include #include -#include "crab/variable.hpp" +#include "crab/label.hpp" +#include "crab/type_encoding.hpp" +#include "crab_utils/num_safety.hpp" #include "spec_type_descriptors.hpp" -constexpr char STACK_FRAME_DELIMITER = '/'; - -namespace crab { -struct label_t { - int from; ///< Jump source, or simply index of instruction - int to; ///< Jump target or -1 - std::string stack_frame_prefix; ///< Variable prefix when calling this label. - - explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept - : from(index), to(to), stack_frame_prefix(std::move(stack_frame_prefix)) {} - - static label_t make_jump(const label_t& src_label, const label_t& target_label) { - return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; - } - - bool operator==(const label_t& other) const noexcept = default; - - constexpr bool operator<(const label_t& other) const { - if (this == &other) { - return false; - } - if (*this == label_t::exit) { - return false; - } - if (other == label_t::exit) { - return true; - } - return (stack_frame_prefix < other.stack_frame_prefix || - (stack_frame_prefix == other.stack_frame_prefix && - (from < other.from || (from == other.from && to < other.to)))); - } - - // no hash; intended for use in ordered containers. - - [[nodiscard]] - constexpr bool isjump() const { - return to != -1; - } - - [[nodiscard]] - int call_stack_depth() const { - // The call stack depth is the number of '/' separated components in the label, - // which is one more than the number of '/' separated components in the prefix, - // hence two more than the number of '/' in the prefix, if any. 
- if (stack_frame_prefix.empty()) { - return 1; - } - return 2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER); - } - - friend std::ostream& operator<<(std::ostream& os, const label_t& label) { - if (label == entry) { - return os << "entry"; - } - if (label == exit) { - return os << "exit"; - } - if (!label.stack_frame_prefix.empty()) { - os << label.stack_frame_prefix << STACK_FRAME_DELIMITER; - } - if (label.to == -1) { - return os << label.from; - } - return os << label.from << ":" << label.to; - } - - static const label_t entry; - static const label_t exit; -}; - -inline const label_t label_t::entry{-1}; -inline const label_t label_t::exit{-2}; - -} // namespace crab using crab::label_t; // Assembly syntax. @@ -307,6 +236,10 @@ struct Undefined { /// the branch and before each jump target. struct Assume { Condition cond; + + // True if the condition is explicitly written in the program (for tests). + bool is_explicit{}; + constexpr bool operator==(const Assume&) const = default; }; @@ -426,12 +359,31 @@ struct GuardedInstruction { bool operator==(const GuardedInstruction&) const = default; }; -// cpu=v4 supports 32-bit PC offsets so we need a large enough type. 
-using pc_t = uint32_t; +std::ostream& operator<<(std::ostream& os, Instruction const& ins); +std::string to_string(Instruction const& ins); + +std::ostream& operator<<(std::ostream& os, Bin::Op op); +std::ostream& operator<<(std::ostream& os, Condition::Op op); + +inline std::ostream& operator<<(std::ostream& os, const Imm imm) { return os << crab::to_signed(imm.v); } +inline std::ostream& operator<<(std::ostream& os, Reg const& a) { return os << "r" << gsl::narrow(a.v); } +inline std::ostream& operator<<(std::ostream& os, Value const& a) { + if (const auto pa = std::get_if(&a)) { + return os << *pa; + } + return os << std::get(a); +} + +std::ostream& operator<<(std::ostream& os, const Assertion& a); +std::string to_string(const Assertion& constraint); + +void print(const InstructionSeq& insts, std::ostream& out, const std::optional& label_to_print, + bool print_line_info = false); } // namespace asm_syntax using namespace asm_syntax; +using crab::pc_t; template struct overloaded : Ts... { diff --git a/src/assertions.cpp b/src/assertions.cpp index 96ce593e5..faebc9579 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -38,7 +38,7 @@ class AssertExtractor { : info{std::move(info)}, current_label(label) {} vector operator()(const Undefined&) const { - assert(false); + // assert(false); return {}; } @@ -176,7 +176,12 @@ class AssertExtractor { return res; } - vector operator()(const Assume& ins) const { return explicate(ins.cond); } + vector operator()(const Assume& ins) const { + if (ins.is_explicit) { + return explicate(ins.cond); + } + return {}; + } vector operator()(const Jmp& ins) const { if (!ins.cond) { @@ -298,10 +303,9 @@ vector get_assertions(Instruction ins, const program_info& info, cons /// regions. The verifier will use these assertions to treat the program as /// unsafe unless it can prove that the assertions can never fail. 
void explicate_assertions(cfg_t& cfg, const program_info& info) { - for (auto& [label, bb] : cfg) { + for (auto& [label, value] : cfg) { (void)label; // unused - for (auto& ins : bb) { - ins.preconditions = get_assertions(ins.cmd, info, bb.label()); - } + auto& ins = value.instruction(); + ins.preconditions = get_assertions(ins.cmd, info, value.label()); } } diff --git a/src/config.hpp b/src/config.hpp index b109a35c8..09d5d919e 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -8,6 +8,9 @@ struct ebpf_verifier_options_t { // Options that control how the control flow graph is built. prepare_cfg_options cfg_opts; + /// When true, simplifies the control flow graph by merging basic blocks. + bool simplify = true; + // True to assume prior failed assertions are true and continue verification. bool assume_assertions = false; @@ -20,7 +23,7 @@ struct ebpf_verifier_options_t { // True to allow division by zero and assume BPF ISA defined semantics. bool allow_division_by_zero = true; - // Setup the entry constraints for a BPF program. + // Set up the entry constraints for a BPF program. bool setup_constraints = true; // True if the ELF file is built on a big endian system. diff --git a/src/crab/cfg.hpp b/src/crab/cfg.hpp index 8dd6e9ba0..678d1312e 100644 --- a/src/crab/cfg.hpp +++ b/src/crab/cfg.hpp @@ -3,14 +3,7 @@ #pragma once /* - * Build a CFG to interface with the abstract domains and fixpoint - * iterators. - * - * All the CFG statements are strongly typed. However, only variables - * need to be typed. The types of constants can be inferred from the - * context since they always appear together with at least one - * variable. - * + * Build a CFG to interface with the abstract domains and fixpoint iterators. 
*/ #include #include @@ -22,7 +15,6 @@ #include #include -#include "asm_ostream.hpp" #include "asm_syntax.hpp" #include "crab_utils/debug.hpp" #include "crab_utils/num_big.hpp" @@ -32,103 +24,68 @@ namespace crab { class cfg_t; -class basic_block_t final { +// Node type for the CFG +class value_t final { friend class cfg_t; public: - basic_block_t(const basic_block_t&) = delete; + value_t(const value_t&) = delete; using label_vec_t = std::set; - using stmt_list_t = std::vector; using neighbour_const_iterator = label_vec_t::const_iterator; using neighbour_const_reverse_iterator = label_vec_t::const_reverse_iterator; - using iterator = stmt_list_t::iterator; - using const_iterator = stmt_list_t::const_iterator; - using reverse_iterator = stmt_list_t::reverse_iterator; - using const_reverse_iterator = stmt_list_t::const_reverse_iterator; private: label_t m_label; - stmt_list_t m_ts; + GuardedInstruction m_instruction{.cmd = Undefined{}}; label_vec_t m_prev, m_next; public: - void insert(const GuardedInstruction& arg) { - assert(label() != label_t::entry); - assert(label() != label_t::exit); - m_ts.push_back(arg); - } + explicit value_t(label_t _label) : m_label{std::move(_label)} {} - /// Insert a GuardedInstruction at the front of the basic block. - /// @note Cannot modify entry or exit blocks. 
- void insert_front(const GuardedInstruction& arg) { - assert(label() != label_t::entry); - assert(label() != label_t::exit); - m_ts.insert(m_ts.begin(), arg); - } - - explicit basic_block_t(label_t _label) : m_label(std::move(_label)) {} - - ~basic_block_t() = default; + ~value_t() = default; [[nodiscard]] label_t label() const { return m_label; } - iterator begin() { return (m_ts.begin()); } - iterator end() { return (m_ts.end()); } [[nodiscard]] - const_iterator begin() const { - return (m_ts.begin()); - } - [[nodiscard]] - const_iterator end() const { - return (m_ts.end()); + GuardedInstruction& instruction() { + return m_instruction; } - reverse_iterator rbegin() { return (m_ts.rbegin()); } - reverse_iterator rend() { return (m_ts.rend()); } [[nodiscard]] - const_reverse_iterator rbegin() const { - return (m_ts.rbegin()); - } - [[nodiscard]] - const_reverse_iterator rend() const { - return (m_ts.rend()); - } - - [[nodiscard]] - size_t size() const { - return gsl::narrow(std::distance(begin(), end())); + const GuardedInstruction& instruction() const { + return m_instruction; } [[nodiscard]] - std::pair next_blocks() const { + std::pair next_labels() const { return std::make_pair(m_next.begin(), m_next.end()); } [[nodiscard]] - std::pair next_blocks_reversed() const { + std::pair next_labels_reversed() const { return std::make_pair(m_next.rbegin(), m_next.rend()); } [[nodiscard]] - std::pair prev_blocks() const { + std::pair prev_labels() const { return std::make_pair(m_prev.begin(), m_prev.end()); } [[nodiscard]] - const label_vec_t& next_blocks_set() const { + const label_vec_t& next_labels_set() const { return m_next; } [[nodiscard]] - const label_vec_t& prev_blocks_set() const { + const label_vec_t& prev_labels_set() const { return m_prev; } // Add a cfg_t edge from *this to b - void operator>>(basic_block_t& b) { + void operator>>(value_t& b) { assert(b.label() != label_t::entry); assert(this->label() != label_t::exit); m_next.insert(b.m_label); @@ -136,17 
+93,11 @@ class basic_block_t final { } // Remove a cfg_t edge from *this to b - void operator-=(basic_block_t& b) { + void operator-=(value_t& b) { m_next.erase(b.m_label); b.m_prev.erase(m_label); } - // insert all statements of other at the back - void move_back(basic_block_t& other) { - m_ts.reserve(m_ts.size() + other.m_ts.size()); - std::ranges::move(other.m_ts, std::back_inserter(m_ts)); - } - [[nodiscard]] size_t in_degree() const { return m_prev.size(); @@ -156,108 +107,47 @@ class basic_block_t final { size_t out_degree() const { return m_next.size(); } - - void swap_instructions(stmt_list_t& ts) { std::swap(m_ts, ts); } -}; - -// Viewing basic_block_t with all statements reversed. Useful for -// backward analysis. -class basic_block_rev_t final { - public: - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; - - using iterator = basic_block_t::reverse_iterator; - using const_iterator = basic_block_t::const_reverse_iterator; - - public: - basic_block_t& _bb; - - explicit basic_block_rev_t(basic_block_t& bb) : _bb(bb) {} - - [[nodiscard]] - label_t label() const { - return _bb.label(); - } - - iterator begin() { return _bb.rbegin(); } - - iterator end() { return _bb.rend(); } - - [[nodiscard]] - const_iterator begin() const { - return _bb.rbegin(); - } - - [[nodiscard]] - const_iterator end() const { - return _bb.rend(); - } - - [[nodiscard]] - std::size_t size() const { - return gsl::narrow(std::distance(begin(), end())); - } - - [[nodiscard]] - std::pair next_blocks() const { - return _bb.prev_blocks(); - } - - [[nodiscard]] - std::pair prev_blocks() const { - return _bb.next_blocks(); - } - - [[nodiscard]] - const basic_block_t::label_vec_t& next_blocks_set() const { - return _bb.prev_blocks_set(); - } - - [[nodiscard]] - const basic_block_t::label_vec_t& prev_blocks_set() const { - return _bb.next_blocks_set(); - } }; -/// Control-Flow Graph. 
+/// Control-Flow Graph class cfg_t final { public: using node_t = label_t; // for Bgl graphs - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; - using neighbour_const_reverse_iterator = basic_block_t::neighbour_const_reverse_iterator; + using neighbour_const_iterator = value_t::neighbour_const_iterator; + using neighbour_const_reverse_iterator = value_t::neighbour_const_reverse_iterator; using neighbour_const_range = boost::iterator_range; using neighbour_const_reverse_range = boost::iterator_range; private: - using basic_block_map_t = std::map; - using binding_t = basic_block_map_t::value_type; + using map_t = std::map; + using binding_t = map_t::value_type; struct get_label { label_t operator()(const binding_t& p) const { return p.second.label(); } }; public: - using iterator = basic_block_map_t::iterator; - using const_iterator = basic_block_map_t::const_iterator; - using label_iterator = boost::transform_iterator; - using const_label_iterator = boost::transform_iterator; + using iterator = map_t::iterator; + using const_iterator = map_t::const_iterator; + using label_iterator = boost::transform_iterator; + using const_label_iterator = boost::transform_iterator; private: - basic_block_map_t m_blocks; + map_t m_map; using visited_t = std::set; public: cfg_t() { - m_blocks.emplace(entry_label(), entry_label()); - m_blocks.emplace(exit_label(), exit_label()); + m_map.emplace(entry_label(), entry_label()); + m_map.emplace(exit_label(), exit_label()); } cfg_t(const cfg_t&) = delete; - cfg_t(cfg_t&& o) noexcept : m_blocks(std::move(o.m_blocks)) {} + cfg_t(cfg_t&& o) noexcept : m_map(std::move(o.m_map)) {} ~cfg_t() = default; @@ -275,45 +165,89 @@ class cfg_t final { [[nodiscard]] neighbour_const_range next_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_blocks()); + return boost::make_iterator_range(get_node(_label).next_labels()); } + [[nodiscard]] neighbour_const_reverse_range 
next_nodes_reversed(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).next_blocks_reversed()); + return boost::make_iterator_range(get_node(_label).next_labels_reversed()); } [[nodiscard]] neighbour_const_range prev_nodes(const label_t& _label) const { - return boost::make_iterator_range(get_node(_label).prev_blocks()); + return boost::make_iterator_range(get_node(_label).prev_labels()); } - basic_block_t& get_node(const label_t& _label) { - auto it = m_blocks.find(_label); - if (it == m_blocks.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); + value_t& get_node(const label_t& _label) { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } - [[nodiscard]] - const basic_block_t& get_node(const label_t& _label) const { - auto it = m_blocks.find(_label); - if (it == m_blocks.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); + const value_t& get_node(const label_t& _label) const { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); } return it->second; } + GuardedInstruction& at(const label_t& _label) { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + } + return it->second.instruction(); + } + + [[nodiscard]] + const GuardedInstruction& at(const label_t& _label) const { + const auto it = m_map.find(_label); + if (it == m_map.end()) { + CRAB_ERROR("Label ", to_string(_label), " not found in the CFG: "); + } + return it->second.instruction(); + } + // --- End ikos fixpoint API - basic_block_t& insert(const label_t& _label) { - auto it = m_blocks.find(_label); - if (it != m_blocks.end()) { + value_t& insert_after(const label_t& prev_label, const label_t& new_label, const Instruction& _ins) { + 
value_t& res = insert(new_label, GuardedInstruction{.cmd = _ins}); + value_t& prev = get_node(prev_label); + std::vector nexts; + for (const label_t& next : prev.next_labels_set()) { + nexts.push_back(next); + } + prev.m_next.clear(); + + std::vector prevs; + for (const label_t& next_label : nexts) { + get_node(next_label).m_prev.erase(prev_label); + } + + for (const label_t& next : nexts) { + get_node(prev_label) >> res; + res >> get_node(next); + } + return res; + } + + value_t& insert(const label_t& _label, const Instruction& _ins) { + return insert(_label, GuardedInstruction{.cmd = _ins}); + } + + value_t& insert(const label_t& _label, GuardedInstruction&& _ins) { + const auto it = m_map.find(_label); + if (it != m_map.end()) { return it->second; } - m_blocks.emplace(_label, _label); - return get_node(_label); + m_map.emplace(_label, _label); + value_t& v = get_node(_label); + v.m_instruction = std::move(_ins); + return v; } void remove(const label_t& _label) { @@ -325,16 +259,16 @@ class cfg_t final { CRAB_ERROR("Cannot remove exit block"); } - std::vector> dead_edges; + std::vector> dead_edges; auto& bb = get_node(_label); - for (const auto& id : boost::make_iterator_range(bb.prev_blocks())) { + for (const auto& id : boost::make_iterator_range(bb.prev_labels())) { if (_label != id) { dead_edges.emplace_back(&get_node(id), &bb); } } - for (const auto& id : boost::make_iterator_range(bb.next_blocks())) { + for (const auto& id : boost::make_iterator_range(bb.next_labels())) { if (_label != id) { dead_edges.emplace_back(&bb, &get_node(id)); } @@ -344,37 +278,37 @@ class cfg_t final { *p.first -= *p.second; } - m_blocks.erase(_label); + m_map.erase(_label); } //! return a begin iterator of basic_block_t's - iterator begin() { return m_blocks.begin(); } + iterator begin() { return m_map.begin(); } //! 
return an end iterator of basic_block_t's - iterator end() { return m_blocks.end(); } + iterator end() { return m_map.end(); } [[nodiscard]] const_iterator begin() const { - return m_blocks.begin(); + return m_map.begin(); } [[nodiscard]] const_iterator end() const { - return m_blocks.end(); + return m_map.end(); } //! return a begin iterator of label_t's - const_label_iterator label_begin() const { return boost::make_transform_iterator(m_blocks.begin(), get_label()); } + const_label_iterator label_begin() const { return boost::make_transform_iterator(m_map.begin(), get_label()); } //! return an end iterator of label_t's - const_label_iterator label_end() const { return boost::make_transform_iterator(m_blocks.end(), get_label()); } + const_label_iterator label_end() const { return boost::make_transform_iterator(m_map.end(), get_label()); } //! return a begin iterator of label_t's [[nodiscard]] std::vector labels() const { std::vector res; - res.reserve(m_blocks.size()); - for (const auto& p : m_blocks) { + res.reserve(m_map.size()); + for (const auto& p : m_map) { res.push_back(p.first); } return res; @@ -385,41 +319,6 @@ class cfg_t final { return gsl::narrow(std::distance(begin(), end())); } - void simplify() { - std::set worklist(this->label_begin(), this->label_end()); - while (!worklist.empty()) { - label_t label = *worklist.begin(); - worklist.erase(label); - - basic_block_t& bb = get_node(label); - if (bb.in_degree() == 1 && get_parent(label).out_degree() == 1) { - continue; - } - while (bb.out_degree() == 1) { - basic_block_t& next_bb = get_child(label); - - if (&next_bb == &bb || next_bb.in_degree() != 1) { - break; - } - if (next_bb.label() == exit_label()) { - break; - } - worklist.erase(next_bb.label()); - - bb.move_back(next_bb); - bb -= next_bb; - auto children = next_bb.m_next; - for (const label_t& next_next_label : children) { - basic_block_t& next_next_bb = get_node(next_next_label); - bb >> next_next_bb; - } - - // delete next_bb entirely - 
remove(next_bb.label()); - } - } - } - [[nodiscard]] std::vector sorted_labels() const { std::vector labels = this->labels(); @@ -427,30 +326,30 @@ class cfg_t final { return labels; } + value_t& get_child(const label_t& b) { + assert(has_one_child(b)); + const auto rng = next_nodes(b); + return get_node(*rng.begin()); + } + + value_t& get_parent(const label_t& b) { + assert(has_one_parent(b)); + const auto rng = prev_nodes(b); + return get_node(*rng.begin()); + } + private: // Helpers [[nodiscard]] bool has_one_child(const label_t& b) const { - auto rng = next_nodes(b); - return (std::distance(rng.begin(), rng.end()) == 1); + const auto rng = next_nodes(b); + return std::distance(rng.begin(), rng.end()) == 1; } [[nodiscard]] bool has_one_parent(const label_t& b) const { - auto rng = prev_nodes(b); - return (std::distance(rng.begin(), rng.end()) == 1); - } - - basic_block_t& get_child(const label_t& b) { - assert(has_one_child(b)); - auto rng = next_nodes(b); - return get_node(*(rng.begin())); - } - - basic_block_t& get_parent(const label_t& b) { - assert(has_one_parent(b)); - auto rng = prev_nodes(b); - return get_node(*(rng.begin())); + const auto rng = prev_nodes(b); + return std::distance(rng.begin(), rng.end()) == 1; } // mark reachable blocks from curId @@ -466,142 +365,130 @@ class cfg_t final { } void remove_unreachable_blocks(); - - // remove blocks that cannot reach the exit block - void remove_useless_blocks(); }; -// Viewing a cfg_t with all edges and block statements reversed. Useful for backward analysis. 
-class cfg_rev_t final { - public: - using node_t = label_t; // for Bgl graphs - - using neighbour_const_range = cfg_t::neighbour_const_range; +class basic_block_t final { + friend class cfg_t; - // For BGL - using neighbour_const_iterator = basic_block_t::neighbour_const_iterator; + public: + basic_block_t(const basic_block_t&) = delete; - using basic_block_rev_map_t = std::map; - using iterator = basic_block_rev_map_t::iterator; - using const_iterator = basic_block_rev_map_t::const_iterator; - using label_iterator = cfg_t::label_iterator; - using const_label_iterator = cfg_t::const_label_iterator; + using label_vec_t = std::set; + using stmt_list_t = std::vector; + using iterator = stmt_list_t::iterator; + using const_iterator = stmt_list_t::const_iterator; + using reverse_iterator = stmt_list_t::reverse_iterator; + using const_reverse_iterator = stmt_list_t::const_reverse_iterator; private: - cfg_t& _cfg; - basic_block_rev_map_t _rev_bbs; + label_t m_label; + stmt_list_t m_ts; public: - explicit cfg_rev_t(cfg_t& cfg) : _cfg(cfg) { - // Create basic_block_rev_t from basic_block_t objects - // Note that basic_block_rev_t is also a view of basic_block_t so it - // doesn't modify basic_block_t objects. 
- for (auto& [label, bb] : cfg) { - _rev_bbs.emplace(label, bb); - } - } + static std::map collect_basic_blocks(cfg_t& cfg) { + std::map res; - cfg_rev_t(const cfg_rev_t& o) = default; + std::set worklist(cfg.label_begin(), cfg.label_end()); + std::set seen; + while (!worklist.empty()) { + label_t label = *worklist.begin(); + worklist.erase(label); + if (seen.contains(label)) { + continue; + } + seen.insert(label); - cfg_rev_t(cfg_rev_t&& o) noexcept : _cfg(o._cfg), _rev_bbs(std::move(o._rev_bbs)) {} + const value_t& value = cfg.get_node(label); + if (value.in_degree() == 1 && cfg.get_parent(label).out_degree() == 1) { + continue; + } + res.emplace(label, label); + basic_block_t& bb = res.at(label); + while (value.out_degree() == 1) { + value_t& next_value = cfg.get_child(label); - [[nodiscard]] - label_t entry_label() const { - return _cfg.exit_label(); - } + if (&next_value == &value || next_value.in_degree() != 1) { + break; + } + if (next_value.label() == cfg.exit_label()) { + break; + } + worklist.erase(next_value.label()); - [[nodiscard]] - neighbour_const_range next_nodes(const label_t& bb) const { - return _cfg.prev_nodes(bb); + bb.m_ts.push_back(&next_value.instruction()); + + // delete next_bb entirely + // remove(next_value.label()); + seen.insert(next_value.label()); + } + } + return res; } - [[nodiscard]] - neighbour_const_range prev_nodes(const label_t& bb) const { - return _cfg.next_nodes(bb); + void insert(GuardedInstruction* arg) { + assert(label() != label_t::entry); + assert(label() != label_t::exit); + m_ts.push_back(arg); } - neighbour_const_range next_nodes(const label_t& bb) { return _cfg.prev_nodes(bb); } + /// Insert a GuardedInstruction at the front of the basic block. + /// @note Cannot modify entry or exit blocks. 
+ void insert_front(GuardedInstruction* arg) { + assert(label() != label_t::entry); + assert(label() != label_t::exit); + m_ts.insert(m_ts.begin(), arg); + } - neighbour_const_range prev_nodes(const label_t& bb) { return _cfg.next_nodes(bb); } + explicit basic_block_t(label_t _label) : m_label(std::move(_label)) {} - basic_block_rev_t& get_node(const label_t& _label) { - auto it = _rev_bbs.find(_label); - if (it == _rev_bbs.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); - } - return it->second; - } + ~basic_block_t() = default; [[nodiscard]] - const basic_block_rev_t& get_node(const label_t& _label) const { - auto it = _rev_bbs.find(_label); - if (it == _rev_bbs.end()) { - CRAB_ERROR("Basic block ", _label, " not found in the CFG: ", __LINE__); - } - return it->second; + label_t label() const { + return m_label; } - iterator begin() { return _rev_bbs.begin(); } - - iterator end() { return _rev_bbs.end(); } - + iterator begin() { return (m_ts.begin()); } + iterator end() { return (m_ts.end()); } [[nodiscard]] const_iterator begin() const { - return _rev_bbs.begin(); + return m_ts.begin(); } - [[nodiscard]] const_iterator end() const { - return _rev_bbs.end(); + return m_ts.end(); } - const_label_iterator label_begin() const { return _cfg.label_begin(); } - - const_label_iterator label_end() const { return _cfg.label_end(); } - + reverse_iterator rbegin() { return (m_ts.rbegin()); } + reverse_iterator rend() { return (m_ts.rend()); } [[nodiscard]] - label_t exit_label() const { - return _cfg.entry_label(); + const_reverse_iterator rbegin() const { + return m_ts.rbegin(); } -}; - -inline void cfg_t::remove_useless_blocks() { - cfg_rev_t rev_cfg(*this); - - visited_t useful, useless; - mark_alive_blocks(rev_cfg.entry_label(), rev_cfg, useful); - - if (!useful.contains(exit_label())) { - CRAB_ERROR("Exit block must be reachable"); + [[nodiscard]] + const_reverse_iterator rend() const { + return m_ts.rend(); } - for (const auto& label 
: labels()) { - if (!useful.contains(label)) { - useless.insert(label); - } + + [[nodiscard]] + size_t size() const { + return gsl::narrow(std::distance(begin(), end())); } - for (const auto& _label : useless) { - remove(_label); + // insert all statements of other at the back + void move_back(basic_block_t& other) { + m_ts.reserve(m_ts.size() + other.m_ts.size()); + std::ranges::move(other.m_ts, std::back_inserter(m_ts)); } -} -inline void cfg_t::remove_unreachable_blocks() { - visited_t alive, dead; - mark_alive_blocks(entry_label(), *this, alive); + void swap_instructions(stmt_list_t& ts) { std::swap(m_ts, ts); } +}; - for (const auto& label : labels()) { - if (!alive.contains(label)) { - dead.insert(label); - } - } +void print_dot(const cfg_t& cfg, std::ostream& out); +void print_dot(const cfg_t& cfg, const std::string& outfile); - if (dead.contains(exit_label())) { - CRAB_ERROR("Exit block must be reachable"); - } - for (const auto& _label : dead) { - remove(_label); - } -} +std::ostream& operator<<(std::ostream& o, const value_t& value); +std::ostream& operator<<(std::ostream& o, const cfg_t& cfg); } // end namespace crab @@ -613,8 +500,6 @@ std::vector stats_headers(); std::map collect_stats(const cfg_t&); struct prepare_cfg_options { - /// When true, simplifies the control flow graph by merging basic blocks. - bool simplify = true; /// When true, verifies that the program terminates. bool check_for_termination = false; /// When true, ensures the program has a valid exit block. 
@@ -625,10 +510,3 @@ cfg_t prepare_cfg(const InstructionSeq& prog, const program_info& info, const pr void explicate_assertions(cfg_t& cfg, const program_info& info); std::vector get_assertions(Instruction ins, const program_info& info, const std::optional& label); - -void print_dot(const cfg_t& cfg, std::ostream& out); -void print_dot(const cfg_t& cfg, const std::string& outfile); - -std::ostream& operator<<(std::ostream& o, const basic_block_t& bb); -std::ostream& operator<<(std::ostream& o, const crab::basic_block_rev_t& bb); -std::ostream& operator<<(std::ostream& o, const cfg_t& cfg); diff --git a/src/crab/ebpf_checker.cpp b/src/crab/ebpf_checker.cpp new file mode 100644 index 000000000..4d734780d --- /dev/null +++ b/src/crab/ebpf_checker.cpp @@ -0,0 +1,444 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT + +// This file is eBPF-specific, not derived from CRAB. + +#include +#include +#include + +#include "asm_syntax.hpp" +#include "asm_unmarshal.hpp" +#include "config.hpp" +#include "crab/array_domain.hpp" +#include "crab/ebpf_domain.hpp" +#include "crab_utils/num_safety.hpp" +#include "dsl_syntax.hpp" +#include "platform.hpp" +#include "string_constraints.hpp" + +using crab::domains::NumAbsDomain; +namespace crab { + +static bool check_require(const NumAbsDomain& inv, const linear_constraint_t& cst) { + if (inv.is_bottom()) { + return true; + } + if (cst.is_contradiction()) { + return false; + } + if (inv.entail(cst)) { + // XXX: add_redundant(s); + return true; + } + if (inv.intersect(cst)) { + // XXX: add_error() if imply negation + return false; + } + return false; +} + +class ebpf_checker final { + public: + explicit ebpf_checker(ebpf_domain_t& dom, const Assertion& assertion, const std::optional& label = {}) + : assertion{assertion}, label{label}, dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} + + void visit(const Assertion& assertion) { std::visit(*this, assertion); } + + void operator()(const 
Addable&); + void operator()(const BoundedLoopCount&); + void operator()(const Comparable&); + void operator()(const FuncConstraint&); + void operator()(const ValidDivisor&); + void operator()(const TypeConstraint&); + void operator()(const ValidAccess&); + void operator()(const ValidCall&); + void operator()(const ValidMapKeyValue&); + void operator()(const ValidSize&); + void operator()(const ValidStore&); + void operator()(const ZeroCtxOffset&); + + private: + std::string create_warning(const std::string& s) const { return s + " (" + to_string(assertion) + ")"; } + + void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& msg) { + if (label && !check_require(inv, cst)) { + warnings.push_back(create_warning(msg)); + } + + if (thread_local_options.assume_assertions) { + // avoid redundant errors + inv += cst; + } + } + + void require(const std::string& msg) { + if (label) { + warnings.push_back(create_warning(msg)); + } + if (thread_local_options.assume_assertions) { + m_inv.set_to_bottom(); + } + } + + // memory check / load / store + void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub); + void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + std::optional packet_size); + void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + variable_t shared_region_size); + + public: + std::vector warnings; + + private: + const Assertion& assertion; + const std::optional label; + + ebpf_domain_t& dom; + // shorthands: + NumAbsDomain& m_inv; + domains::array_domain_t& stack; + TypeDomain& type_inv; +}; + +void ebpf_domain_assume(ebpf_domain_t& dom, const Assertion& assertion) { + if (dom.is_bottom()) { + return; + } + ebpf_checker{dom, assertion}.visit(assertion); +} + +std::vector 
ebpf_domain_check(ebpf_domain_t& dom, const label_t& label, const Assertion& assertion) { + if (dom.is_bottom()) { + return {}; + } + ebpf_checker checker{dom, assertion, label}; + checker.visit(assertion); + return std::move(checker.warnings); +} + +static linear_constraint_t type_is_pointer(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type >= T_CTX; +} + +static linear_constraint_t type_is_number(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type == T_NUM; +} + +static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } + +static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type != T_STACK; +} + +void ebpf_checker::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) { + using namespace crab::dsl_syntax; + const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; + const auto interval = inv.eval_interval(r10_stack_offset); + if (interval.is_singleton()) { + const int64_t stack_offset = interval.singleton()->cast_to(); + require(inv, lb >= stack_offset - EBPF_SUBPROGRAM_STACK_SIZE, + "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE"); + } + require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); +} + +void ebpf_checker::check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, + const linear_expression_t& ub) { + using namespace crab::dsl_syntax; + require(inv, lb >= 0, "Lower bound must be at least 0"); + require(inv, ub <= global_program_info->type.context_descriptor->size, + std::string("Upper bound must be at most ") + + std::to_string(global_program_info->type.context_descriptor->size)); +} + +void ebpf_checker::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const std::optional packet_size) { + using namespace 
crab::dsl_syntax; + require(inv, lb >= variable_t::meta_offset(), "Lower bound must be at least meta_offset"); + if (packet_size) { + require(inv, ub <= *packet_size, "Upper bound must be at most packet_size"); + } else { + require(inv, ub <= MAX_PACKET_SIZE, + std::string{"Upper bound must be at most "} + std::to_string(MAX_PACKET_SIZE)); + } +} + +void ebpf_checker::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, + const variable_t shared_region_size) { + using namespace crab::dsl_syntax; + require(inv, lb >= 0, "Lower bound must be at least 0"); + require(inv, ub <= shared_region_size, std::string("Upper bound must be at most ") + shared_region_size.name()); +} + +void ebpf_checker::operator()(const Comparable& s) { + using namespace crab::dsl_syntax; + if (type_inv.same_type(m_inv, s.r1, s.r2)) { + // Same type. If both are numbers, that's okay. Otherwise: + const auto inv = m_inv.when(reg_pack(s.r2).type != T_NUM); + // We must check that they belong to a singleton region: + if (!type_inv.is_in_group(inv, s.r1, TypeGroup::singleton_ptr) && + !type_inv.is_in_group(inv, s.r1, TypeGroup::map_fd)) { + require("Cannot subtract pointers to non-singleton regions"); + return; + } + // And, to avoid wraparound errors, they must be within bounds. + this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r1, 0, Imm{0}, false}); + this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r2, 0, Imm{0}, false}); + } else { + // _Maybe_ different types, so r2 must be a number. + // We checked in a previous assertion that r1 is a pointer or a number. 
+ require(m_inv, reg_pack(s.r2).type == T_NUM, "Cannot subtract pointers to different regions"); + }; +} + +void ebpf_checker::operator()(const Addable& s) { + if (!type_inv.implies_type(m_inv, type_is_pointer(reg_pack(s.ptr)), type_is_number(s.num))) { + require("Only numbers can be added to pointers"); + } +} + +void ebpf_checker::operator()(const ValidDivisor& s) { + using namespace crab::dsl_syntax; + const auto reg = reg_pack(s.reg); + if (!type_inv.implies_type(m_inv, type_is_pointer(reg), type_is_number(s.reg))) { + require("Only numbers can be used as divisors"); + } + if (!thread_local_options.allow_division_by_zero) { + const auto v = s.is_signed ? reg.svalue : reg.uvalue; + require(m_inv, v != 0, "Possible division by zero"); + } +} + +void ebpf_checker::operator()(const ValidStore& s) { + if (!type_inv.implies_type(m_inv, type_is_not_stack(reg_pack(s.mem)), type_is_number(s.val))) { + require("Only numbers can be stored to externally-visible regions"); + } +} + +void ebpf_checker::operator()(const TypeConstraint& s) { + if (!type_inv.is_in_group(m_inv, s.reg, s.types)) { + require("Invalid type"); + } +} + +void ebpf_checker::operator()(const BoundedLoopCount& s) { + // Enforces an upper bound on loop iterations by checking that the loop counter + // does not exceed the specified limit + using namespace crab::dsl_syntax; + const auto counter = variable_t::loop_counter(to_string(s.name)); + require(m_inv, counter <= s.limit, "Loop counter is too large"); +} + +void ebpf_checker::operator()(const FuncConstraint& s) { + // Look up the helper function id. + const reg_pack_t& reg = reg_pack(s.reg); + const auto src_interval = m_inv.eval_interval(reg.svalue); + if (const auto sn = src_interval.singleton()) { + if (sn->fits()) { + // We can now process it as if the id was immediate. 
+ const int32_t imm = sn->cast_to(); + if (!global_program_info->platform->is_helper_usable(imm)) { + require("invalid helper function id " + std::to_string(imm)); + return; + } + const Call call = make_call(imm, *global_program_info->platform); + for (const Assertion& sub_assertion : get_assertions(call, *global_program_info, {})) { + // TODO: create explicit sub assertions elsewhere + ebpf_checker sub_checker{dom, sub_assertion, label}; + sub_checker.visit(sub_assertion); + for (const auto& warning : sub_checker.warnings) { + warnings.push_back(warning); + } + } + return; + } + } + require("callx helper function id is not a valid singleton"); +} + +void ebpf_checker::operator()(const ValidSize& s) { + using namespace crab::dsl_syntax; + const auto r = reg_pack(s.reg); + require(m_inv, s.can_be_zero ? r.svalue >= 0 : r.svalue > 0, "Invalid size"); +} + +void ebpf_checker::operator()(const ValidCall& s) { + if (!s.stack_frame_prefix.empty()) { + const EbpfHelperPrototype proto = global_program_info->platform->get_helper_prototype(s.func); + if (proto.return_type == EBPF_RETURN_TYPE_INTEGER_OR_NO_RETURN_IF_SUCCEED) { + require("tail call not supported in subprogram"); + return; + } + } +} + +void ebpf_checker::operator()(const ValidMapKeyValue& s) { + using namespace crab::dsl_syntax; + + const auto fd_type = dom.get_map_type(s.map_fd_reg); + + const auto access_reg = reg_pack(s.access_reg); + int width; + if (s.key) { + const auto key_size = dom.get_map_key_size(s.map_fd_reg).singleton(); + if (!key_size.has_value()) { + require("Map key size is not singleton"); + return; + } + width = key_size->narrow(); + } else { + const auto value_size = dom.get_map_value_size(s.map_fd_reg).singleton(); + if (!value_size.has_value()) { + require("Map value size is not singleton"); + return; + } + width = value_size->narrow(); + } + + m_inv = type_inv.join_over_types(m_inv, s.access_reg, [&](NumAbsDomain& inv, type_encoding_t access_reg_type) { + if (access_reg_type == T_STACK) 
{ + variable_t lb = access_reg.stack_offset; + linear_expression_t ub = lb + width; + if (!stack.all_num(inv, lb, ub)) { + auto lb_is = inv[lb].lb().number(); + std::string lb_s = lb_is && lb_is->fits() ? std::to_string(lb_is->narrow()) : "-oo"; + auto ub_is = inv.eval_interval(ub).ub().number(); + std::string ub_s = ub_is && ub_is->fits() ? std::to_string(ub_is->narrow()) : "oo"; + require(inv, linear_constraint_t::false_const(), + "Illegal map update with a non-numerical value [" + lb_s + "-" + ub_s + ")"); + } else if (thread_local_options.strict && fd_type.has_value()) { + EbpfMapType map_type = global_program_info->platform->get_map_type(*fd_type); + if (map_type.is_array) { + // Get offset value. + variable_t key_ptr = access_reg.stack_offset; + std::optional offset = inv[key_ptr].singleton(); + if (!offset.has_value()) { + require("Pointer must be a singleton"); + } else if (s.key) { + // Look up the value pointed to by the key pointer. + variable_t key_value = + variable_t::cell_var(data_kind_t::svalues, offset.value(), sizeof(uint32_t)); + + if (auto max_entries = dom.get_map_max_entries(s.map_fd_reg).lb().number()) { + require(inv, key_value < *max_entries, "Array index overflow"); + } else { + require("Max entries is not finite"); + } + require(inv, key_value >= 0, "Array index underflow"); + } + } + } + } else if (access_reg_type == T_PACKET) { + variable_t lb = access_reg.packet_offset; + linear_expression_t ub = lb + width; + check_access_packet(inv, lb, ub, {}); + // Packet memory is both readable and writable. + } else if (access_reg_type == T_SHARED) { + variable_t lb = access_reg.shared_offset; + linear_expression_t ub = lb + width; + check_access_shared(inv, lb, ub, access_reg.shared_region_size); + require(inv, access_reg.svalue > 0, "Possible null access"); + // Shared memory is zero-initialized when created so is safe to read and write. 
+ } else { + require("Only stack or packet can be used as a parameter"); + } + }); +} + +static std::tuple lb_ub_access_pair(const ValidAccess& s, + const variable_t offset_var) { + using namespace crab::dsl_syntax; + linear_expression_t lb = offset_var + s.offset; + linear_expression_t ub = std::holds_alternative(s.width) ? lb + std::get(s.width).v + : lb + reg_pack(std::get(s.width)).svalue; + return {lb, ub}; +} + +void ebpf_checker::operator()(const ValidAccess& s) { + using namespace crab::dsl_syntax; + + const bool is_comparison_check = s.width == Value{Imm{0}}; + + const auto reg = reg_pack(s.reg); + // join_over_types instead of simple iteration is only needed for assume-assert + m_inv = type_inv.join_over_types(m_inv, s.reg, [&](NumAbsDomain& inv, type_encoding_t type) { + switch (type) { + case T_PACKET: { + auto [lb, ub] = lb_ub_access_pair(s, reg.packet_offset); + check_access_packet(inv, lb, ub, + is_comparison_check ? std::optional{} : variable_t::packet_size()); + // if within bounds, it can never be null + // Context memory is both readable and writable. + break; + } + case T_STACK: { + auto [lb, ub] = lb_ub_access_pair(s, reg.stack_offset); + check_access_stack(inv, lb, ub); + // if within bounds, it can never be null + if (s.access_type == AccessType::read) { + // Require that the stack range contains numbers. 
+ if (!stack.all_num(inv, lb, ub)) { + if (s.offset < 0) { + require("Stack content is not numeric"); + } else if (const auto pimm = std::get_if(&s.width)) { + if (!inv.entail(gsl::narrow(pimm->v) <= reg.stack_numeric_size - s.offset)) { + require("Stack content is not numeric"); + } + } else { + if (!inv.entail(reg_pack(std::get(s.width)).svalue <= reg.stack_numeric_size - s.offset)) { + require("Stack content is not numeric"); + } + } + } + } + break; + } + case T_CTX: { + auto [lb, ub] = lb_ub_access_pair(s, reg.ctx_offset); + check_access_context(inv, lb, ub); + // if within bounds, it can never be null + // The context is both readable and writable. + break; + } + case T_SHARED: { + auto [lb, ub] = lb_ub_access_pair(s, reg.shared_offset); + check_access_shared(inv, lb, ub, reg.shared_region_size); + if (!is_comparison_check && !s.or_null) { + require(inv, reg.svalue > 0, "Possible null access"); + } + // Shared memory is zero-initialized when created so is safe to read and write. + break; + } + case T_NUM: + if (!is_comparison_check) { + if (s.or_null) { + require(inv, reg.svalue == 0, "Non-null number"); + } else { + require("Only pointers can be dereferenced"); + } + } + break; + case T_MAP: + case T_MAP_PROGRAMS: + if (!is_comparison_check) { + require("FDs cannot be dereferenced directly"); + } + break; + default: require("Invalid type"); break; + } + }); +} + +void ebpf_checker::operator()(const ZeroCtxOffset& s) { + using namespace crab::dsl_syntax; + const auto reg = reg_pack(s.reg); + require(m_inv, reg.ctx_offset == 0, "Nonzero context offset"); +} + +} // namespace crab diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index 02e37074a..cb4ca0b6c 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -3,719 +3,22 @@ // This file is eBPF-specific, not derived from CRAB. 
-#include #include #include #include #include "boost/endian/conversion.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" #include "config.hpp" #include "crab/array_domain.hpp" #include "crab/ebpf_domain.hpp" -#include "crab_utils/num_safety.hpp" #include "dsl_syntax.hpp" -#include "platform.hpp" #include "string_constraints.hpp" using crab::domains::NumAbsDomain; namespace crab { -constexpr int MAX_PACKET_SIZE = 0xffff; - -// Pointers in the BPF VM are defined to be 64 bits. Some contexts, like -// data, data_end, and meta in Linux's struct xdp_md are only 32 bit offsets -// from a base address not exposed to the program, but when a program is loaded, -// the offsets get replaced with 64-bit address pointers. However, we currently -// need to do pointer arithmetic on 64-bit numbers so for now we cap the interval -// to 32 bits. -constexpr int64_t PTR_MAX = std::numeric_limits::max() - MAX_PACKET_SIZE; - -/** Linear constraint for a pointer comparison. - */ -static linear_constraint_t assume_cst_offsets_reg(const Condition::Op op, const variable_t dst_offset, - const variable_t src_offset) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - switch (op) { - case Op::EQ: return eq(dst_offset, src_offset); - case Op::NE: return neq(dst_offset, src_offset); - case Op::GE: return dst_offset >= src_offset; - case Op::SGE: return dst_offset >= src_offset; // pointer comparison is unsigned - case Op::LE: return dst_offset <= src_offset; - case Op::SLE: return dst_offset <= src_offset; // pointer comparison is unsigned - case Op::GT: return dst_offset > src_offset; - case Op::SGT: return dst_offset > src_offset; // pointer comparison is unsigned - case Op::SLT: return src_offset > dst_offset; - // Note: reverse the test as a workaround strange lookup: - case Op::LT: return src_offset > dst_offset; // FIX unsigned - default: return dst_offset - dst_offset == 0; - } -} - -static std::vector assume_bit_cst_interval(const NumAbsDomain& inv, 
Condition::Op op, bool is64, - variable_t dst_uvalue, interval_t src_interval) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - - auto dst_interval = inv.eval_interval(dst_uvalue); - std::optional dst_n = dst_interval.singleton(); - if (!dst_n || !dst_n.value().fits_cast_to()) { - return {}; - } - - std::optional src_n = src_interval.singleton(); - if (!src_n || !src_n->fits_cast_to()) { - return {}; - } - uint64_t src_int_value = src_n.value().cast_to(); - if (!is64) { - src_int_value = gsl::narrow_cast(src_int_value); - } - - bool result; - switch (op) { - case Op::SET: result = (dst_n.value().cast_to() & src_int_value) != 0; break; - case Op::NSET: result = (dst_n.value().cast_to() & src_int_value) == 0; break; - default: throw std::exception(); - } - - return {result ? linear_constraint_t::true_const() : linear_constraint_t::false_const()}; -} - -static std::vector assume_signed_64bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, - const variable_t left_uvalue, - const interval_t& right_interval, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using namespace crab::dsl_syntax; - if (right_interval <= interval_t::nonnegative(64) && !right_interval.is_singleton()) { - return {(left_svalue == right_svalue), (left_uvalue == right_uvalue), eq(left_svalue, left_uvalue)}; - } else { - return {(left_svalue == right_svalue), (left_uvalue == right_uvalue)}; - } -} - -static std::vector assume_signed_32bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, - const variable_t left_uvalue, - const interval_t& right_interval) { - using namespace crab::dsl_syntax; - - if (const auto rn = right_interval.singleton()) { - const auto left_svalue_interval = inv.eval_interval(left_svalue); - if (auto size = left_svalue_interval.finite_size()) { - // Find the lowest 64-bit svalue whose low 32 bits match the singleton. - - // Get lower bound as a 64-bit value. 
- int64_t lb = left_svalue_interval.lb().number()->cast_to(); - - // Use the high 32-bits from the left lower bound and the low 32-bits from the right singleton. - // The result might be lower than the lower bound. - const int64_t lb_match = (lb & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); - if (lb_match < lb) { - // The result is lower than the left interval, so try the next higher matching 64-bit value. - // It's ok if this goes higher than the left upper bound. - lb += 0x100000000; - } - - // Find the highest 64-bit svalue whose low 32 bits match the singleton. - - // Get upper bound as a 64-bit value. - const int64_t ub = left_svalue_interval.ub().number()->cast_to(); - - // Use the high 32-bits from the left upper bound and the low 32-bits from the right singleton. - // The result might be higher than the upper bound. - const int64_t ub_match = (ub & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); - if (ub_match > ub) { - // The result is higher than the left interval, so try the next lower matching 64-bit value. - // It's ok if this goes lower than the left lower bound. - lb -= 0x100000000; - } - - if (to_unsigned(lb_match) <= to_unsigned(ub_match)) { - // The interval is also valid when cast to a uvalue, meaning - // both bounds are positive or both are negative. - return {left_svalue >= lb_match, left_svalue <= ub_match, left_uvalue >= to_unsigned(lb_match), - left_uvalue <= to_unsigned(ub_match)}; - } else { - // The interval can only be represented as an svalue. - return {left_svalue >= lb_match, left_svalue <= ub_match}; - } - } - } - return {}; -} - -// Given left and right values, get the left and right intervals, and also split -// the left interval into separate negative and positive intervals. 
-static void get_signed_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, - const variable_t left_uvalue, const linear_expression_t& right_svalue, - interval_t& left_interval, interval_t& right_interval, - interval_t& left_interval_positive, interval_t& left_interval_negative) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - // Get intervals as 32-bit or 64-bit as appropriate. - left_interval = inv.eval_interval(left_svalue); - right_interval = inv.eval_interval(right_svalue); - if (!is64) { - if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || - (left_interval <= interval_t::negative(32) && right_interval <= interval_t::negative(32))) { - is64 = true; - // fallthrough as 64bit, including deduction of relational information - } else { - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); - // continue as 32bit - } - } - - if (!left_interval.is_top()) { - left_interval_positive = left_interval & interval_t::nonnegative(64); - left_interval_negative = left_interval & interval_t::negative(64); - } else { - left_interval = inv.eval_interval(left_uvalue); - if (!left_interval.is_top()) { - // The interval is TOP as a signed interval but is represented precisely as an unsigned interval, - // so split into two signed intervals that can be treated separately. - left_interval_positive = left_interval & interval_t::nonnegative(64); - const number_t lih_ub = - left_interval.ub().number() ? 
left_interval.ub().number()->truncate_to() : -1; - left_interval_negative = interval_t{std::numeric_limits::min(), lih_ub}; - } else { - left_interval_positive = interval_t::nonnegative(64); - left_interval_negative = interval_t::negative(64); - } - } - - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); -} - -// Given left and right values, get the left and right intervals, and also split -// the left interval into separate low and high intervals. -static void get_unsigned_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, - const variable_t left_uvalue, const linear_expression_t& right_uvalue, - interval_t& left_interval, interval_t& right_interval, interval_t& left_interval_low, - interval_t& left_interval_high) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - // Get intervals as 32-bit or 64-bit as appropriate. - left_interval = inv.eval_interval(left_uvalue); - right_interval = inv.eval_interval(right_uvalue); - if (!is64) { - if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || - (left_interval <= interval_t::unsigned_high(32) && right_interval <= interval_t::unsigned_high(32))) { - is64 = true; - // fallthrough as 64bit, including deduction of relational information - } else { - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); - // continue as 32bit - } - } - - if (!left_interval.is_top()) { - left_interval_low = left_interval & interval_t::nonnegative(64); - left_interval_high = left_interval & interval_t::unsigned_high(64); - } else { - left_interval = inv.eval_interval(left_svalue); - if (!left_interval.is_top()) { - // The interval is TOP as an unsigned interval but is represented precisely as a signed interval, - // so split into two unsigned intervals that can be treated separately. 
- left_interval_low = interval_t(0, left_interval.ub()).truncate_to(); - left_interval_high = interval_t(left_interval.lb(), -1).truncate_to(); - } else { - left_interval_low = interval_t::nonnegative(64); - left_interval_high = interval_t::unsigned_high(64); - } - } - - left_interval = left_interval.truncate_to(); - right_interval = right_interval.truncate_to(); -} - -static std::vector -assume_signed_64bit_lt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, - const interval_t& left_interval_positive, const interval_t& left_interval_negative, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::negative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1]. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(64) && - right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // Interval can only be represented as an svalue. - return {strict ? 
left_svalue < right_svalue : left_svalue <= right_svalue}; - } -} - -static std::vector -assume_signed_32bit_lt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_positive, - const interval_t& left_interval_negative, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {std::numeric_limits::max() < left_uvalue, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(32) && - right_interval <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX] - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && - inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { - // Interval can only be represented as an svalue. - return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else { - // We can't directly compare the svalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector -assume_signed_64bit_gt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, - const interval_t& left_interval_positive, const interval_t& left_interval_negative, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(64) && - right_interval <= interval_t::negative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {std::numeric_limits::max() < left_uvalue, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // Interval can only be represented as an svalue. - return {strict ? 
left_svalue > right_svalue : left_svalue >= right_svalue}; - } -} - -static std::vector -assume_signed_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_positive, - const interval_t& left_interval_negative, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - const auto lpub = left_interval_positive.truncate_to().ub(); - return {left_svalue >= 0, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue, - left_svalue <= left_uvalue, - left_svalue >= left_uvalue, - left_uvalue >= 0, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - left_uvalue <= *lpub.number()}; - } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(32) && - right_interval <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], - // aka [INT_MAX+1, UINT_MAX]. - return {left_uvalue >= number_t{std::numeric_limits::max()} + 1, - strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && - inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { - // Interval can only be represented as an svalue. - return {strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // We can't directly compare the svalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector assume_signed_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64, - variable_t left_svalue, variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_positive = interval_t::bottom(); - interval_t left_interval_negative = interval_t::bottom(); - get_signed_intervals(inv, is64, left_svalue, left_uvalue, right_svalue, left_interval, right_interval, - left_interval_positive, left_interval_negative); - - if (op == Condition::Op::EQ) { - // Handle svalue == right. - if (is64) { - return assume_signed_64bit_eq(inv, left_svalue, left_uvalue, right_interval, right_svalue, right_uvalue); - } else { - return assume_signed_32bit_eq(inv, left_svalue, left_uvalue, right_interval); - } - } - - const bool is_lt = op == Condition::Op::SLT || op == Condition::Op::SLE; - bool strict = op == Condition::Op::SLT || op == Condition::Op::SGT; - - auto llb = left_interval.lb(); - auto lub = left_interval.ub(); - auto rlb = right_interval.lb(); - auto rub = right_interval.ub(); - if (!is_lt && (strict ? lub <= rlb : lub < rlb)) { - // Left signed interval is lower than right signed interval. - return {linear_constraint_t::false_const()}; - } else if (is_lt && (strict ? llb >= rub : llb > rub)) { - // Left signed interval is higher than right signed interval. - return {linear_constraint_t::false_const()}; - } - if (is_lt && (strict ? lub < rlb : lub <= rlb)) { - // Left signed interval is lower than right signed interval. - return {linear_constraint_t::true_const()}; - } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { - // Left signed interval is higher than right signed interval. 
- return {linear_constraint_t::true_const()}; - } - - if (is64) { - if (is_lt) { - return assume_signed_64bit_lt(strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } else { - return assume_signed_64bit_gt(strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } - } else { - // 32-bit compare. - if (is_lt) { - return assume_signed_32bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } else { - return assume_signed_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_positive, - left_interval_negative, right_svalue, right_uvalue, right_interval); - } - } - return {}; -} - -static std::vector -assume_unsigned_64bit_lt(const NumAbsDomain& inv, bool strict, variable_t left_svalue, variable_t left_uvalue, - const interval_t& left_interval_low, const interval_t& left_interval_high, - const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, - const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - auto rub = right_interval.ub(); - auto lllb = left_interval_low.truncate_to().lb(); - if (right_interval <= interval_t::nonnegative(64) && (strict ? lllb >= rub : lllb > rub)) { - // The high interval is out of range. - if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { - return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), - left_uvalue <= *lsubn, left_svalue >= 0}; - } else { - return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), - left_svalue >= 0}; - } - } - auto lhlb = left_interval_high.truncate_to().lb(); - if (right_interval <= interval_t::unsigned_high(64) && (strict ? lhlb >= rub : lhlb > rub)) { - // The high interval is out of range. 
- if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { - return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), - left_uvalue <= *lsubn, left_svalue >= 0}; - } else { - return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), - left_svalue >= 0}; - } - } - if (right_interval <= interval_t::signed_int(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - auto llub = left_interval_low.truncate_to().ub(); - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - left_uvalue <= *llub.number(), 0 <= left_svalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (left_interval_low.is_bottom() && right_interval <= interval_t::unsigned_high(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if ((left_interval_low | left_interval_high) == interval_t::unsigned_int(64)) { - // Interval can only be represented as a uvalue, and was TOP before. - return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? 
left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } -} - -static std::vector assume_unsigned_32bit_lt(const NumAbsDomain& inv, const bool strict, - const variable_t left_svalue, - const variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (inv.eval_interval(left_uvalue) <= interval_t::nonnegative(32) && - inv.eval_interval(right_uvalue) <= interval_t::nonnegative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT32_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (inv.eval_interval(left_svalue) <= interval_t::negative(32) && - inv.eval_interval(right_svalue) <= interval_t::negative(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT32_MIN, -1]. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, - strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; - } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && - inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } else { - // We can't directly compare the uvalues since they may differ in high order bits. 
- return {}; - } -} - -static std::vector -assume_unsigned_64bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_low, - const interval_t& left_interval_high, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - const auto rlb = right_interval.lb(); - const auto llub = left_interval_low.truncate_to().ub(); - const auto lhlb = left_interval_high.truncate_to().lb(); - - if (right_interval <= interval_t::nonnegative(64) && (strict ? llub <= rlb : llub < rlb)) { - // The low interval is out of range. - return {strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - *lhlb.number() == std::numeric_limits::max() ? left_uvalue == *lhlb.number() - : left_uvalue >= *lhlb.number(), - left_svalue < 0}; - } else if (right_interval <= interval_t::unsigned_high(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if ((left_interval_low | left_interval_high) <= interval_t::nonnegative(64) && - right_interval <= interval_t::nonnegative(64)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } -} - -static std::vector -assume_unsigned_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, - const variable_t left_uvalue, const interval_t& left_interval_low, - const interval_t& left_interval_high, const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue, const interval_t& right_interval) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - if (right_interval <= interval_t::unsigned_high(32)) { - // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, - strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; - } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && - inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { - // Interval can only be represented as a uvalue. - return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } else { - // We can't directly compare the uvalues since they may differ in high order bits. - return {}; - }; -} - -static std::vector assume_unsigned_cst_interval(const NumAbsDomain& inv, Condition::Op op, - bool is64, variable_t left_svalue, - variable_t left_uvalue, - const linear_expression_t& right_svalue, - const linear_expression_t& right_uvalue) { - using crab::interval_t; - using namespace crab::dsl_syntax; - - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_low = interval_t::bottom(); - interval_t left_interval_high = interval_t::bottom(); - get_unsigned_intervals(inv, is64, left_svalue, left_uvalue, right_uvalue, left_interval, right_interval, - left_interval_low, left_interval_high); - - // Handle uvalue != right. 
- if (op == Condition::Op::NE) { - if (auto rn = right_interval.singleton()) { - if (rn == left_interval.truncate_to_uint(is64 ? 64 : 32).lb().number()) { - // "NE lower bound" is equivalent to "GT lower bound". - op = Condition::Op::GT; - right_interval = interval_t{left_interval.lb()}; - } else if (rn == left_interval.ub().number()) { - // "NE upper bound" is equivalent to "LT upper bound". - op = Condition::Op::LT; - right_interval = interval_t{left_interval.ub()}; - } else { - return {}; - } - } else { - return {}; - } - } - - const bool is_lt = op == Condition::Op::LT || op == Condition::Op::LE; - bool strict = op == Condition::Op::LT || op == Condition::Op::GT; - - auto [llb, lub] = left_interval.pair(); - auto [rlb, rub] = right_interval.pair(); - if (is_lt ? (strict ? llb >= rub : llb > rub) : (strict ? lub <= rlb : lub < rlb)) { - // Left unsigned interval is lower than right unsigned interval. - return {linear_constraint_t::false_const()}; - } - if (is_lt && (strict ? lub < rlb : lub <= rlb)) { - // Left unsigned interval is lower than right unsigned interval. We still add a - // relationship for use when widening, such as is used in the prime conformance test. - if (is64) { - return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; - } - return {}; - } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { - // Left unsigned interval is higher than right unsigned interval. We still add a - // relationship for use when widening, such as is used in the prime conformance test. - if (is64) { - return {strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; - } else { - return {}; - } - } - - if (is64) { - if (is_lt) { - return assume_unsigned_64bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } else { - return assume_unsigned_64bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } - } else { - if (is_lt) { - return assume_unsigned_32bit_lt(inv, strict, left_svalue, left_uvalue, right_svalue, right_uvalue); - } else { - return assume_unsigned_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, - left_interval_high, right_svalue, right_uvalue, right_interval); - } - } -} - -/** Linear constraints for a comparison with a constant. - */ -static std::vector assume_cst_imm(const NumAbsDomain& inv, const Condition::Op op, const bool is64, - const variable_t dst_svalue, const variable_t dst_uvalue, - const int64_t imm) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - switch (op) { - case Op::EQ: - case Op::SGE: - case Op::SLE: - case Op::SGT: - case Op::SLT: - return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, gsl::narrow_cast(imm)); - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, interval_t{imm}); - case Op::NE: - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: - return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, - gsl::narrow_cast(imm)); - } - return {}; -} - -/** Linear constraint for a numerical comparison between registers. 
- */ -static std::vector assume_cst_reg(const NumAbsDomain& inv, const Condition::Op op, const bool is64, - const variable_t dst_svalue, const variable_t dst_uvalue, - const variable_t src_svalue, const variable_t src_uvalue) { - using namespace crab::dsl_syntax; - using Op = Condition::Op; - if (is64) { - switch (op) { - case Op::EQ: { - const interval_t src_interval = inv.eval_interval(src_svalue); - if (!src_interval.is_singleton() && src_interval <= interval_t::nonnegative(64)) { - return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue), eq(dst_svalue, dst_uvalue)}; - } else { - return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue)}; - } - } - case Op::NE: return {neq(dst_svalue, src_svalue)}; - case Op::SGE: return {dst_svalue >= src_svalue}; - case Op::SLE: return {dst_svalue <= src_svalue}; - case Op::SGT: return {dst_svalue > src_svalue}; - // Note: reverse the test as a workaround strange lookup: - case Op::SLT: return {src_svalue > dst_svalue}; - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - } - } else { - switch (op) { - case Op::EQ: - case Op::SGE: - case Op::SLE: - case Op::SGT: - case Op::SLT: return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - case Op::SET: - case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); - case Op::NE: - case Op::GE: - case Op::LE: - case Op::GT: - case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); - } - } - assert(false); - throw std::exception(); -} - std::optional ebpf_domain_t::get_type_offset_variable(const Reg& reg, const int type) { reg_pack_t r = reg_pack(reg); switch (type) { @@ -737,7 +40,7 @@ std::optional 
ebpf_domain_t::get_type_offset_variable(const Reg& reg return get_type_offset_variable(reg, m_inv); } -void ebpf_domain_t::set_require_check(std::function f) { check_require = std::move(f); } +string_invariant ebpf_domain_t::to_set() const { return this->m_inv.to_set() + this->stack.to_set(); } ebpf_domain_t ebpf_domain_t::top() { ebpf_domain_t abs; @@ -854,2038 +157,130 @@ void ebpf_domain_t::operator+=(const linear_constraint_t& cst) { m_inv += cst; } void ebpf_domain_t::operator-=(const variable_t var) { m_inv -= var; } -void ebpf_domain_t::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } -void ebpf_domain_t::assign(const variable_t x, const int64_t e) { m_inv.set(x, interval_t(e)); } - -void ebpf_domain_t::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_domain_t::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_domain_t::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, - const int finite_width) { - m_inv.apply(op, x, y, z, finite_width); -} - -void ebpf_domain_t::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const number_t& k, - const int finite_width) { - m_inv.apply(op, x, y, k, finite_width); -} - -void ebpf_domain_t::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { - std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); -} - -void ebpf_domain_t::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { - std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); -} +// Get the start and end of the range of possible map fd values. +// In the future, it would be cleaner to use a set rather than an interval +// for map fds. 
+bool ebpf_domain_t::get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const { + const interval_t& map_fd_interval = m_inv[reg_pack(map_fd_reg).map_fd]; + const auto lb = map_fd_interval.lb().number(); + const auto ub = map_fd_interval.ub().number(); + if (!lb || !lb->fits() || !ub || !ub->fits()) { + return false; + } + *start_fd = lb->narrow(); + *end_fd = ub->narrow(); -static void havoc_offsets(NumAbsDomain& inv, const Reg& reg) { - const reg_pack_t r = reg_pack(reg); - inv -= r.ctx_offset; - inv -= r.map_fd; - inv -= r.packet_offset; - inv -= r.shared_offset; - inv -= r.shared_region_size; - inv -= r.stack_offset; - inv -= r.stack_numeric_size; -} -static void havoc_register(NumAbsDomain& inv, const Reg& reg) { - const reg_pack_t r = reg_pack(reg); - havoc_offsets(inv, reg); - inv -= r.svalue; - inv -= r.uvalue; + // Cap the maximum range we'll check. + constexpr int max_range = 32; + return *map_fd_interval.finite_size() < max_range; } -void ebpf_domain_t::scratch_caller_saved_registers() { - for (int i = R1_ARG; i <= R5_ARG; i++) { - Reg r{gsl::narrow(i)}; - havoc_register(m_inv, r); - type_inv.havoc_type(m_inv, r); +// All maps in the range must have the same type for us to use it. +std::optional ebpf_domain_t::get_map_type(const Reg& map_fd_reg) const { + int32_t start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return std::optional(); } -} -void ebpf_domain_t::save_callee_saved_registers(const std::string& prefix) { - // Create variables specific to the new call stack frame that store - // copies of the states of r6 through r9. 
- for (int r = R6; r <= R9; r++) { - for (const data_kind_t kind : iterate_kinds()) { - const variable_t src_var = variable_t::reg(kind, r); - if (!m_inv[src_var].is_top()) { - assign(variable_t::stack_frame_var(kind, r, prefix), src_var); - } + std::optional type; + for (int32_t map_fd = start_fd; map_fd <= end_fd; map_fd++) { + EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); + if (map == nullptr) { + return std::optional(); } - } -} - -void ebpf_domain_t::restore_callee_saved_registers(const std::string& prefix) { - for (int r = R6; r <= R9; r++) { - for (const data_kind_t kind : iterate_kinds()) { - const variable_t src_var = variable_t::stack_frame_var(kind, r, prefix); - if (!m_inv[src_var].is_top()) { - assign(variable_t::reg(kind, r), src_var); - } else { - havoc(variable_t::reg(kind, r)); - } - havoc(src_var); + if (!type.has_value()) { + type = map->type; + } else if (map->type != *type) { + return std::optional(); } } + return type; } -void ebpf_domain_t::havoc_subprogram_stack(const std::string& prefix) { - // Calculate the call stack depth being returned from. Since we're returning - // *to* the given prefix, the current call stack is 2 + the number of - // '/' separators because we need to account for the current frame and the root frame. - const int call_stack_depth = 2 + std::ranges::count(prefix, STACK_FRAME_DELIMITER); - - const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; - const auto intv = m_inv.eval_interval(r10_stack_offset); - if (!intv.is_singleton()) { - return; - } - const int64_t stack_offset = intv.singleton()->cast_to(); - const int32_t stack_start = stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth; - for (const data_kind_t kind : iterate_kinds()) { - stack.havoc(m_inv, kind, stack_start, EBPF_SUBPROGRAM_STACK_SIZE); +// All maps in the range must have the same inner map fd for us to use it. 
+std::optional ebpf_domain_t::get_map_inner_map_fd(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return {}; } -} -void ebpf_domain_t::forget_packet_pointers() { - using namespace crab::dsl_syntax; - - for (const variable_t type_variable : variable_t::get_type_variables()) { - if (type_inv.has_type(m_inv, type_variable, T_PACKET)) { - havoc(variable_t::kind_var(data_kind_t::types, type_variable)); - havoc(variable_t::kind_var(data_kind_t::packet_offsets, type_variable)); - havoc(variable_t::kind_var(data_kind_t::svalues, type_variable)); - havoc(variable_t::kind_var(data_kind_t::uvalues, type_variable)); + std::optional inner_map_fd; + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); + if (map == nullptr) { + return {}; + } + if (!inner_map_fd.has_value()) { + inner_map_fd = map->inner_map_fd; + } else if (map->type != *inner_map_fd) { + return {}; } } - - initialize_packet(*this); + return inner_map_fd; } -static void overflow_bounds(NumAbsDomain& inv, variable_t lhs, number_t span, int finite_width, bool issigned) { - using namespace crab::dsl_syntax; - auto interval = inv[lhs]; - if (interval.ub() - interval.lb() >= span) { - // Interval covers the full space. - inv -= lhs; - return; - } - if (interval.is_bottom()) { - inv -= lhs; - return; - } - number_t lb_value = interval.lb().number().value(); - number_t ub_value = interval.ub().number().value(); - - // Compute the interval, taking overflow into account. - // For a signed result, we need to ensure the signed and unsigned results match - // so for a 32-bit operation, 0x80000000 should be a positive 64-bit number not - // a sign extended negative one. 
- number_t lb = lb_value.truncate_to_uint(finite_width); - number_t ub = ub_value.truncate_to_uint(finite_width); - if (issigned) { - lb = lb.truncate_to(); - ub = ub.truncate_to(); - } - if (lb > ub) { - // Range wraps in the middle, so we cannot represent as an unsigned interval. - inv -= lhs; - return; - } - auto new_interval = interval_t{lb, ub}; - if (new_interval != interval) { - // Update the variable, which will lose any relationships to other variables. - inv.set(lhs, new_interval); - } -} - -static void overflow_signed(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { - const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} - : finite_width == 32 ? number_t{std::numeric_limits::max()} - : throw std::exception()}; - overflow_bounds(inv, lhs, span, finite_width, true); -} - -static void overflow_unsigned(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { - const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} - : finite_width == 32 ? 
number_t{std::numeric_limits::max()} - : throw std::exception()}; - overflow_bounds(inv, lhs, span, finite_width, false); -} -static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const number_t& z, const int finite_width) { - inv.apply(op, xs, y, z, finite_width); - if (finite_width) { - inv.assign(xu, xs); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const number_t& z, const int finite_width) { - inv.apply(op, xu, y, z, finite_width); - if (finite_width) { - inv.assign(xs, xu); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const variable_t z, const int finite_width) { - inv.apply(op, xs, y, z, finite_width); - if (finite_width) { - inv.assign(xu, xs); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, - const variable_t y, const variable_t z, const int finite_width) { - inv.apply(op, xu, y, z, finite_width); - if (finite_width) { - inv.assign(xs, xu); - overflow_signed(inv, xs, finite_width); - overflow_unsigned(inv, xu, finite_width); - } -} - -void ebpf_domain_t::add(const variable_t lhs, const variable_t op2) { - apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); -} -void ebpf_domain_t::add(const variable_t lhs, const number_t& op2) { - apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); -} -void ebpf_domain_t::sub(const variable_t lhs, const variable_t op2) { - apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); -} -void ebpf_domain_t::sub(const 
variable_t lhs, const number_t& op2) { - apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); -} - -// Add/subtract with overflow are both signed and unsigned. We can use either one of the two to compute the -// result before adjusting for overflow, though if one is top we want to use the other to retain precision. -void ebpf_domain_t::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_domain_t::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_domain_t::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, - finite_width); -} -void ebpf_domain_t::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, - const int finite_width) { - apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? 
lhss : lhsu, op2, - finite_width); -} - -void ebpf_domain_t::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, -1, finite_width); -} -void ebpf_domain_t::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); -} -void ebpf_domain_t::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { - apply_unsigned(m_inv, 
arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { - apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); -} - -void ebpf_domain_t::bitwise_and(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - // Use finite width 64 to make the svalue be set as well as the uvalue. - apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_domain_t::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_domain_t::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, - const int finite_width) { - apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, finite_width); -} -void ebpf_domain_t::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_domain_t::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { - apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); -} -void ebpf_domain_t::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { - apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); -} - -static void assume(NumAbsDomain& inv, const linear_constraint_t& cst) { inv += cst; } -void 
ebpf_domain_t::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } - -void ebpf_domain_t::require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const { - if (check_require) { - check_require(inv, cst, s + " (" + this->current_assertion + ")"); - } - if (thread_local_options.assume_assertions) { - // avoid redundant errors - crab::assume(inv, cst); - } -} - -/// Forget everything we know about the value of a variable. -void ebpf_domain_t::havoc(const variable_t v) { m_inv -= v; } -void ebpf_domain_t::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } - -void ebpf_domain_t::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } - -static linear_constraint_t type_is_pointer(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type >= T_CTX; -} - -static linear_constraint_t type_is_number(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type == T_NUM; -} - -static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } - -static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { - using namespace crab::dsl_syntax; - return r.type != T_STACK; -} - -void ebpf_domain_t::operator()(const Assertion& assertion) { - if (check_require || thread_local_options.assume_assertions) { - this->current_assertion = to_string(assertion); - std::visit(*this, assertion); - this->current_assertion.clear(); - } -} - -void ebpf_domain_t::operator()(const basic_block_t& bb) { - for (const GuardedInstruction& ins : bb) { - for (const Assertion& assertion : ins.preconditions) { - (*this)(assertion); - } - std::visit(*this, ins.cmd); - } -} - -void ebpf_domain_t::check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const int call_stack_depth) const { - using namespace crab::dsl_syntax; - const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; - const auto interval 
= inv.eval_interval(r10_stack_offset); - if (interval.is_singleton()) { - const int64_t stack_offset = interval.singleton()->cast_to(); - require(inv, lb >= stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth, - "Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth"); - } - require(inv, ub <= EBPF_TOTAL_STACK_SIZE, "Upper bound must be at most EBPF_TOTAL_STACK_SIZE"); -} - -void ebpf_domain_t::check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, - const linear_expression_t& ub) const { - using namespace crab::dsl_syntax; - require(inv, lb >= 0, "Lower bound must be at least 0"); - require(inv, ub <= global_program_info->type.context_descriptor->size, - std::string("Upper bound must be at most ") + - std::to_string(global_program_info->type.context_descriptor->size)); -} - -void ebpf_domain_t::check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const std::optional packet_size) const { - using namespace crab::dsl_syntax; - require(inv, lb >= variable_t::meta_offset(), "Lower bound must be at least meta_offset"); - if (packet_size) { - require(inv, ub <= *packet_size, "Upper bound must be at most packet_size"); - } else { - require(inv, ub <= MAX_PACKET_SIZE, - std::string{"Upper bound must be at most "} + std::to_string(MAX_PACKET_SIZE)); - } -} - -void ebpf_domain_t::check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - const variable_t shared_region_size) const { - using namespace crab::dsl_syntax; - require(inv, lb >= 0, "Lower bound must be at least 0"); - require(inv, ub <= shared_region_size, std::string("Upper bound must be at most ") + shared_region_size.name()); -} - -void ebpf_domain_t::operator()(const Assume& s) { - const Condition cond = s.cond; - const auto dst = reg_pack(cond.left); - if (const auto psrc_reg = std::get_if(&cond.right)) { - const auto src_reg = *psrc_reg; - const auto src 
= reg_pack(src_reg); - if (type_inv.same_type(m_inv, cond.left, std::get(cond.right))) { - m_inv = type_inv.join_over_types(m_inv, cond.left, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == T_NUM) { - for (const linear_constraint_t& cst : - assume_cst_reg(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, src.svalue, src.uvalue)) { - inv += cst; - } - } else { - // Either pointers to a singleton region, - // or an equality comparison on map descriptors/pointers to non-singleton locations - if (const auto dst_offset = get_type_offset_variable(cond.left, type)) { - if (const auto src_offset = get_type_offset_variable(src_reg, type)) { - inv += assume_cst_offsets_reg(cond.op, dst_offset.value(), src_offset.value()); - } - } - } - }); - } else { - // We should only reach here if `--assume-assert` is off - assert(!thread_local_options.assume_assertions || is_bottom()); - // be sound in any case, it happens to flush out bugs: - m_inv.set_to_top(); - } - } else { - const int64_t imm = gsl::narrow_cast(std::get(cond.right).v); - for (const linear_constraint_t& cst : assume_cst_imm(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, imm)) { - assume(cst); - } - } -} - -void ebpf_domain_t::operator()(const Undefined& a) {} - -// Simple truncation function usable with swap_endianness(). -template -constexpr T truncate(T x) noexcept { - return x; -} - -void ebpf_domain_t::operator()(const Un& stmt) { - const auto dst = reg_pack(stmt.dst); - auto swap_endianness = [&](const variable_t v, auto be_or_le) { - if (m_inv.entail(type_is_number(stmt.dst))) { - if (const auto n = m_inv.eval_interval(v).singleton()) { - if (n->fits_cast_to()) { - m_inv.set(v, interval_t{be_or_le(n->cast_to())}); - return; - } - } - } - havoc(v); - havoc_offsets(stmt.dst); - }; - // Swap bytes if needed. For 64-bit types we need the weights to fit in a - // signed int64, but for smaller types we don't want sign extension, - // so we use unsigned which still fits in a signed int64. 
- switch (stmt.op) { - case Un::Op::BE16: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::BE32: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::BE64: - if (!thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } - break; - case Un::Op::LE16: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::LE32: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } else { - swap_endianness(dst.svalue, truncate); - swap_endianness(dst.uvalue, truncate); - } - break; - case Un::Op::LE64: - if (thread_local_options.big_endian) { - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - } - break; - case Un::Op::SWAP16: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::SWAP32: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::SWAP64: - swap_endianness(dst.svalue, boost::endian::endian_reverse); - 
swap_endianness(dst.uvalue, boost::endian::endian_reverse); - break; - case Un::Op::NEG: - neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32); - havoc_offsets(stmt.dst); - break; - } -} - -void ebpf_domain_t::operator()(const Exit& a) { - // Clean up any state for the current stack frame. - const std::string prefix = a.stack_frame_prefix; - if (prefix.empty()) { - return; - } - havoc_subprogram_stack(prefix); - restore_callee_saved_registers(prefix); -} - -void ebpf_domain_t::operator()(const Jmp&) const { - // This is a NOP. It only exists to hold the jump preconditions. -} - -void ebpf_domain_t::operator()(const Comparable& s) { - using namespace crab::dsl_syntax; - if (type_inv.same_type(m_inv, s.r1, s.r2)) { - // Same type. If both are numbers, that's okay. Otherwise: - auto inv = m_inv.when(reg_pack(s.r2).type != T_NUM); - // We must check that they belong to a singleton region: - if (!type_inv.is_in_group(inv, s.r1, TypeGroup::singleton_ptr) && - !type_inv.is_in_group(inv, s.r1, TypeGroup::map_fd)) { - require(inv, linear_constraint_t::false_const(), "Cannot subtract pointers to non-singleton regions"); - return; - } - // And, to avoid wraparound errors, they must be within bounds. - this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r1, 0, Imm{0}, false}); - this->operator()(ValidAccess{MAX_CALL_STACK_FRAMES, s.r2, 0, Imm{0}, false}); - } else { - // _Maybe_ different types, so r2 must be a number. - // We checked in a previous assertion that r1 is a pointer or a number. 
- require(m_inv, reg_pack(s.r2).type == T_NUM, "Cannot subtract pointers to different regions"); - }; -} - -void ebpf_domain_t::operator()(const Addable& s) { - if (!type_inv.implies_type(m_inv, type_is_pointer(reg_pack(s.ptr)), type_is_number(s.num))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be added to pointers"); - } -} - -void ebpf_domain_t::operator()(const ValidDivisor& s) { - using namespace crab::dsl_syntax; - const auto reg = reg_pack(s.reg); - if (!type_inv.implies_type(m_inv, type_is_pointer(reg), type_is_number(s.reg))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be used as divisors"); - } - if (!thread_local_options.allow_division_by_zero) { - const auto v = s.is_signed ? reg.svalue : reg.uvalue; - require(m_inv, v != 0, "Possible division by zero"); - } -} - -void ebpf_domain_t::operator()(const ValidStore& s) { - if (!type_inv.implies_type(m_inv, type_is_not_stack(reg_pack(s.mem)), type_is_number(s.val))) { - require(m_inv, linear_constraint_t::false_const(), "Only numbers can be stored to externally-visible regions"); - } -} - -void ebpf_domain_t::operator()(const TypeConstraint& s) { - if (!type_inv.is_in_group(m_inv, s.reg, s.types)) { - require(m_inv, linear_constraint_t::false_const(), "Invalid type"); - } -} - -void ebpf_domain_t::operator()(const BoundedLoopCount& s) { - // Enforces an upper bound on loop iterations by checking that the loop counter - // does not exceed the specified limit - using namespace crab::dsl_syntax; - const auto counter = variable_t::loop_counter(to_string(s.name)); - require(m_inv, counter <= s.limit, "Loop counter is too large"); -} - -void ebpf_domain_t::operator()(const FuncConstraint& s) { - // Look up the helper function id. - const reg_pack_t& reg = reg_pack(s.reg); - const auto src_interval = m_inv.eval_interval(reg.svalue); - if (const auto sn = src_interval.singleton()) { - if (sn->fits()) { - // We can now process it as if the id was immediate. 
- const int32_t imm = sn->cast_to(); - if (!global_program_info->platform->is_helper_usable(imm)) { - require(m_inv, linear_constraint_t::false_const(), "invalid helper function id " + std::to_string(imm)); - return; - } - Call call = make_call(imm, *global_program_info->platform); - for (const Assertion& assertion : get_assertions(call, *global_program_info, {})) { - (*this)(assertion); - } - return; - } - } - require(m_inv, linear_constraint_t::false_const(), "callx helper function id is not a valid singleton"); -} - -void ebpf_domain_t::operator()(const ValidSize& s) { - using namespace crab::dsl_syntax; - const auto r = reg_pack(s.reg); - require(m_inv, s.can_be_zero ? r.svalue >= 0 : r.svalue > 0, "Invalid size"); -} - -// Get the start and end of the range of possible map fd values. -// In the future, it would be cleaner to use a set rather than an interval -// for map fds. -bool ebpf_domain_t::get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const { - const interval_t& map_fd_interval = m_inv[reg_pack(map_fd_reg).map_fd]; - const auto lb = map_fd_interval.lb().number(); - const auto ub = map_fd_interval.ub().number(); - if (!lb || !lb->fits() || !ub || !ub->fits()) { - return false; - } - *start_fd = lb->narrow(); - *end_fd = ub->narrow(); - - // Cap the maximum range we'll check. - constexpr int max_range = 32; - return *map_fd_interval.finite_size() < max_range; -} - -// All maps in the range must have the same type for us to use it. 
-std::optional ebpf_domain_t::get_map_type(const Reg& map_fd_reg) const { - int32_t start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return std::optional(); - } - - std::optional type; - for (int32_t map_fd = start_fd; map_fd <= end_fd; map_fd++) { - EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); - if (map == nullptr) { - return std::optional(); - } - if (!type.has_value()) { - type = map->type; - } else if (map->type != *type) { - return std::optional(); - } - } - return type; -} - -// All maps in the range must have the same inner map fd for us to use it. -std::optional ebpf_domain_t::get_map_inner_map_fd(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return {}; - } - - std::optional inner_map_fd; - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd); - if (map == nullptr) { - return {}; - } - if (!inner_map_fd.has_value()) { - inner_map_fd = map->inner_map_fd; - } else if (map->type != *inner_map_fd) { - return {}; - } - } - return inner_map_fd; -} - -// We can deal with a range of key sizes. -interval_t ebpf_domain_t::get_map_key_size(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t{map->key_size}; - } else { - return interval_t::top(); - } - } - return result; -} - -// We can deal with a range of value sizes. 
-interval_t ebpf_domain_t::get_map_value_size(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t(map->value_size); - } else { - return interval_t::top(); - } - } - return result; -} - -// We can deal with a range of max_entries values. -interval_t ebpf_domain_t::get_map_max_entries(const Reg& map_fd_reg) const { - int start_fd, end_fd; - if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { - return interval_t::top(); - } - - interval_t result = interval_t::bottom(); - for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { - if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { - result = result | interval_t(map->max_entries); - } else { - return interval_t::top(); - } - } - return result; -} - -void ebpf_domain_t::operator()(const ValidCall& s) { - if (!s.stack_frame_prefix.empty()) { - const EbpfHelperPrototype proto = global_program_info->platform->get_helper_prototype(s.func); - if (proto.return_type == EBPF_RETURN_TYPE_INTEGER_OR_NO_RETURN_IF_SUCCEED) { - require(m_inv, linear_constraint_t::false_const(), "tail call not supported in subprogram"); - return; - } - } -} - -void ebpf_domain_t::operator()(const ValidMapKeyValue& s) { - using namespace crab::dsl_syntax; - - const auto fd_type = get_map_type(s.map_fd_reg); - - const auto access_reg = reg_pack(s.access_reg); - int width; - if (s.key) { - const auto key_size = get_map_key_size(s.map_fd_reg).singleton(); - if (!key_size.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Map key size is not singleton"); - return; - } - width = key_size->narrow(); - } else { - const auto value_size = 
get_map_value_size(s.map_fd_reg).singleton(); - if (!value_size.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Map value size is not singleton"); - return; - } - width = value_size->narrow(); - } - - m_inv = type_inv.join_over_types(m_inv, s.access_reg, [&](NumAbsDomain& inv, type_encoding_t access_reg_type) { - if (access_reg_type == T_STACK) { - variable_t lb = access_reg.stack_offset; - linear_expression_t ub = lb + width; - if (!stack.all_num(inv, lb, ub)) { - auto lb_is = inv[lb].lb().number(); - std::string lb_s = lb_is && lb_is->fits() ? std::to_string(lb_is->narrow()) : "-oo"; - auto ub_is = inv.eval_interval(ub).ub().number(); - std::string ub_s = ub_is && ub_is->fits() ? std::to_string(ub_is->narrow()) : "oo"; - require(inv, linear_constraint_t::false_const(), - "Illegal map update with a non-numerical value [" + lb_s + "-" + ub_s + ")"); - } else if (thread_local_options.strict && fd_type.has_value()) { - EbpfMapType map_type = global_program_info->platform->get_map_type(*fd_type); - if (map_type.is_array) { - // Get offset value. - variable_t key_ptr = access_reg.stack_offset; - std::optional offset = inv[key_ptr].singleton(); - if (!offset.has_value()) { - require(inv, linear_constraint_t::false_const(), "Pointer must be a singleton"); - } else if (s.key) { - // Look up the value pointed to by the key pointer. - variable_t key_value = - variable_t::cell_var(data_kind_t::svalues, offset.value(), sizeof(uint32_t)); - - if (auto max_entries = get_map_max_entries(s.map_fd_reg).lb().number()) { - require(inv, key_value < *max_entries, "Array index overflow"); - } else { - require(inv, linear_constraint_t::false_const(), "Max entries is not finite"); - } - require(inv, key_value >= 0, "Array index underflow"); - } - } - } - } else if (access_reg_type == T_PACKET) { - variable_t lb = access_reg.packet_offset; - linear_expression_t ub = lb + width; - check_access_packet(inv, lb, ub, {}); - // Packet memory is both readable and writable. 
- } else if (access_reg_type == T_SHARED) { - variable_t lb = access_reg.shared_offset; - linear_expression_t ub = lb + width; - check_access_shared(inv, lb, ub, access_reg.shared_region_size); - require(inv, access_reg.svalue > 0, "Possible null access"); - // Shared memory is zero-initialized when created so is safe to read and write. - } else { - require(inv, linear_constraint_t::false_const(), "Only stack or packet can be used as a parameter"); - } - }); -} -static std::tuple lb_ub_access_pair(const ValidAccess& s, - const variable_t offset_var) { - using namespace crab::dsl_syntax; - linear_expression_t lb = offset_var + s.offset; - linear_expression_t ub = std::holds_alternative(s.width) ? lb + std::get(s.width).v - : lb + reg_pack(std::get(s.width)).svalue; - return {lb, ub}; -} -void ebpf_domain_t::operator()(const ValidAccess& s) { - using namespace crab::dsl_syntax; - - const bool is_comparison_check = s.width == Value{Imm{0}}; - - const auto reg = reg_pack(s.reg); - // join_over_types instead of simple iteration is only needed for assume-assert - m_inv = type_inv.join_over_types(m_inv, s.reg, [&](NumAbsDomain& inv, type_encoding_t type) { - switch (type) { - case T_PACKET: { - auto [lb, ub] = lb_ub_access_pair(s, reg.packet_offset); - check_access_packet(inv, lb, ub, - is_comparison_check ? std::optional{} : variable_t::packet_size()); - // if within bounds, it can never be null - // Context memory is both readable and writable. - break; - } - case T_STACK: { - auto [lb, ub] = lb_ub_access_pair(s, reg.stack_offset); - check_access_stack(inv, lb, ub, s.call_stack_depth); - // if within bounds, it can never be null - if (s.access_type == AccessType::read) { - // Require that the stack range contains numbers. 
- if (!stack.all_num(inv, lb, ub)) { - if (s.offset < 0) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } else if (const auto pimm = std::get_if(&s.width)) { - if (!inv.entail(gsl::narrow(pimm->v) <= reg.stack_numeric_size - s.offset)) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } - } else { - if (!inv.entail(reg_pack(std::get(s.width)).svalue <= reg.stack_numeric_size - s.offset)) { - require(inv, linear_constraint_t::false_const(), "Stack content is not numeric"); - } - } - } - } - break; - } - case T_CTX: { - auto [lb, ub] = lb_ub_access_pair(s, reg.ctx_offset); - check_access_context(inv, lb, ub); - // if within bounds, it can never be null - // The context is both readable and writable. - break; - } - case T_SHARED: { - auto [lb, ub] = lb_ub_access_pair(s, reg.shared_offset); - check_access_shared(inv, lb, ub, reg.shared_region_size); - if (!is_comparison_check && !s.or_null) { - require(inv, reg.svalue > 0, "Possible null access"); - } - // Shared memory is zero-initialized when created so is safe to read and write. 
- break; - } - case T_NUM: - if (!is_comparison_check) { - if (s.or_null) { - require(inv, reg.svalue == 0, "Non-null number"); - } else { - require(inv, linear_constraint_t::false_const(), "Only pointers can be dereferenced"); - } - } - break; - case T_MAP: - case T_MAP_PROGRAMS: - if (!is_comparison_check) { - require(inv, linear_constraint_t::false_const(), "FDs cannot be dereferenced directly"); - } - break; - default: require(inv, linear_constraint_t::false_const(), "Invalid type"); break; - } - }); -} - -void ebpf_domain_t::operator()(const ZeroCtxOffset& s) { - using namespace crab::dsl_syntax; - const auto reg = reg_pack(s.reg); - require(m_inv, reg.ctx_offset == 0, "Nonzero context offset"); -} - -void ebpf_domain_t::operator()(const Packet& a) { - const auto reg = reg_pack(R0_RETURN_VALUE); - constexpr Reg r0_reg{R0_RETURN_VALUE}; - type_inv.assign_type(m_inv, r0_reg, T_NUM); - havoc_offsets(r0_reg); - havoc(reg.svalue); - havoc(reg.uvalue); - scratch_caller_saved_registers(); -} - -void ebpf_domain_t::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, - const int width, const Reg& src_reg) { - type_inv.assign_type(inv, target_reg, stack.load(inv, data_kind_t::types, addr, width)); - using namespace crab::dsl_syntax; - if (inv.entail(width <= reg_pack(src_reg).stack_numeric_size)) { - type_inv.assign_type(inv, target_reg, T_NUM); - } - - const reg_pack_t& target = reg_pack(target_reg); - if (width == 1 || width == 2 || width == 4 || width == 8) { - // Use the addr before we havoc the destination register since we might be getting the - // addr from that same register. 
- const std::optional sresult = stack.load(inv, data_kind_t::svalues, addr, width); - const std::optional uresult = stack.load(inv, data_kind_t::uvalues, addr, width); - havoc_register(inv, target_reg); - inv.assign(target.svalue, sresult); - inv.assign(target.uvalue, uresult); - - if (type_inv.has_type(inv, target.type, T_CTX)) { - inv.assign(target.ctx_offset, stack.load(inv, data_kind_t::ctx_offsets, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_MAP) || type_inv.has_type(inv, target.type, T_MAP_PROGRAMS)) { - inv.assign(target.map_fd, stack.load(inv, data_kind_t::map_fds, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_PACKET)) { - inv.assign(target.packet_offset, stack.load(inv, data_kind_t::packet_offsets, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_SHARED)) { - inv.assign(target.shared_offset, stack.load(inv, data_kind_t::shared_offsets, addr, width)); - inv.assign(target.shared_region_size, stack.load(inv, data_kind_t::shared_region_sizes, addr, width)); - } - if (type_inv.has_type(inv, target.type, T_STACK)) { - inv.assign(target.stack_offset, stack.load(inv, data_kind_t::stack_offsets, addr, width)); - inv.assign(target.stack_numeric_size, stack.load(inv, data_kind_t::stack_numeric_sizes, addr, width)); - } - } else { - havoc_register(inv, target_reg); - } -} - -void ebpf_domain_t::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, - const int width) { - using namespace crab::dsl_syntax; - if (inv.is_bottom()) { - return; - } - - const ebpf_context_descriptor_t* desc = global_program_info->type.context_descriptor; - - const reg_pack_t& target = reg_pack(target_reg); - - if (desc->end < 0) { - havoc_register(inv, target_reg); - type_inv.assign_type(inv, target_reg, T_NUM); - return; - } - - const interval_t interval = inv.eval_interval(addr_vague); - const std::optional maybe_addr = interval.singleton(); - havoc_register(inv, target_reg); - - const bool 
may_touch_ptr = - interval.contains(desc->data) || interval.contains(desc->meta) || interval.contains(desc->end); - - if (!maybe_addr) { - if (may_touch_ptr) { - type_inv.havoc_type(inv, target_reg); - } else { - type_inv.assign_type(inv, target_reg, T_NUM); - } - return; +// We can deal with a range of key sizes. +interval_t ebpf_domain_t::get_map_key_size(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } - const number_t addr = *maybe_addr; - - // We use offsets for packet data, data_end, and meta during verification, - // but at runtime they will be 64-bit pointers. We can use the offset values - // for verification like we use map_fd's as a proxy for maps which - // at runtime are actually 64-bit memory pointers. - const int offset_width = desc->end - desc->data; - if (addr == desc->data) { - if (width == offset_width) { - inv.assign(target.packet_offset, 0); - } - } else if (addr == desc->end) { - if (width == offset_width) { - inv.assign(target.packet_offset, variable_t::packet_size()); - } - } else if (addr == desc->meta) { - if (width == offset_width) { - inv.assign(target.packet_offset, variable_t::meta_offset()); - } - } else { - if (may_touch_ptr) { - type_inv.havoc_type(inv, target_reg); + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t{map->key_size}; } else { - type_inv.assign_type(inv, target_reg, T_NUM); + return interval_t::top(); } - return; - } - if (width == offset_width) { - type_inv.assign_type(inv, target_reg, T_PACKET); - inv += 4098 <= target.svalue; - inv += target.svalue <= PTR_MAX; - } -} - -void ebpf_domain_t::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, - const int width) { - if (inv.is_bottom()) { - return; 
- } - const reg_pack_t& target = reg_pack(target_reg); - - type_inv.assign_type(inv, target_reg, T_NUM); - havoc_register(inv, target_reg); - - // A 1 or 2 byte copy results in a limited range of values that may be used as array indices. - if (width == 1) { - inv.set(target.svalue, interval_t::full()); - inv.set(target.uvalue, interval_t::full()); - } else if (width == 2) { - inv.set(target.svalue, interval_t::full()); - inv.set(target.uvalue, interval_t::full()); - } -} - -void ebpf_domain_t::do_load(const Mem& b, const Reg& target_reg) { - using namespace crab::dsl_syntax; - - const auto mem_reg = reg_pack(b.access.basereg); - const int width = b.access.width; - const int offset = b.access.offset; - - if (b.access.basereg.v == R10_STACK_POINTER) { - const linear_expression_t addr = mem_reg.stack_offset + offset; - do_load_stack(m_inv, target_reg, addr, width, b.access.basereg); - return; } - - m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, type_encoding_t type) { - switch (type) { - case T_UNINIT: return; - case T_MAP: return; - case T_MAP_PROGRAMS: return; - case T_NUM: return; - case T_CTX: { - linear_expression_t addr = mem_reg.ctx_offset + offset; - do_load_ctx(inv, target_reg, addr, width); - break; - } - case T_STACK: { - linear_expression_t addr = mem_reg.stack_offset + offset; - do_load_stack(inv, target_reg, addr, width, b.access.basereg); - break; - } - case T_PACKET: { - linear_expression_t addr = mem_reg.packet_offset + offset; - do_load_packet_or_shared(inv, target_reg, addr, width); - break; - } - default: { - linear_expression_t addr = mem_reg.shared_offset + offset; - do_load_packet_or_shared(inv, target_reg, addr, width); - break; - } - } - }); + return result; } -void ebpf_domain_t::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, - const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, - const std::optional& 
opt_val_reg) { - { - const std::optional var = stack.store_type(inv, addr, width, val_type); - type_inv.assign_type(inv, var, val_type); - } - if (width == 8) { - inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); - inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_CTX)) { - inv.assign(stack.store(inv, data_kind_t::ctx_offsets, addr, width, opt_val_reg->ctx_offset), - opt_val_reg->ctx_offset); - } else { - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - } - - if (opt_val_reg && - (type_inv.has_type(m_inv, val_type, T_MAP) || type_inv.has_type(m_inv, val_type, T_MAP_PROGRAMS))) { - inv.assign(stack.store(inv, data_kind_t::map_fds, addr, width, opt_val_reg->map_fd), opt_val_reg->map_fd); - } else { - stack.havoc(inv, data_kind_t::map_fds, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_PACKET)) { - inv.assign(stack.store(inv, data_kind_t::packet_offsets, addr, width, opt_val_reg->packet_offset), - opt_val_reg->packet_offset); - } else { - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_SHARED)) { - inv.assign(stack.store(inv, data_kind_t::shared_offsets, addr, width, opt_val_reg->shared_offset), - opt_val_reg->shared_offset); - inv.assign(stack.store(inv, data_kind_t::shared_region_sizes, addr, width, opt_val_reg->shared_region_size), - opt_val_reg->shared_region_size); - } else { - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - } - - if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_STACK)) { - inv.assign(stack.store(inv, data_kind_t::stack_offsets, addr, width, opt_val_reg->stack_offset), - opt_val_reg->stack_offset); - inv.assign(stack.store(inv, data_kind_t::stack_numeric_sizes, addr, width, opt_val_reg->stack_numeric_size), - 
opt_val_reg->stack_numeric_size); - } else { - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); - } - } else { - if ((width == 1 || width == 2 || width == 4) && type_inv.get_type(m_inv, val_type) == T_NUM) { - // Keep track of numbers on the stack that might be used as array indices. - inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); - inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); - } else { - stack.havoc(inv, data_kind_t::svalues, addr, width); - stack.havoc(inv, data_kind_t::uvalues, addr, width); - } - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - stack.havoc(inv, data_kind_t::map_fds, addr, width); - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); - } - - // Update stack_numeric_size for any stack type variables. - // stack_numeric_size holds the number of continuous bytes starting from stack_offset that are known to be numeric. - auto updated_lb = m_inv.eval_interval(addr).lb(); - auto updated_ub = m_inv.eval_interval(addr).ub() + width; - for (const variable_t type_variable : variable_t::get_type_variables()) { - if (!type_inv.has_type(inv, type_variable, T_STACK)) { - continue; - } - const variable_t stack_offset_variable = variable_t::kind_var(data_kind_t::stack_offsets, type_variable); - const variable_t stack_numeric_size_variable = - variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); - - using namespace crab::dsl_syntax; - // See if the variable's numeric interval overlaps with changed bytes. 
- if (m_inv.intersect(dsl_syntax::operator<=(addr, stack_offset_variable + stack_numeric_size_variable)) && - m_inv.intersect(operator>=(addr + width, stack_offset_variable))) { - havoc(stack_numeric_size_variable); - recompute_stack_numeric_size(m_inv, type_variable); - } +// We can deal with a range of value sizes. +interval_t ebpf_domain_t::get_map_value_size(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } -} -void ebpf_domain_t::operator()(const Mem& b) { - if (m_inv.is_bottom()) { - return; - } - if (const auto preg = std::get_if(&b.value)) { - if (b.is_load) { - do_load(b, *preg); + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t(map->value_size); } else { - const auto data_reg = reg_pack(*preg); - do_mem_store(b, data_reg.type, data_reg.svalue, data_reg.uvalue, data_reg); - } - } else { - const uint64_t imm = std::get(b.value).v; - do_mem_store(b, T_NUM, to_signed(imm), imm, {}); - } -} - -void ebpf_domain_t::do_mem_store(const Mem& b, const linear_expression_t& val_type, - const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, - const std::optional& opt_val_reg) { - if (m_inv.is_bottom()) { - return; - } - const int width = b.access.width; - const number_t offset{b.access.offset}; - if (b.access.basereg.v == R10_STACK_POINTER) { - const auto r10_stack_offset = reg_pack(b.access.basereg).stack_offset; - const auto r10_interval = m_inv.eval_interval(r10_stack_offset); - if (r10_interval.is_singleton()) { - const int32_t stack_offset = r10_interval.singleton()->cast_to(); - const number_t base_addr{stack_offset}; - do_store_stack(m_inv, base_addr + offset, width, val_type, val_svalue, val_uvalue, opt_val_reg); - } - return; - } - m_inv = 
type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == T_STACK) { - const auto base_addr = linear_expression_t(get_type_offset_variable(b.access.basereg, type).value()); - do_store_stack(inv, dsl_syntax::operator+(base_addr, offset), width, val_type, val_svalue, val_uvalue, - opt_val_reg); - } - // do nothing for any other type - }); -} - -// Construct a Bin operation that does the main operation that a given Atomic operation does atomically. -static Bin atomic_to_bin(const Atomic& a) { - Bin bin{.dst = Reg{R11_ATOMIC_SCRATCH}, .v = a.valreg, .is64 = a.access.width == sizeof(uint64_t), .lddw = false}; - switch (a.op) { - case Atomic::Op::ADD: bin.op = Bin::Op::ADD; break; - case Atomic::Op::OR: bin.op = Bin::Op::OR; break; - case Atomic::Op::AND: bin.op = Bin::Op::AND; break; - case Atomic::Op::XOR: bin.op = Bin::Op::XOR; break; - case Atomic::Op::XCHG: - case Atomic::Op::CMPXCHG: bin.op = Bin::Op::MOV; break; - default: throw std::exception(); - } - return bin; -} - -void ebpf_domain_t::operator()(const Atomic& a) { - if (m_inv.is_bottom()) { - return; - } - if (!m_inv.entail(type_is_pointer(reg_pack(a.access.basereg))) || - !m_inv.entail(type_is_number(reg_pack(a.valreg)))) { - return; - } - if (m_inv.entail(type_is_not_stack(reg_pack(a.access.basereg)))) { - // Shared memory regions are volatile so we can just havoc - // any register that will be updated. - if (a.op == Atomic::Op::CMPXCHG) { - havoc_register(m_inv, Reg{R0_RETURN_VALUE}); - } else if (a.fetch) { - havoc_register(m_inv, a.valreg); - } - return; - } - - // Fetch the current value into the R11 pseudo-register. - constexpr Reg r11{R11_ATOMIC_SCRATCH}; - (*this)(Mem{.access = a.access, .value = r11, .is_load = true}); - - // Compute the new value in R11. - (*this)(atomic_to_bin(a)); - - if (a.op == Atomic::Op::CMPXCHG) { - // For CMPXCHG, store the original value in r0. 
- (*this)(Mem{.access = a.access, .value = Reg{R0_RETURN_VALUE}, .is_load = true}); - - // For the destination, there are 3 possibilities: - // 1) dst.value == r0.value : set R11 to valreg - // 2) dst.value != r0.value : don't modify R11 - // 3) dst.value may or may not == r0.value : set R11 to the union of R11 and valreg - // For now we just havoc the value of R11. - havoc_register(m_inv, r11); - } else if (a.fetch) { - // For other FETCH operations, store the original value in the src register. - (*this)(Mem{.access = a.access, .value = a.valreg, .is_load = true}); - } - - // Store the new value back in the original shared memory location. - // Note that do_mem_store() currently doesn't track shared memory values, - // but stack memory values are tracked and are legal here. - (*this)(Mem{.access = a.access, .value = r11, .is_load = false}); - - // Clear the R11 pseudo-register. - havoc_register(m_inv, r11); - type_inv.havoc_type(m_inv, r11); -} - -void ebpf_domain_t::operator()(const Call& call) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - std::optional maybe_fd_reg{}; - for (ArgSingle param : call.singles) { - switch (param.kind) { - case ArgSingle::Kind::MAP_FD: maybe_fd_reg = param.reg; break; - case ArgSingle::Kind::ANYTHING: - case ArgSingle::Kind::MAP_FD_PROGRAMS: - case ArgSingle::Kind::PTR_TO_MAP_KEY: - case ArgSingle::Kind::PTR_TO_MAP_VALUE: - case ArgSingle::Kind::PTR_TO_CTX: - // Do nothing. We don't track the content of relevant memory regions - break; - } - } - for (ArgPair param : call.pairs) { - switch (param.kind) { - case ArgPair::Kind::PTR_TO_READABLE_MEM_OR_NULL: - case ArgPair::Kind::PTR_TO_READABLE_MEM: - // Do nothing. No side effect allowed. 
- break; - - case ArgPair::Kind::PTR_TO_WRITABLE_MEM: { - bool store_numbers = true; - auto variable = get_type_offset_variable(param.mem); - if (!variable.has_value()) { - require(m_inv, linear_constraint_t::false_const(), "Argument must be a pointer to writable memory"); - return; - } - variable_t addr = variable.value(); - variable_t width = reg_pack(param.size).svalue; - - m_inv = type_inv.join_over_types(m_inv, param.mem, [&](NumAbsDomain& inv, const type_encoding_t type) { - if (type == T_STACK) { - // Pointer to a memory region that the called function may change, - // so we must havoc. - stack.havoc(inv, data_kind_t::types, addr, width); - stack.havoc(inv, data_kind_t::svalues, addr, width); - stack.havoc(inv, data_kind_t::uvalues, addr, width); - stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); - stack.havoc(inv, data_kind_t::map_fds, addr, width); - stack.havoc(inv, data_kind_t::packet_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_offsets, addr, width); - stack.havoc(inv, data_kind_t::stack_offsets, addr, width); - stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); - } else { - store_numbers = false; - } - }); - if (store_numbers) { - // Functions are not allowed to write sensitive data, - // and initialization is guaranteed - stack.store_numbers(m_inv, addr, width); - } - } - } - } - - constexpr Reg r0_reg{R0_RETURN_VALUE}; - const auto r0_pack = reg_pack(r0_reg); - havoc(r0_pack.stack_numeric_size); - if (call.is_map_lookup) { - // This is the only way to get a null pointer - if (maybe_fd_reg) { - if (const auto map_type = get_map_type(*maybe_fd_reg)) { - if (global_program_info->platform->get_map_type(*map_type).value_type == EbpfMapValueType::MAP) { - if (const auto inner_map_fd = get_map_inner_map_fd(*maybe_fd_reg)) { - do_load_mapfd(r0_reg, to_signed(*inner_map_fd), true); - goto out; - } - } else { - assign_valid_ptr(r0_reg, true); - assign(r0_pack.shared_offset, 0); - m_inv.set(r0_pack.shared_region_size, 
get_map_value_size(*maybe_fd_reg)); - type_inv.assign_type(m_inv, r0_reg, T_SHARED); - } - } - } - assign_valid_ptr(r0_reg, true); - assign(r0_pack.shared_offset, 0); - type_inv.assign_type(m_inv, r0_reg, T_SHARED); - } else { - havoc(r0_pack.svalue); - havoc(r0_pack.uvalue); - havoc_offsets(r0_reg); - type_inv.assign_type(m_inv, r0_reg, T_NUM); - // assume(r0_pack.value < 0); for INTEGER_OR_NO_RETURN_IF_SUCCEED. - } -out: - scratch_caller_saved_registers(); - if (call.reallocate_packet) { - forget_packet_pointers(); - } -} - -void ebpf_domain_t::operator()(const CallLocal& call) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - save_callee_saved_registers(call.stack_frame_prefix); -} - -void ebpf_domain_t::operator()(const Callx& callx) { - using namespace crab::dsl_syntax; - if (m_inv.is_bottom()) { - return; - } - - // Look up the helper function id. - const reg_pack_t& reg = reg_pack(callx.func); - const auto src_interval = m_inv.eval_interval(reg.svalue); - if (const auto sn = src_interval.singleton()) { - if (sn->fits()) { - // We can now process it as if the id was immediate. 
- const int32_t imm = sn->cast_to(); - if (!global_program_info->platform->is_helper_usable(imm)) { - return; - } - const Call call = make_call(imm, *global_program_info->platform); - (*this)(call); - } - } -} - -void ebpf_domain_t::do_load_mapfd(const Reg& dst_reg, const int mapfd, const bool maybe_null) { - const EbpfMapDescriptor& desc = global_program_info->platform->get_map_descriptor(mapfd); - const EbpfMapType& type = global_program_info->platform->get_map_type(desc.type); - if (type.value_type == EbpfMapValueType::PROGRAM) { - type_inv.assign_type(m_inv, dst_reg, T_MAP_PROGRAMS); - } else { - type_inv.assign_type(m_inv, dst_reg, T_MAP); - } - const reg_pack_t& dst = reg_pack(dst_reg); - assign(dst.map_fd, mapfd); - assign_valid_ptr(dst_reg, maybe_null); -} - -void ebpf_domain_t::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } - -void ebpf_domain_t::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { - using namespace crab::dsl_syntax; - const reg_pack_t& reg = reg_pack(dst_reg); - havoc(reg.svalue); - havoc(reg.uvalue); - if (maybe_null) { - m_inv += 0 <= reg.svalue; - } else { - m_inv += 0 < reg.svalue; - } - m_inv += reg.svalue <= PTR_MAX; - assign(reg.uvalue, reg.svalue); -} - -// If nothing is known of the stack_numeric_size, -// try to recompute the stack_numeric_size. 
-void ebpf_domain_t::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { - const variable_t stack_numeric_size_variable = - variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); - - if (!inv.eval_interval(stack_numeric_size_variable).is_top()) { - return; - } - - if (type_inv.has_type(inv, type_variable, T_STACK)) { - const int numeric_size = - stack.min_all_num_size(inv, variable_t::kind_var(data_kind_t::stack_offsets, type_variable)); - if (numeric_size > 0) { - inv.assign(stack_numeric_size_variable, numeric_size); - } - } -} - -void ebpf_domain_t::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { - recompute_stack_numeric_size(inv, reg_pack(reg).type); -} - -void ebpf_domain_t::add(const Reg& reg, const int imm, const int finite_width) { - const auto dst = reg_pack(reg); - const auto offset = get_type_offset_variable(reg); - add_overflow(dst.svalue, dst.uvalue, imm, finite_width); - if (offset.has_value()) { - add(offset.value(), imm); - if (imm > 0) { - // Since the start offset is increasing but - // the end offset is not, the numeric size decreases. - sub(dst.stack_numeric_size, imm); - } else if (imm < 0) { - havoc(dst.stack_numeric_size); - } - recompute_stack_numeric_size(m_inv, reg); - } -} - -void ebpf_domain_t::shl(const Reg& dst_reg, int imm, const int finite_width) { - const reg_pack_t dst = reg_pack(dst_reg); - - // The BPF ISA requires masking the imm. - imm &= finite_width - 1; - - if (m_inv.entail(type_is_number(dst))) { - const auto interval = m_inv.eval_interval(dst.uvalue); - if (interval.finite_size()) { - const number_t lb = interval.lb().number().value(); - const number_t ub = interval.ub().number().value(); - uint64_t lb_n = lb.cast_to(); - uint64_t ub_n = ub.cast_to(); - const uint64_t uint_max = finite_width == 64 ? 
uint64_t{std::numeric_limits::max()} - : uint64_t{std::numeric_limits::max()}; - if (lb_n >> (finite_width - imm) != ub_n >> (finite_width - imm)) { - // The bits that will be shifted out to the left are different, - // which means all combinations of remaining bits are possible. - lb_n = 0; - ub_n = uint_max << imm & uint_max; - } else { - // The bits that will be shifted out to the left are identical - // for all values in the interval, so we can safely shift left - // to get a new interval. - lb_n = lb_n << imm & uint_max; - ub_n = ub_n << imm & uint_max; - } - m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.svalue, dst.uvalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; - } - } - shl_overflow(dst.svalue, dst.uvalue, imm); - havoc_offsets(dst_reg); -} - -void ebpf_domain_t::lshr(const Reg& dst_reg, int imm, int finite_width) { - reg_pack_t dst = reg_pack(dst_reg); - - // The BPF ISA requires masking the imm. - imm &= finite_width - 1; - - if (m_inv.entail(type_is_number(dst))) { - auto interval = m_inv.eval_interval(dst.uvalue); - number_t lb_n{0}; - number_t ub_n{std::numeric_limits::max() >> imm}; - if (interval.finite_size()) { - number_t lb = interval.lb().number().value(); - number_t ub = interval.ub().number().value(); - if (finite_width == 64) { - lb_n = lb.cast_to() >> imm; - ub_n = ub.cast_to() >> imm; - } else { - number_t lb_w = lb.cast_to_sint(finite_width); - number_t ub_w = ub.cast_to_sint(finite_width); - lb_n = lb_w.cast_to() >> imm; - ub_n = ub_w.cast_to() >> imm; - - // The interval must be valid since a signed range crossing 0 - // was earlier converted to a full unsigned range. 
- assert(lb_n <= ub_n); - } + return interval_t::top(); } - m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.svalue, dst.uvalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(dst_reg); -} - -static int _movsx_bits(const Bin::Op op) { - switch (op) { - case Bin::Op::MOVSX8: return 8; - case Bin::Op::MOVSX16: return 16; - case Bin::Op::MOVSX32: return 32; - default: throw std::exception(); } + return result; } -void ebpf_domain_t::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, - const Bin::Op op) { - using namespace crab; - - const int bits = _movsx_bits(op); - const reg_pack_t dst = reg_pack(dst_reg); - interval_t right_interval = m_inv.eval_interval(right_svalue); - type_inv.assign_type(m_inv, dst_reg, T_NUM); - havoc_offsets(dst_reg); - const int64_t span = 1ULL << bits; - if (right_interval.ub() - right_interval.lb() >= span) { - // Interval covers the full space. - if (bits == 64) { - havoc(dst.svalue); - return; - } - right_interval = interval_t::signed_int(bits); - } - const int64_t mask = 1ULL << (bits - 1); - - // Sign extend each bound. - int64_t lb = right_interval.lb().number().value().cast_to(); - lb &= span - 1; - lb = (lb ^ mask) - mask; - int64_t ub = right_interval.ub().number().value().cast_to(); - ub &= span - 1; - ub = (ub ^ mask) - mask; - m_inv.set(dst.svalue, interval_t{lb, ub}); - - if (finite_width) { - m_inv.assign(dst.uvalue, dst.svalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); +// We can deal with a range of max_entries values. 
+interval_t ebpf_domain_t::get_map_max_entries(const Reg& map_fd_reg) const { + int start_fd, end_fd; + if (!get_map_fd_range(map_fd_reg, &start_fd, &end_fd)) { + return interval_t::top(); } -} - -void ebpf_domain_t::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { - using namespace crab; - reg_pack_t dst = reg_pack(dst_reg); - if (m_inv.entail(type_is_number(dst))) { - interval_t left_interval = interval_t::bottom(); - interval_t right_interval = interval_t::bottom(); - interval_t left_interval_positive = interval_t::bottom(); - interval_t left_interval_negative = interval_t::bottom(); - get_signed_intervals(m_inv, finite_width == 64, dst.svalue, dst.uvalue, right_svalue, left_interval, - right_interval, left_interval_positive, left_interval_negative); - if (auto sn = right_interval.singleton()) { - // The BPF ISA requires masking the imm. - int64_t imm = sn->cast_to() & (finite_width - 1); - - int64_t lb_n = std::numeric_limits::min() >> imm; - int64_t ub_n = std::numeric_limits::max() >> imm; - if (left_interval.finite_size()) { - const auto [lb, ub] = left_interval.pair_number(); - if (finite_width == 64) { - lb_n = lb.cast_to() >> imm; - ub_n = ub.cast_to() >> imm; - } else { - number_t lb_w = lb.cast_to_sint(finite_width) >> gsl::narrow(imm); - number_t ub_w = ub.cast_to_sint(finite_width) >> gsl::narrow(imm); - if (lb_w.cast_to() <= ub_w.cast_to()) { - lb_n = lb_w.cast_to(); - ub_n = ub_w.cast_to(); - } - } - } - m_inv.set(dst.svalue, interval_t{lb_n, ub_n}); - m_inv.assign(dst.uvalue, dst.svalue); - overflow_signed(m_inv, dst.svalue, finite_width); - overflow_unsigned(m_inv, dst.uvalue, finite_width); - return; + interval_t result = interval_t::bottom(); + for (int map_fd = start_fd; map_fd <= end_fd; map_fd++) { + if (const EbpfMapDescriptor* map = &global_program_info->platform->get_map_descriptor(map_fd)) { + result = result | interval_t(map->max_entries); + } else { + return interval_t::top(); } } - 
havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(dst_reg); -} - -static void apply(NumAbsDomain& inv, const binop_t& op, const variable_t x, const variable_t y, const variable_t z) { - inv.apply(op, x, y, z, 0); + return result; } -void ebpf_domain_t::operator()(const Bin& bin) { - using namespace crab::dsl_syntax; - - auto dst = reg_pack(bin.dst); - int finite_width = bin.is64 ? 64 : 32; - - if (auto pimm = std::get_if(&bin.v)) { - // dst += K - int64_t imm; - if (bin.is64) { - // Use the full signed value. - imm = to_signed(pimm->v); - } else { - // Use only the low 32 bits of the value. - imm = gsl::narrow_cast(pimm->v); - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - // If this is a 32-bit operation and the destination is not a number, forget everything about the register. - if (!type_inv.has_type(m_inv, bin.dst, T_NUM)) { - havoc_register(m_inv, bin.dst); - havoc_offsets(bin.dst); - havoc(dst.type); - } - } - switch (bin.op) { - case Bin::Op::MOV: - assign(dst.svalue, imm); - assign(dst.uvalue, imm); - overflow_unsigned(m_inv, dst.uvalue, bin.is64 ? 
64 : 32); - type_inv.assign_type(m_inv, bin.dst, T_NUM); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOVSX8: - case Bin::Op::MOVSX16: - case Bin::Op::MOVSX32: CRAB_ERROR("Unsupported operation"); - case Bin::Op::ADD: - if (imm == 0) { - return; - } - add(bin.dst, gsl::narrow(imm), finite_width); - break; - case Bin::Op::SUB: - if (imm == 0) { - return; - } - add(bin.dst, gsl::narrow(-imm), finite_width); - break; - case Bin::Op::MUL: - mul(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UDIV: - udiv(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UMOD: - urem(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SDIV: - sdiv(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SMOD: - srem(dst.svalue, dst.uvalue, imm, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::OR: - bitwise_or(dst.svalue, dst.uvalue, imm); - havoc_offsets(bin.dst); - break; - case Bin::Op::AND: - // FIX: what to do with ptr&-8 as in counter/simple_loop_unrolled? - bitwise_and(dst.svalue, dst.uvalue, imm); - if (gsl::narrow(imm) > 0) { - // AND with immediate is only a 32-bit operation so svalue and uvalue are the same. 
- assume(dst.svalue <= imm); - assume(dst.uvalue <= imm); - assume(0 <= dst.svalue); - assume(0 <= dst.uvalue); - } - havoc_offsets(bin.dst); - break; - case Bin::Op::LSH: shl(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::RSH: lshr(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::ARSH: ashr(bin.dst, gsl::narrow(imm), finite_width); break; - case Bin::Op::XOR: - bitwise_xor(dst.svalue, dst.uvalue, imm); - havoc_offsets(bin.dst); - break; - } - } else { - // dst op= src - auto src_reg = std::get(bin.v); - auto src = reg_pack(src_reg); - switch (bin.op) { - case Bin::Op::ADD: { - if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { - // both must be numbers - add_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); - } else { - // Here we're not sure that lhs and rhs are the same type; they might be. - // But previous assertions should fail unless we know that exactly one of lhs or rhs is a pointer. - m_inv = - type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t dst_type) { - inv = type_inv.join_over_types( - inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t src_type) { - if (dst_type == T_NUM && src_type != T_NUM) { - // num += ptr - type_inv.assign_type(inv, bin.dst, src_type); - if (const auto dst_offset = get_type_offset_variable(bin.dst, src_type)) { - crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst.svalue, - get_type_offset_variable(src_reg, src_type).value()); - } - if (src_type == T_SHARED) { - inv.assign(dst.shared_region_size, src.shared_region_size); - } - } else if (dst_type != T_NUM && src_type == T_NUM) { - // ptr += num - type_inv.assign_type(inv, bin.dst, dst_type); - if (const auto dst_offset = get_type_offset_variable(bin.dst, dst_type)) { - crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst_offset.value(), - src.svalue); - if (dst_type == T_STACK) { - // Reduce the numeric size. 
- using namespace crab::dsl_syntax; - if (m_inv.intersect(src.svalue < 0)) { - inv -= dst.stack_numeric_size; - recompute_stack_numeric_size(inv, dst.type); - } else { - apply_signed(inv, arith_binop_t::SUB, dst.stack_numeric_size, - dst.stack_numeric_size, dst.stack_numeric_size, src.svalue, - 0); - } - } - } - } else if (dst_type == T_NUM && src_type == T_NUM) { - // dst and src don't necessarily have the same type, but among the possibilities - // enumerated is the case where they are both numbers. - apply_signed(inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, - src.svalue, finite_width); - } else { - // We ignore the cases here that do not match the assumption described - // above. Joining bottom with another results will leave the other - // results unchanged. - inv.set_to_bottom(); - } - }); - }); - // careful: change dst.value only after dealing with offset - apply_signed(m_inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, src.svalue, finite_width); - } - break; - } - case Bin::Op::SUB: { - if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { - // src and dest have the same type. - m_inv = type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t type) { - switch (type) { - case T_NUM: - // This is: sub_overflow(inv, dst.value, src.value, finite_width); - apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst.svalue, src.svalue, - finite_width); - type_inv.assign_type(inv, bin.dst, T_NUM); - crab::havoc_offsets(inv, bin.dst); - break; - default: - // ptr -= ptr - // Assertions should make sure we only perform this on non-shared pointers. 
- if (const auto dst_offset = get_type_offset_variable(bin.dst, type)) { - apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst_offset.value(), - get_type_offset_variable(src_reg, type).value(), finite_width); - inv -= dst_offset.value(); - } - crab::havoc_offsets(inv, bin.dst); - type_inv.assign_type(inv, bin.dst, T_NUM); - break; - } - }); - } else { - // We're not sure that lhs and rhs are the same type. - // Either they're different, or at least one is not a singleton. - if (type_inv.get_type(m_inv, std::get(bin.v)) != T_NUM) { - type_inv.havoc_type(m_inv, bin.dst); - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - } else { - sub_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); - if (auto dst_offset = get_type_offset_variable(bin.dst)) { - sub(dst_offset.value(), src.svalue); - if (type_inv.has_type(m_inv, dst.type, T_STACK)) { - // Reduce the numeric size. - using namespace crab::dsl_syntax; - if (m_inv.intersect(src.svalue > 0)) { - m_inv -= dst.stack_numeric_size; - recompute_stack_numeric_size(m_inv, dst.type); - } else { - crab::apply(m_inv, arith_binop_t::ADD, dst.stack_numeric_size, dst.stack_numeric_size, - src.svalue); - } - } - } - } - } - break; - } - case Bin::Op::MUL: - mul(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UDIV: - udiv(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::UMOD: - urem(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SDIV: - sdiv(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::SMOD: - srem(dst.svalue, dst.uvalue, src.svalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::OR: - bitwise_or(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::AND: - bitwise_and(dst.svalue, dst.uvalue, src.uvalue, 
finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::LSH: - if (m_inv.entail(type_is_number(src_reg))) { - auto src_interval = m_inv.eval_interval(src.uvalue); - if (std::optional sn = src_interval.singleton()) { - // truncate to uint64? - uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); - if (imm <= std::numeric_limits::max()) { - if (!bin.is64) { - // Use only the low 32 bits of the value. - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - } - shl(bin.dst, gsl::narrow_cast(imm), finite_width); - break; - } - } - } - shl_overflow(dst.svalue, dst.uvalue, src.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::RSH: - if (m_inv.entail(type_is_number(src_reg))) { - auto src_interval = m_inv.eval_interval(src.uvalue); - if (std::optional sn = src_interval.singleton()) { - uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); - if (imm <= std::numeric_limits::max()) { - if (!bin.is64) { - // Use only the low 32 bits of the value. - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); - } - lshr(bin.dst, gsl::narrow_cast(imm), finite_width); - break; - } - } - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::ARSH: - if (m_inv.entail(type_is_number(src_reg))) { - ashr(bin.dst, src.svalue, finite_width); - break; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::XOR: - bitwise_xor(dst.svalue, dst.uvalue, src.uvalue, finite_width); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOVSX8: - case Bin::Op::MOVSX16: - case Bin::Op::MOVSX32: - // Keep relational information if operation is a no-op. 
- if (dst.svalue == src.svalue && - m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(_movsx_bits(bin.op))) { - return; - } - if (m_inv.entail(type_is_number(src_reg))) { - sign_extend(bin.dst, src.svalue, finite_width, bin.op); - break; - } - havoc(dst.svalue); - havoc(dst.uvalue); - havoc_offsets(bin.dst); - break; - case Bin::Op::MOV: - // Keep relational information if operation is a no-op. - if (dst.svalue == src.svalue && - m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(bin.is64 ? 64 : 32)) { - return; - } - assign(dst.svalue, src.svalue); - assign(dst.uvalue, src.uvalue); - havoc_offsets(bin.dst); - m_inv = type_inv.join_over_types(m_inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t type) { - switch (type) { - case T_CTX: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.ctx_offset, src.ctx_offset); - } - break; - case T_MAP: - case T_MAP_PROGRAMS: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.map_fd, src.map_fd); - } - break; - case T_PACKET: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.packet_offset, src.packet_offset); - } - break; - case T_SHARED: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.shared_region_size, src.shared_region_size); - inv.assign(dst.shared_offset, src.shared_offset); - } - break; - case T_STACK: - if (bin.is64) { - inv.assign(dst.type, type); - inv.assign(dst.stack_offset, src.stack_offset); - inv.assign(dst.stack_numeric_size, src.stack_numeric_size); - } - break; - default: inv.assign(dst.type, type); break; - } - }); - if (bin.is64) { - // Add dst.type=src.type invariant. - if (bin.dst.v != std::get(bin.v).v || type_inv.get_type(m_inv, dst.type) == T_UNINIT) { - // Only forget the destination type if we're copying from a different register, - // or from the same uninitialized register. 
- havoc(dst.type); - } - type_inv.assign_type(m_inv, bin.dst, std::get(bin.v)); - } - break; - } - } - if (!bin.is64) { - bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); +extended_number ebpf_domain_t::get_loop_count_upper_bound() const { + extended_number ub{0}; + for (const variable_t counter : variable_t::get_loop_counters()) { + ub = std::max(ub, m_inv[counter].ub()); } + return ub; } -string_invariant ebpf_domain_t::to_set() const { return this->m_inv.to_set() + this->stack.to_set(); } - std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom) { if (dom.is_bottom()) { o << "_|_"; @@ -2895,9 +290,9 @@ std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom) { return o; } -void ebpf_domain_t::initialize_packet(ebpf_domain_t& inv) { +void ebpf_domain_t::initialize_packet() { using namespace crab::dsl_syntax; - + ebpf_domain_t& inv = *this; inv -= variable_t::packet_size(); inv -= variable_t::meta_offset(); @@ -2908,7 +303,7 @@ void ebpf_domain_t::initialize_packet(ebpf_domain_t& inv) { inv += variable_t::meta_offset() <= 0; inv += variable_t::meta_offset() >= -4098; } else { - inv.assign(variable_t::meta_offset(), 0); + inv.m_inv.assign(variable_t::meta_offset(), 0); } } @@ -2936,9 +331,9 @@ ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { ebpf_domain_t inv; const auto r10 = reg_pack(R10_STACK_POINTER); constexpr Reg r10_reg{R10_STACK_POINTER}; - inv += EBPF_TOTAL_STACK_SIZE <= r10.svalue; - inv += r10.svalue <= PTR_MAX; - inv.assign(r10.stack_offset, EBPF_TOTAL_STACK_SIZE); + inv.m_inv += EBPF_TOTAL_STACK_SIZE <= r10.svalue; + inv.m_inv += r10.svalue <= PTR_MAX; + inv.m_inv.assign(r10.stack_offset, EBPF_TOTAL_STACK_SIZE); // stack_numeric_size would be 0, but TOP has the same result // so no need to assign it. 
inv.type_inv.assign_type(inv.m_inv, r10_reg, T_STACK); @@ -2946,30 +341,14 @@ ebpf_domain_t ebpf_domain_t::setup_entry(const bool init_r1) { if (init_r1) { const auto r1 = reg_pack(R1_ARG); constexpr Reg r1_reg{R1_ARG}; - inv += 1 <= r1.svalue; - inv += r1.svalue <= PTR_MAX; - inv.assign(r1.ctx_offset, 0); + inv.m_inv += 1 <= r1.svalue; + inv.m_inv += r1.svalue <= PTR_MAX; + inv.m_inv.assign(r1.ctx_offset, 0); inv.type_inv.assign_type(inv.m_inv, r1_reg, T_CTX); } - initialize_packet(inv); + inv.initialize_packet(); return inv; } -void ebpf_domain_t::initialize_loop_counter(const label_t& label) { - m_inv.assign(variable_t::loop_counter(to_string(label)), 0); -} - -extended_number ebpf_domain_t::get_loop_count_upper_bound() const { - extended_number ub{0}; - for (const variable_t counter : variable_t::get_loop_counters()) { - ub = std::max(ub, m_inv[counter].ub()); - } - return ub; -} - -void ebpf_domain_t::operator()(const IncrementLoopCounter& ins) { - const auto counter = variable_t::loop_counter(to_string(ins.name)); - this->add(counter, 1); -} } // namespace crab diff --git a/src/crab/ebpf_domain.hpp b/src/crab/ebpf_domain.hpp index 9f3cee592..001dcfaf3 100644 --- a/src/crab/ebpf_domain.hpp +++ b/src/crab/ebpf_domain.hpp @@ -15,7 +15,30 @@ namespace crab { +// Pointers in the BPF VM are defined to be 64 bits. Some contexts, like +// data, data_end, and meta in Linux's struct xdp_md are only 32 bit offsets +// from a base address not exposed to the program, but when a program is loaded, +// the offsets get replaced with 64-bit address pointers. However, we currently +// need to do pointer arithmetic on 64-bit numbers so for now we cap the interval +// to 32 bits. 
+constexpr int MAX_PACKET_SIZE = 0xffff; +constexpr int64_t PTR_MAX = std::numeric_limits::max() - MAX_PACKET_SIZE; + +class ebpf_domain_t; + +void ebpf_domain_transform(ebpf_domain_t& inv, const Instruction& ins); +void ebpf_domain_assume(ebpf_domain_t& dom, const Assertion& assertion); +std::vector ebpf_domain_check(ebpf_domain_t& dom, const label_t& label, const Assertion& assertion); + +// TODO: make this an explicit instruction +void ebpf_domain_initialize_loop_counter(ebpf_domain_t& dom, const label_t& label); + class ebpf_domain_t final { + friend class ebpf_checker; + friend class ebpf_transformer; + + friend std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom); + public: ebpf_domain_t(); ebpf_domain_t(NumAbsDomain inv, domains::array_domain_t stack); @@ -41,118 +64,20 @@ class ebpf_domain_t final { ebpf_domain_t widening_thresholds(const ebpf_domain_t& other, const thresholds_t& ts); ebpf_domain_t narrow(const ebpf_domain_t& other) const; - typedef bool check_require_func_t(NumAbsDomain&, const linear_constraint_t&, std::string); - void set_require_check(std::function f); + static ebpf_domain_t calculate_constant_limits(); extended_number get_loop_count_upper_bound() const; - static ebpf_domain_t setup_entry(bool init_r1); + static ebpf_domain_t setup_entry(bool init_r1); static ebpf_domain_t from_constraints(const std::set& constraints, bool setup_constraints); - string_invariant to_set() const; + void initialize_packet(); - // abstract transformers - void operator()(const basic_block_t& bb); - - void operator()(const Assume&); - void operator()(const Bin&); - void operator()(const Call&); - void operator()(const CallLocal&); - void operator()(const Callx&); - void operator()(const Exit&); - void operator()(const Jmp&) const; - void operator()(const LoadMapFd&); - void operator()(const Atomic&); - void operator()(const Mem&); - void operator()(const Packet&); - void operator()(const Un&); - void operator()(const Undefined&); - void 
operator()(const IncrementLoopCounter&); - - void operator()(const Assertion&); - - void operator()(const Addable&); - void operator()(const Comparable&); - void operator()(const FuncConstraint&); - void operator()(const ValidDivisor&); - void operator()(const TypeConstraint&); - void operator()(const ValidAccess&); - void operator()(const ValidCall&); - void operator()(const ValidMapKeyValue&); - void operator()(const ValidSize&); - void operator()(const ValidStore&); - void operator()(const ZeroCtxOffset&); - void operator()(const BoundedLoopCount&); - - void initialize_loop_counter(const label_t& label); - static ebpf_domain_t calculate_constant_limits(); + string_invariant to_set() const; private: // private generic domain functions void operator+=(const linear_constraint_t& cst); void operator-=(variable_t var); - void assign(variable_t lhs, variable_t rhs); - void assign(variable_t x, const linear_expression_t& e); - void assign(variable_t x, int64_t e); - - void apply(arith_binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); - void apply(arith_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - void apply(bitwise_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - void apply(bitwise_binop_t op, variable_t x, variable_t y, const number_t& k, int finite_width); - void apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); - void apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); - - void add(const Reg& reg, int imm, int finite_width); - void add(variable_t lhs, variable_t op2); - void add(variable_t lhs, const number_t& op2); - void sub(variable_t lhs, variable_t op2); - void sub(variable_t lhs, const number_t& op2); - void add_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void add_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void sub_overflow(variable_t lhss, 
variable_t lhsu, variable_t op2, int finite_width); - void sub_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void neg(variable_t lhss, variable_t lhsu, int finite_width); - void mul(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void mul(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void sdiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void sdiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void udiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void udiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void srem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void srem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - void urem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void urem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); - - void bitwise_and(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_and(variable_t lhss, variable_t lhsu, const number_t& op2); - void bitwise_or(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_or(variable_t lhss, variable_t lhsu, const number_t& op2); - void bitwise_xor(variable_t lhsss, variable_t lhsu, variable_t op2, int finite_width); - void bitwise_xor(variable_t lhss, variable_t lhsu, const number_t& op2); - void shl(const Reg& reg, int imm, int finite_width); - void shl_overflow(variable_t lhss, variable_t lhsu, variable_t op2); - void shl_overflow(variable_t lhss, variable_t lhsu, const number_t& op2); - void lshr(const Reg& reg, int imm, int finite_width); - void ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width); - void sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, Bin::Op op); - - 
void assume(const linear_constraint_t& cst); - - /// Forget everything we know about the value of a variable. - void havoc(variable_t v); - - /// Forget everything about all offset variables for a given register. - void havoc_offsets(const Reg& reg); - - static std::optional get_type_offset_variable(const Reg& reg, int type); - [[nodiscard]] - std::optional get_type_offset_variable(const Reg& reg, const NumAbsDomain& inv) const; - [[nodiscard]] - std::optional get_type_offset_variable(const Reg& reg) const; - - void scratch_caller_saved_registers(); - void save_callee_saved_registers(const std::string& prefix); - void restore_callee_saved_registers(const std::string& prefix); - void havoc_subprogram_stack(const std::string& prefix); [[nodiscard]] std::optional get_map_type(const Reg& map_fd_reg) const; [[nodiscard]] @@ -163,42 +88,15 @@ class ebpf_domain_t final { interval_t get_map_value_size(const Reg& map_fd_reg) const; [[nodiscard]] interval_t get_map_max_entries(const Reg& map_fd_reg) const; - void forget_packet_pointers(); - void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); - - void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); - - void require(NumAbsDomain& inv, const linear_constraint_t& cst, const std::string& s) const; - - // memory check / load / store - void check_access_stack(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - int call_stack_depth) const; - void check_access_context(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub) const; - void check_access_packet(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - std::optional packet_size) const; - void check_access_shared(NumAbsDomain& inv, const linear_expression_t& lb, const linear_expression_t& ub, - variable_t shared_region_size) const; - void recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const; - void recompute_stack_numeric_size(NumAbsDomain& inv, 
variable_t type_variable) const; - void do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width, - const Reg& src_reg); - void do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, int width); - void do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width); - void do_load(const Mem& b, const Reg& target_reg); - - void do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, int width, - const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); - - void do_mem_store(const Mem& b, const linear_expression_t& val_type, const linear_expression_t& val_svalue, - const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); - - friend std::ostream& operator<<(std::ostream& o, const ebpf_domain_t& dom); + static std::optional get_type_offset_variable(const Reg& reg, int type); + [[nodiscard]] + std::optional get_type_offset_variable(const Reg& reg, const NumAbsDomain& inv) const; + [[nodiscard]] + std::optional get_type_offset_variable(const Reg& reg) const; - static void initialize_packet(ebpf_domain_t& inv); + bool get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const; - private: /// Mapping from variables (including registers, types, offsets, /// memory locations, etc.) to numeric intervals or relationships /// to other variables. @@ -209,11 +107,7 @@ class ebpf_domain_t final { /// while dealing with overlapping byte ranges. 
domains::array_domain_t stack; - std::function check_require{}; - bool get_map_fd_range(const Reg& map_fd_reg, int32_t* start_fd, int32_t* end_fd) const; - TypeDomain type_inv; - std::string current_assertion; -}; // end ebpf_domain_t +}; } // namespace crab diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp new file mode 100644 index 000000000..743427ad0 --- /dev/null +++ b/src/crab/ebpf_transformer.cpp @@ -0,0 +1,2429 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT + +// This file is eBPF-specific, not derived from CRAB. + +#include +#include +#include +#include + +#include "boost/endian/conversion.hpp" + +#include "asm_unmarshal.hpp" +#include "config.hpp" +#include "crab/array_domain.hpp" +#include "crab/ebpf_domain.hpp" +#include "crab_utils/num_safety.hpp" +#include "dsl_syntax.hpp" +#include "platform.hpp" +#include "string_constraints.hpp" + +using crab::domains::NumAbsDomain; +namespace crab { + +class ebpf_transformer final { + ebpf_domain_t& dom; + // shorthands: + NumAbsDomain& m_inv; + domains::array_domain_t& stack; + TypeDomain& type_inv; + + public: + explicit ebpf_transformer(ebpf_domain_t& dom) + : dom(dom), m_inv(dom.m_inv), stack(dom.stack), type_inv(dom.type_inv) {} + + // abstract transformers + void operator()(const Assume&); + void operator()(const Atomic&); + void operator()(const Bin&); + void operator()(const Call&); + void operator()(const CallLocal&); + void operator()(const Callx&); + void operator()(const Exit&); + void operator()(const IncrementLoopCounter&); + void operator()(const Jmp&) const; + void operator()(const LoadMapFd&); + void operator()(const Mem&); + void operator()(const Packet&); + void operator()(const Un&); + void operator()(const Undefined&); + + void initialize_loop_counter(const label_t& label); + + static ebpf_domain_t setup_entry(bool init_r1); + + private: + void assign(variable_t lhs, variable_t rhs); + void assign(variable_t x, const 
linear_expression_t& e); + void assign(variable_t x, int64_t e); + + void apply(arith_binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); + void apply(arith_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); + void apply(bitwise_binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); + void apply(bitwise_binop_t op, variable_t x, variable_t y, const number_t& k, int finite_width); + void apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width); + void apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width); + + void add(const Reg& reg, int imm, int finite_width); + void add(variable_t lhs, variable_t op2); + void add(variable_t lhs, const number_t& op2); + void sub(variable_t lhs, variable_t op2); + void sub(variable_t lhs, const number_t& op2); + void add_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void add_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void sub_overflow(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void sub_overflow(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void neg(variable_t lhss, variable_t lhsu, int finite_width); + void mul(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void mul(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void sdiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void sdiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void udiv(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void udiv(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void srem(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void srem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + void urem(variable_t lhss, 
variable_t lhsu, variable_t op2, int finite_width); + void urem(variable_t lhss, variable_t lhsu, const number_t& op2, int finite_width); + + void bitwise_and(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void bitwise_and(variable_t lhss, variable_t lhsu, const number_t& op2); + void bitwise_or(variable_t lhss, variable_t lhsu, variable_t op2, int finite_width); + void bitwise_or(variable_t lhss, variable_t lhsu, const number_t& op2); + void bitwise_xor(variable_t lhsss, variable_t lhsu, variable_t op2, int finite_width); + void bitwise_xor(variable_t lhss, variable_t lhsu, const number_t& op2); + void shl(const Reg& reg, int imm, int finite_width); + void shl_overflow(variable_t lhss, variable_t lhsu, variable_t op2); + void shl_overflow(variable_t lhss, variable_t lhsu, const number_t& op2); + void lshr(const Reg& reg, int imm, int finite_width); + void ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width); + void sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width, Bin::Op op); + + void assume(const linear_constraint_t& cst); + + /// Forget everything we know about the value of a variable. + void havoc(variable_t v); + + /// Forget everything about all offset variables for a given register. 
+ void havoc_offsets(const Reg& reg); + + void scratch_caller_saved_registers(); + void save_callee_saved_registers(const std::string& prefix); + void restore_callee_saved_registers(const std::string& prefix); + void havoc_subprogram_stack(const std::string& prefix); + void forget_packet_pointers(); + void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); + + void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); + + void recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const; + void recompute_stack_numeric_size(NumAbsDomain& inv, variable_t type_variable) const; + void do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width, + const Reg& src_reg); + void do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, int width); + void do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, int width); + void do_load(const Mem& b, const Reg& target_reg); + + void do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, int width, + const linear_expression_t& val_type, const linear_expression_t& val_svalue, + const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); + + void do_mem_store(const Mem& b, const linear_expression_t& val_type, const linear_expression_t& val_svalue, + const linear_expression_t& val_uvalue, const std::optional& opt_val_reg); +}; // end ebpf_transformer + +void ebpf_domain_transform(ebpf_domain_t& inv, const Instruction& ins) { std::visit(ebpf_transformer{inv}, ins); } + +/** Linear constraint for a pointer comparison.
+ */ +static linear_constraint_t assume_cst_offsets_reg(const Condition::Op op, const variable_t dst_offset, + const variable_t src_offset) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + switch (op) { + case Op::EQ: return eq(dst_offset, src_offset); + case Op::NE: return neq(dst_offset, src_offset); + case Op::GE: return dst_offset >= src_offset; + case Op::SGE: return dst_offset >= src_offset; // pointer comparison is unsigned + case Op::LE: return dst_offset <= src_offset; + case Op::SLE: return dst_offset <= src_offset; // pointer comparison is unsigned + case Op::GT: return dst_offset > src_offset; + case Op::SGT: return dst_offset > src_offset; // pointer comparison is unsigned + case Op::SLT: return src_offset > dst_offset; + // Note: reverse the test as a workaround strange lookup: + case Op::LT: return src_offset > dst_offset; // FIX unsigned + default: return dst_offset - dst_offset == 0; + } +} + +static std::vector assume_bit_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64, + variable_t dst_uvalue, interval_t src_interval) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + + auto dst_interval = inv.eval_interval(dst_uvalue); + std::optional dst_n = dst_interval.singleton(); + if (!dst_n || !dst_n.value().fits_cast_to()) { + return {}; + } + + std::optional src_n = src_interval.singleton(); + if (!src_n || !src_n->fits_cast_to()) { + return {}; + } + uint64_t src_int_value = src_n.value().cast_to(); + if (!is64) { + src_int_value = gsl::narrow_cast(src_int_value); + } + + bool result; + switch (op) { + case Op::SET: result = (dst_n.value().cast_to() & src_int_value) != 0; break; + case Op::NSET: result = (dst_n.value().cast_to() & src_int_value) == 0; break; + default: throw std::exception(); + } + + return {result ? 
linear_constraint_t::true_const() : linear_constraint_t::false_const()}; +} + +static std::vector assume_signed_64bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, + const variable_t left_uvalue, + const interval_t& right_interval, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using namespace crab::dsl_syntax; + if (right_interval <= interval_t::nonnegative(64) && !right_interval.is_singleton()) { + return {(left_svalue == right_svalue), (left_uvalue == right_uvalue), eq(left_svalue, left_uvalue)}; + } else { + return {(left_svalue == right_svalue), (left_uvalue == right_uvalue)}; + } +} + +static std::vector assume_signed_32bit_eq(const NumAbsDomain& inv, const variable_t left_svalue, + const variable_t left_uvalue, + const interval_t& right_interval) { + using namespace crab::dsl_syntax; + + if (const auto rn = right_interval.singleton()) { + const auto left_svalue_interval = inv.eval_interval(left_svalue); + if (auto size = left_svalue_interval.finite_size()) { + // Find the lowest 64-bit svalue whose low 32 bits match the singleton. + + // Get lower bound as a 64-bit value. + int64_t lb = left_svalue_interval.lb().number()->cast_to(); + + // Use the high 32-bits from the left lower bound and the low 32-bits from the right singleton. + // The result might be lower than the lower bound. + const int64_t lb_match = (lb & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); + if (lb_match < lb) { + // The result is lower than the left interval, so try the next higher matching 64-bit value. + // It's ok if this goes higher than the left upper bound. + lb += 0x100000000; + } + + // Find the highest 64-bit svalue whose low 32 bits match the singleton. + + // Get upper bound as a 64-bit value. + const int64_t ub = left_svalue_interval.ub().number()->cast_to(); + + // Use the high 32-bits from the left upper bound and the low 32-bits from the right singleton. + // The result might be higher than the upper bound. 
+ const int64_t ub_match = (ub & 0xFFFFFFFF00000000) | (rn->cast_to() & 0xFFFFFFFF); + if (ub_match > ub) { + // The result is higher than the left interval, so try the next lower matching 64-bit value. + // It's ok if this goes lower than the left lower bound. + lb -= 0x100000000; + } + + if (to_unsigned(lb_match) <= to_unsigned(ub_match)) { + // The interval is also valid when cast to a uvalue, meaning + // both bounds are positive or both are negative. + return {left_svalue >= lb_match, left_svalue <= ub_match, left_uvalue >= to_unsigned(lb_match), + left_uvalue <= to_unsigned(ub_match)}; + } else { + // The interval can only be represented as an svalue. + return {left_svalue >= lb_match, left_svalue <= ub_match}; + } + } + } + return {}; +} + +// Given left and right values, get the left and right intervals, and also split +// the left interval into separate negative and positive intervals. +static void get_signed_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, + const variable_t left_uvalue, const linear_expression_t& right_svalue, + interval_t& left_interval, interval_t& right_interval, + interval_t& left_interval_positive, interval_t& left_interval_negative) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + // Get intervals as 32-bit or 64-bit as appropriate. 
+ left_interval = inv.eval_interval(left_svalue); + right_interval = inv.eval_interval(right_svalue); + if (!is64) { + if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || + (left_interval <= interval_t::negative(32) && right_interval <= interval_t::negative(32))) { + is64 = true; + // fallthrough as 64bit, including deduction of relational information + } else { + left_interval = left_interval.truncate_to(); + right_interval = right_interval.truncate_to(); + // continue as 32bit + } + } + + if (!left_interval.is_top()) { + left_interval_positive = left_interval & interval_t::nonnegative(64); + left_interval_negative = left_interval & interval_t::negative(64); + } else { + left_interval = inv.eval_interval(left_uvalue); + if (!left_interval.is_top()) { + // The interval is TOP as a signed interval but is represented precisely as an unsigned interval, + // so split into two signed intervals that can be treated separately. + left_interval_positive = left_interval & interval_t::nonnegative(64); + const number_t lih_ub = + left_interval.ub().number() ? left_interval.ub().number()->truncate_to() : -1; + left_interval_negative = interval_t{std::numeric_limits::min(), lih_ub}; + } else { + left_interval_positive = interval_t::nonnegative(64); + left_interval_negative = interval_t::negative(64); + } + } + + left_interval = left_interval.truncate_to(); + right_interval = right_interval.truncate_to(); +} + +// Given left and right values, get the left and right intervals, and also split +// the left interval into separate low and high intervals. 
+static void get_unsigned_intervals(const NumAbsDomain& inv, bool is64, const variable_t left_svalue, + const variable_t left_uvalue, const linear_expression_t& right_uvalue, + interval_t& left_interval, interval_t& right_interval, interval_t& left_interval_low, + interval_t& left_interval_high) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + // Get intervals as 32-bit or 64-bit as appropriate. + left_interval = inv.eval_interval(left_uvalue); + right_interval = inv.eval_interval(right_uvalue); + if (!is64) { + if ((left_interval <= interval_t::nonnegative(32) && right_interval <= interval_t::nonnegative(32)) || + (left_interval <= interval_t::unsigned_high(32) && right_interval <= interval_t::unsigned_high(32))) { + is64 = true; + // fallthrough as 64bit, including deduction of relational information + } else { + left_interval = left_interval.truncate_to(); + right_interval = right_interval.truncate_to(); + // continue as 32bit + } + } + + if (!left_interval.is_top()) { + left_interval_low = left_interval & interval_t::nonnegative(64); + left_interval_high = left_interval & interval_t::unsigned_high(64); + } else { + left_interval = inv.eval_interval(left_svalue); + if (!left_interval.is_top()) { + // The interval is TOP as an unsigned interval but is represented precisely as a signed interval, + // so split into two unsigned intervals that can be treated separately. 
+ left_interval_low = interval_t(0, left_interval.ub()).truncate_to(); + left_interval_high = interval_t(left_interval.lb(), -1).truncate_to(); + } else { + left_interval_low = interval_t::nonnegative(64); + left_interval_high = interval_t::unsigned_high(64); + } + } + + left_interval = left_interval.truncate_to(); + right_interval = right_interval.truncate_to(); +} + +static std::vector +assume_signed_64bit_lt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, + const interval_t& left_interval_positive, const interval_t& left_interval_negative, + const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, + const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::negative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1]. + return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, + strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(64) && + right_interval <= interval_t::nonnegative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue, 0 <= left_uvalue, + strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else { + // Interval can only be represented as an svalue. + return {strict ? 
left_svalue < right_svalue : left_svalue <= right_svalue}; + } +} + +static std::vector +assume_signed_32bit_lt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_positive, + const interval_t& left_interval_negative, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::negative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], + // aka [INT_MAX+1, UINT_MAX]. + return {std::numeric_limits::max() < left_uvalue, + strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if ((left_interval_negative | left_interval_positive) <= interval_t::nonnegative(32) && + right_interval <= interval_t::nonnegative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX] + const auto lpub = left_interval_positive.truncate_to().ub(); + return {left_svalue >= 0, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue, + left_svalue <= left_uvalue, + left_svalue >= left_uvalue, + left_uvalue >= 0, + strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + left_uvalue <= *lpub.number()}; + } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && + inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { + // Interval can only be represented as an svalue. + return {strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else { + // We can't directly compare the svalues since they may differ in high order bits. 
+ return {}; + } +} + +static std::vector +assume_signed_64bit_gt(const bool strict, const variable_t left_svalue, const variable_t left_uvalue, + const interval_t& left_interval_positive, const interval_t& left_interval_negative, + const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, + const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::nonnegative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + const auto lpub = left_interval_positive.truncate_to().ub(); + return {left_svalue >= 0, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue, + left_svalue <= left_uvalue, + left_svalue >= left_uvalue, + left_uvalue >= 0, + strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + left_uvalue <= *lpub.number()}; + } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(64) && + right_interval <= interval_t::negative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], + // aka [INT_MAX+1, UINT_MAX]. + return {std::numeric_limits::max() < left_uvalue, + strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else { + // Interval can only be represented as an svalue. + return {strict ? 
left_svalue > right_svalue : left_svalue >= right_svalue}; + } +} + +static std::vector +assume_signed_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_positive, + const interval_t& left_interval_negative, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::nonnegative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + const auto lpub = left_interval_positive.truncate_to().ub(); + return {left_svalue >= 0, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue, + left_svalue <= left_uvalue, + left_svalue >= left_uvalue, + left_uvalue >= 0, + strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + left_uvalue <= *lpub.number()}; + } else if ((left_interval_negative | left_interval_positive) <= interval_t::negative(32) && + right_interval <= interval_t::negative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MIN, -1], + // aka [INT_MAX+1, UINT_MAX]. + return {left_uvalue >= number_t{std::numeric_limits::max()} + 1, + strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else if (inv.eval_interval(left_svalue) <= interval_t::signed_int(32) && + inv.eval_interval(right_svalue) <= interval_t::signed_int(32)) { + // Interval can only be represented as an svalue. + return {strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else { + // We can't directly compare the svalues since they may differ in high order bits. 
+ return {}; + } +} + +static std::vector assume_signed_cst_interval(const NumAbsDomain& inv, Condition::Op op, bool is64, + variable_t left_svalue, variable_t left_uvalue, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + interval_t left_interval = interval_t::bottom(); + interval_t right_interval = interval_t::bottom(); + interval_t left_interval_positive = interval_t::bottom(); + interval_t left_interval_negative = interval_t::bottom(); + get_signed_intervals(inv, is64, left_svalue, left_uvalue, right_svalue, left_interval, right_interval, + left_interval_positive, left_interval_negative); + + if (op == Condition::Op::EQ) { + // Handle svalue == right. + if (is64) { + return assume_signed_64bit_eq(inv, left_svalue, left_uvalue, right_interval, right_svalue, right_uvalue); + } else { + return assume_signed_32bit_eq(inv, left_svalue, left_uvalue, right_interval); + } + } + + const bool is_lt = op == Condition::Op::SLT || op == Condition::Op::SLE; + bool strict = op == Condition::Op::SLT || op == Condition::Op::SGT; + + auto llb = left_interval.lb(); + auto lub = left_interval.ub(); + auto rlb = right_interval.lb(); + auto rub = right_interval.ub(); + if (!is_lt && (strict ? lub <= rlb : lub < rlb)) { + // Left signed interval is lower than right signed interval. + return {linear_constraint_t::false_const()}; + } else if (is_lt && (strict ? llb >= rub : llb > rub)) { + // Left signed interval is higher than right signed interval. + return {linear_constraint_t::false_const()}; + } + if (is_lt && (strict ? lub < rlb : lub <= rlb)) { + // Left signed interval is lower than right signed interval. + return {linear_constraint_t::true_const()}; + } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { + // Left signed interval is higher than right signed interval. 
+ return {linear_constraint_t::true_const()}; + } + + if (is64) { + if (is_lt) { + return assume_signed_64bit_lt(strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } else { + return assume_signed_64bit_gt(strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } + } else { + // 32-bit compare. + if (is_lt) { + return assume_signed_32bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } else { + return assume_signed_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_positive, + left_interval_negative, right_svalue, right_uvalue, right_interval); + } + } + return {}; +} + +static std::vector +assume_unsigned_64bit_lt(const NumAbsDomain& inv, bool strict, variable_t left_svalue, variable_t left_uvalue, + const interval_t& left_interval_low, const interval_t& left_interval_high, + const linear_expression_t& right_svalue, const linear_expression_t& right_uvalue, + const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + auto rub = right_interval.ub(); + auto lllb = left_interval_low.truncate_to().lb(); + if (right_interval <= interval_t::nonnegative(64) && (strict ? lllb >= rub : lllb > rub)) { + // The high interval is out of range. + if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { + return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), + left_uvalue <= *lsubn, left_svalue >= 0}; + } else { + return {left_uvalue >= 0, (strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue), + left_svalue >= 0}; + } + } + auto lhlb = left_interval_high.truncate_to().lb(); + if (right_interval <= interval_t::unsigned_high(64) && (strict ? lhlb >= rub : lhlb > rub)) { + // The high interval is out of range. 
+ if (auto lsubn = inv.eval_interval(left_svalue).ub().number()) { + return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), + left_uvalue <= *lsubn, left_svalue >= 0}; + } else { + return {left_uvalue >= 0, (strict ? left_uvalue < *rub.number() : left_uvalue <= *rub.number()), + left_svalue >= 0}; + } + } + if (right_interval <= interval_t::signed_int(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + auto llub = left_interval_low.truncate_to().ub(); + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + left_uvalue <= *llub.number(), 0 <= left_svalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (left_interval_low.is_bottom() && right_interval <= interval_t::unsigned_high(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if ((left_interval_low | left_interval_high) == interval_t::unsigned_int(64)) { + // Interval can only be represented as a uvalue, and was TOP before. + return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? 
left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } +} + +static std::vector assume_unsigned_32bit_lt(const NumAbsDomain& inv, const bool strict, + const variable_t left_svalue, + const variable_t left_uvalue, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (inv.eval_interval(left_uvalue) <= interval_t::nonnegative(32) && + inv.eval_interval(right_uvalue) <= interval_t::nonnegative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT32_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (inv.eval_interval(left_svalue) <= interval_t::negative(32) && + inv.eval_interval(right_svalue) <= interval_t::negative(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT32_MIN, -1]. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue, + strict ? left_svalue < right_svalue : left_svalue <= right_svalue}; + } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && + inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } else { + // We can't directly compare the uvalues since they may differ in high order bits. 
+ return {}; + } +} + +static std::vector +assume_unsigned_64bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_low, + const interval_t& left_interval_high, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + const auto rlb = right_interval.lb(); + const auto llub = left_interval_low.truncate_to().ub(); + const auto lhlb = left_interval_high.truncate_to().lb(); + + if (right_interval <= interval_t::nonnegative(64) && (strict ? llub <= rlb : llub < rlb)) { + // The low interval is out of range. + return {strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + *lhlb.number() == std::numeric_limits::max() ? left_uvalue == *lhlb.number() + : left_uvalue >= *lhlb.number(), + left_svalue < 0}; + } else if (right_interval <= interval_t::unsigned_high(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else if ((left_interval_low | left_interval_high) <= interval_t::nonnegative(64) && + right_interval <= interval_t::nonnegative(64)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [0, INT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } +} + +static std::vector +assume_unsigned_32bit_gt(const NumAbsDomain& inv, const bool strict, const variable_t left_svalue, + const variable_t left_uvalue, const interval_t& left_interval_low, + const interval_t& left_interval_high, const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue, const interval_t& right_interval) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + if (right_interval <= interval_t::unsigned_high(32)) { + // Interval can be represented as both an svalue and a uvalue since it fits in [INT_MAX+1, UINT_MAX]. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue, + strict ? left_svalue > right_svalue : left_svalue >= right_svalue}; + } else if (inv.eval_interval(left_uvalue) <= interval_t::unsigned_int(32) && + inv.eval_interval(right_uvalue) <= interval_t::unsigned_int(32)) { + // Interval can only be represented as a uvalue. + return {0 <= left_uvalue, strict ? left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } else { + // We can't directly compare the uvalues since they may differ in high order bits. + return {}; + }; +} + +static std::vector assume_unsigned_cst_interval(const NumAbsDomain& inv, Condition::Op op, + bool is64, variable_t left_svalue, + variable_t left_uvalue, + const linear_expression_t& right_svalue, + const linear_expression_t& right_uvalue) { + using crab::interval_t; + using namespace crab::dsl_syntax; + + interval_t left_interval = interval_t::bottom(); + interval_t right_interval = interval_t::bottom(); + interval_t left_interval_low = interval_t::bottom(); + interval_t left_interval_high = interval_t::bottom(); + get_unsigned_intervals(inv, is64, left_svalue, left_uvalue, right_uvalue, left_interval, right_interval, + left_interval_low, left_interval_high); + + // Handle uvalue != right. 
+ if (op == Condition::Op::NE) { + if (auto rn = right_interval.singleton()) { + if (rn == left_interval.truncate_to_uint(is64 ? 64 : 32).lb().number()) { + // "NE lower bound" is equivalent to "GT lower bound". + op = Condition::Op::GT; + right_interval = interval_t{left_interval.lb()}; + } else if (rn == left_interval.ub().number()) { + // "NE upper bound" is equivalent to "LT upper bound". + op = Condition::Op::LT; + right_interval = interval_t{left_interval.ub()}; + } else { + return {}; + } + } else { + return {}; + } + } + + const bool is_lt = op == Condition::Op::LT || op == Condition::Op::LE; + bool strict = op == Condition::Op::LT || op == Condition::Op::GT; + + auto [llb, lub] = left_interval.pair(); + auto [rlb, rub] = right_interval.pair(); + if (is_lt ? (strict ? llb >= rub : llb > rub) : (strict ? lub <= rlb : lub < rlb)) { + // Left unsigned interval is lower than right unsigned interval. + return {linear_constraint_t::false_const()}; + } + if (is_lt && (strict ? lub < rlb : lub <= rlb)) { + // Left unsigned interval is lower than right unsigned interval. We still add a + // relationship for use when widening, such as is used in the prime conformance test. + if (is64) { + return {strict ? left_uvalue < right_uvalue : left_uvalue <= right_uvalue}; + } + return {}; + } else if (!is_lt && (strict ? llb > rub : llb >= rub)) { + // Left unsigned interval is higher than right unsigned interval. We still add a + // relationship for use when widening, such as is used in the prime conformance test. + if (is64) { + return {strict ? 
left_uvalue > right_uvalue : left_uvalue >= right_uvalue}; + } else { + return {}; + } + } + + if (is64) { + if (is_lt) { + return assume_unsigned_64bit_lt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } else { + return assume_unsigned_64bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } + } else { + if (is_lt) { + return assume_unsigned_32bit_lt(inv, strict, left_svalue, left_uvalue, right_svalue, right_uvalue); + } else { + return assume_unsigned_32bit_gt(inv, strict, left_svalue, left_uvalue, left_interval_low, + left_interval_high, right_svalue, right_uvalue, right_interval); + } + } +} + +/** Linear constraints for a comparison with a constant. + */ +static std::vector assume_cst_imm(const NumAbsDomain& inv, const Condition::Op op, const bool is64, + const variable_t dst_svalue, const variable_t dst_uvalue, + const int64_t imm) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + switch (op) { + case Op::EQ: + case Op::SGE: + case Op::SLE: + case Op::SGT: + case Op::SLT: + return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, gsl::narrow_cast(imm)); + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, interval_t{imm}); + case Op::NE: + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: + return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, imm, + gsl::narrow_cast(imm)); + } + return {}; +} + +/** Linear constraint for a numerical comparison between registers. 
+ */ +static std::vector assume_cst_reg(const NumAbsDomain& inv, const Condition::Op op, const bool is64, + const variable_t dst_svalue, const variable_t dst_uvalue, + const variable_t src_svalue, const variable_t src_uvalue) { + using namespace crab::dsl_syntax; + using Op = Condition::Op; + if (is64) { + switch (op) { + case Op::EQ: { + const interval_t src_interval = inv.eval_interval(src_svalue); + if (!src_interval.is_singleton() && src_interval <= interval_t::nonnegative(64)) { + return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue), eq(dst_svalue, dst_uvalue)}; + } else { + return {eq(dst_svalue, src_svalue), eq(dst_uvalue, src_uvalue)}; + } + } + case Op::NE: return {neq(dst_svalue, src_svalue)}; + case Op::SGE: return {dst_svalue >= src_svalue}; + case Op::SLE: return {dst_svalue <= src_svalue}; + case Op::SGT: return {dst_svalue > src_svalue}; + // Note: reverse the test as a workaround strange lookup: + case Op::SLT: return {src_svalue > dst_svalue}; + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + } + } else { + switch (op) { + case Op::EQ: + case Op::SGE: + case Op::SLE: + case Op::SGT: + case Op::SLT: return assume_signed_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + case Op::SET: + case Op::NSET: return assume_bit_cst_interval(inv, op, is64, dst_uvalue, inv.eval_interval(src_uvalue)); + case Op::NE: + case Op::GE: + case Op::LE: + case Op::GT: + case Op::LT: return assume_unsigned_cst_interval(inv, op, is64, dst_svalue, dst_uvalue, src_svalue, src_uvalue); + } + } + assert(false); + throw std::exception(); +} + +void ebpf_transformer::assign(const variable_t x, const linear_expression_t& e) { m_inv.assign(x, e); } +void ebpf_transformer::assign(const variable_t x, const int64_t 
e) { m_inv.set(x, interval_t(e)); } + +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const number_t& z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const arith_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const variable_t z, + const int finite_width) { + m_inv.apply(op, x, y, z, finite_width); +} + +void ebpf_transformer::apply(const bitwise_binop_t op, const variable_t x, const variable_t y, const number_t& k, + const int finite_width) { + m_inv.apply(op, x, y, k, finite_width); +} + +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, const number_t& z, int finite_width) { + std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); +} + +void ebpf_transformer::apply(binop_t op, variable_t x, variable_t y, variable_t z, int finite_width) { + std::visit([&](auto top) { apply(top, x, y, z, finite_width); }, op); +} + +static void havoc_offsets(NumAbsDomain& inv, const Reg& reg) { + const reg_pack_t r = reg_pack(reg); + inv -= r.ctx_offset; + inv -= r.map_fd; + inv -= r.packet_offset; + inv -= r.shared_offset; + inv -= r.shared_region_size; + inv -= r.stack_offset; + inv -= r.stack_numeric_size; +} +static void havoc_register(NumAbsDomain& inv, const Reg& reg) { + const reg_pack_t r = reg_pack(reg); + havoc_offsets(inv, reg); + inv -= r.svalue; + inv -= r.uvalue; +} + +void ebpf_transformer::scratch_caller_saved_registers() { + for (int i = R1_ARG; i <= R5_ARG; i++) { + Reg r{gsl::narrow(i)}; + havoc_register(m_inv, r); + type_inv.havoc_type(m_inv, r); + } +} + +void ebpf_transformer::save_callee_saved_registers(const std::string& prefix) { + // Create variables specific to the new call stack frame that store + // copies of the states of 
r6 through r9. + for (int r = R6; r <= R9; r++) { + for (const data_kind_t kind : iterate_kinds()) { + const variable_t src_var = variable_t::reg(kind, r); + if (!m_inv[src_var].is_top()) { + assign(variable_t::stack_frame_var(kind, r, prefix), src_var); + } + } + } +} + +void ebpf_transformer::restore_callee_saved_registers(const std::string& prefix) { + for (int r = R6; r <= R9; r++) { + for (const data_kind_t kind : iterate_kinds()) { + const variable_t src_var = variable_t::stack_frame_var(kind, r, prefix); + if (!m_inv[src_var].is_top()) { + assign(variable_t::reg(kind, r), src_var); + } else { + havoc(variable_t::reg(kind, r)); + } + havoc(src_var); + } + } +} + +void ebpf_transformer::havoc_subprogram_stack(const std::string& prefix) { + const variable_t r10_stack_offset = reg_pack(R10_STACK_POINTER).stack_offset; + const auto intv = m_inv.eval_interval(r10_stack_offset); + if (!intv.is_singleton()) { + return; + } + const int64_t stack_start = intv.singleton()->cast_to() - EBPF_SUBPROGRAM_STACK_SIZE; + for (const data_kind_t kind : iterate_kinds()) { + stack.havoc(m_inv, kind, stack_start, EBPF_SUBPROGRAM_STACK_SIZE); + } +} + +void ebpf_transformer::forget_packet_pointers() { + using namespace crab::dsl_syntax; + + for (const variable_t type_variable : variable_t::get_type_variables()) { + if (type_inv.has_type(m_inv, type_variable, T_PACKET)) { + havoc(variable_t::kind_var(data_kind_t::types, type_variable)); + havoc(variable_t::kind_var(data_kind_t::packet_offsets, type_variable)); + havoc(variable_t::kind_var(data_kind_t::svalues, type_variable)); + havoc(variable_t::kind_var(data_kind_t::uvalues, type_variable)); + } + } + + dom.initialize_packet(); +} + +static void overflow_bounds(NumAbsDomain& inv, variable_t lhs, number_t span, int finite_width, bool issigned) { + using namespace crab::dsl_syntax; + auto interval = inv[lhs]; + if (interval.ub() - interval.lb() >= span) { + // Interval covers the full space. 
+ inv -= lhs; + return; + } + if (interval.is_bottom()) { + inv -= lhs; + return; + } + number_t lb_value = interval.lb().number().value(); + number_t ub_value = interval.ub().number().value(); + + // Compute the interval, taking overflow into account. + // For a signed result, we need to ensure the signed and unsigned results match + // so for a 32-bit operation, 0x80000000 should be a positive 64-bit number not + // a sign extended negative one. + number_t lb = lb_value.truncate_to_uint(finite_width); + number_t ub = ub_value.truncate_to_uint(finite_width); + if (issigned) { + lb = lb.truncate_to(); + ub = ub.truncate_to(); + } + if (lb > ub) { + // Range wraps in the middle, so we cannot represent as an unsigned interval. + inv -= lhs; + return; + } + auto new_interval = interval_t{lb, ub}; + if (new_interval != interval) { + // Update the variable, which will lose any relationships to other variables. + inv.set(lhs, new_interval); + } +} + +static void overflow_signed(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { + const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} + : finite_width == 32 ? number_t{std::numeric_limits::max()} + : throw std::exception()}; + overflow_bounds(inv, lhs, span, finite_width, true); +} + +static void overflow_unsigned(NumAbsDomain& inv, const variable_t lhs, const int finite_width) { + const auto span{finite_width == 64 ? number_t{std::numeric_limits::max()} + : finite_width == 32 ? 
number_t{std::numeric_limits::max()} + : throw std::exception()}; + overflow_bounds(inv, lhs, span, finite_width, false); +} +static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const number_t& z, const int finite_width) { + inv.apply(op, xs, y, z, finite_width); + if (finite_width) { + inv.assign(xu, xs); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const number_t& z, const int finite_width) { + inv.apply(op, xu, y, z, finite_width); + if (finite_width) { + inv.assign(xs, xu); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_signed(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const variable_t z, const int finite_width) { + inv.apply(op, xs, y, z, finite_width); + if (finite_width) { + inv.assign(xu, xs); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +static void apply_unsigned(NumAbsDomain& inv, const binop_t& op, const variable_t xs, const variable_t xu, + const variable_t y, const variable_t z, const int finite_width) { + inv.apply(op, xu, y, z, finite_width); + if (finite_width) { + inv.assign(xs, xu); + overflow_signed(inv, xs, finite_width); + overflow_unsigned(inv, xu, finite_width); + } +} + +void ebpf_transformer::add(const variable_t lhs, const variable_t op2) { + apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); +} +void ebpf_transformer::add(const variable_t lhs, const number_t& op2) { + apply_signed(m_inv, arith_binop_t::ADD, lhs, lhs, lhs, op2, 0); +} +void ebpf_transformer::sub(const variable_t lhs, const variable_t op2) { + apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); +} +void 
ebpf_transformer::sub(const variable_t lhs, const number_t& op2) { + apply_signed(m_inv, arith_binop_t::SUB, lhs, lhs, lhs, op2, 0); +} + +// Add/subtract with overflow are both signed and unsigned. We can use either one of the two to compute the +// result before adjusting for overflow, though if one is top we want to use the other to retain precision. +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::add_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::ADD, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? lhss : lhsu, op2, + finite_width); +} +void ebpf_transformer::sub_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SUB, lhss, lhsu, !m_inv.eval_interval(lhss).is_top() ? 
lhss : lhsu, op2, + finite_width); +} + +void ebpf_transformer::neg(const variable_t lhss, const variable_t lhsu, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, -1, finite_width); +} +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const variable_t op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::mul(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::MUL, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::sdiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::SDIV, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::udiv(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UDIV, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::srem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_signed(m_inv, arith_binop_t::SREM, lhss, lhsu, lhss, op2, finite_width); +} +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int 
finite_width) { + apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::urem(const variable_t lhss, const variable_t lhsu, const number_t& op2, const int finite_width) { + apply_unsigned(m_inv, arith_binop_t::UREM, lhss, lhsu, lhsu, op2, finite_width); +} + +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_and(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + // Use finite width 64 to make the svalue be set as well as the uvalue. + apply_unsigned(m_inv, bitwise_binop_t::AND, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_or(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::OR, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const variable_t op2, + const int finite_width) { + apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, finite_width); +} +void ebpf_transformer::bitwise_xor(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::XOR, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const variable_t op2) { + apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); +} +void ebpf_transformer::shl_overflow(const variable_t lhss, const variable_t lhsu, const number_t& op2) { + apply_unsigned(m_inv, bitwise_binop_t::SHL, lhss, lhsu, lhsu, op2, 64); +} + +static void assume(NumAbsDomain& inv, const 
linear_constraint_t& cst) { inv += cst; } +void ebpf_transformer::assume(const linear_constraint_t& cst) { crab::assume(m_inv, cst); } + +/// Forget everything we know about the value of a variable. +void ebpf_transformer::havoc(const variable_t v) { m_inv -= v; } +void ebpf_transformer::havoc_offsets(const Reg& reg) { crab::havoc_offsets(m_inv, reg); } + +void ebpf_transformer::assign(const variable_t lhs, const variable_t rhs) { m_inv.assign(lhs, rhs); } + +static linear_constraint_t type_is_pointer(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type >= T_CTX; +} + +static linear_constraint_t type_is_number(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type == T_NUM; +} + +static linear_constraint_t type_is_number(const Reg& r) { return type_is_number(reg_pack(r)); } + +static linear_constraint_t type_is_not_stack(const reg_pack_t& r) { + using namespace crab::dsl_syntax; + return r.type != T_STACK; +} + +void ebpf_transformer::operator()(const Assume& s) { + const Condition cond = s.cond; + const auto dst = reg_pack(cond.left); + if (const auto psrc_reg = std::get_if(&cond.right)) { + const auto src_reg = *psrc_reg; + const auto src = reg_pack(src_reg); + if (type_inv.same_type(m_inv, cond.left, std::get(cond.right))) { + m_inv = type_inv.join_over_types(m_inv, cond.left, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_NUM) { + for (const linear_constraint_t& cst : + assume_cst_reg(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, src.svalue, src.uvalue)) { + inv += cst; + } + } else { + // Either pointers to a singleton region, + // or an equality comparison on map descriptors/pointers to non-singleton locations + if (const auto dst_offset = dom.get_type_offset_variable(cond.left, type)) { + if (const auto src_offset = dom.get_type_offset_variable(src_reg, type)) { + inv += assume_cst_offsets_reg(cond.op, dst_offset.value(), src_offset.value()); + } + } + } + }); + } else { + // We should only 
reach here if `--assume-assert` is off + assert(!thread_local_options.assume_assertions || dom.is_bottom()); + // be sound in any case, it happens to flush out bugs: + m_inv.set_to_top(); + } + } else { + const int64_t imm = gsl::narrow_cast(std::get(cond.right).v); + for (const linear_constraint_t& cst : assume_cst_imm(m_inv, cond.op, cond.is64, dst.svalue, dst.uvalue, imm)) { + assume(cst); + } + } +} + +void ebpf_transformer::operator()(const Undefined& a) {} + +// Simple truncation function usable with swap_endianness(). +template +constexpr T truncate(T x) noexcept { + return x; +} + +void ebpf_transformer::operator()(const Un& stmt) { + const auto dst = reg_pack(stmt.dst); + auto swap_endianness = [&](const variable_t v, auto be_or_le) { + if (m_inv.entail(type_is_number(stmt.dst))) { + if (const auto n = m_inv.eval_interval(v).singleton()) { + if (n->fits_cast_to()) { + m_inv.set(v, interval_t{be_or_le(n->cast_to())}); + return; + } + } + } + havoc(v); + havoc_offsets(stmt.dst); + }; + // Swap bytes if needed. For 64-bit types we need the weights to fit in a + // signed int64, but for smaller types we don't want sign extension, + // so we use unsigned which still fits in a signed int64. 
+ switch (stmt.op) { + case Un::Op::BE16: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::BE32: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::BE64: + if (!thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } + break; + case Un::Op::LE16: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::LE32: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } else { + swap_endianness(dst.svalue, truncate); + swap_endianness(dst.uvalue, truncate); + } + break; + case Un::Op::LE64: + if (thread_local_options.big_endian) { + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + } + break; + case Un::Op::SWAP16: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::SWAP32: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::SWAP64: + swap_endianness(dst.svalue, boost::endian::endian_reverse); + 
swap_endianness(dst.uvalue, boost::endian::endian_reverse); + break; + case Un::Op::NEG: + neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32); + havoc_offsets(stmt.dst); + break; + } +} + +void ebpf_transformer::operator()(const Exit& a) { + // Clean up any state for the current stack frame. + const std::string prefix = a.stack_frame_prefix; + if (prefix.empty()) { + return; + } + havoc_subprogram_stack(prefix); + restore_callee_saved_registers(prefix); + + // Restore r10. + constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, EBPF_SUBPROGRAM_STACK_SIZE, 64); +} + +void ebpf_transformer::operator()(const Jmp&) const { + // This is a NOP. It only exists to hold the jump preconditions. +} + +void ebpf_transformer::operator()(const Packet& a) { + const auto reg = reg_pack(R0_RETURN_VALUE); + constexpr Reg r0_reg{R0_RETURN_VALUE}; + type_inv.assign_type(m_inv, r0_reg, T_NUM); + havoc_offsets(r0_reg); + havoc(reg.svalue); + havoc(reg.uvalue); + scratch_caller_saved_registers(); +} + +void ebpf_transformer::do_load_stack(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr, + const int width, const Reg& src_reg) { + type_inv.assign_type(inv, target_reg, stack.load(inv, data_kind_t::types, addr, width)); + using namespace crab::dsl_syntax; + if (inv.entail(width <= reg_pack(src_reg).stack_numeric_size)) { + type_inv.assign_type(inv, target_reg, T_NUM); + } + + const reg_pack_t& target = reg_pack(target_reg); + if (width == 1 || width == 2 || width == 4 || width == 8) { + // Use the addr before we havoc the destination register since we might be getting the + // addr from that same register. 
+ const std::optional sresult = stack.load(inv, data_kind_t::svalues, addr, width); + const std::optional uresult = stack.load(inv, data_kind_t::uvalues, addr, width); + havoc_register(inv, target_reg); + inv.assign(target.svalue, sresult); + inv.assign(target.uvalue, uresult); + + if (type_inv.has_type(inv, target.type, T_CTX)) { + inv.assign(target.ctx_offset, stack.load(inv, data_kind_t::ctx_offsets, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_MAP) || type_inv.has_type(inv, target.type, T_MAP_PROGRAMS)) { + inv.assign(target.map_fd, stack.load(inv, data_kind_t::map_fds, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_PACKET)) { + inv.assign(target.packet_offset, stack.load(inv, data_kind_t::packet_offsets, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_SHARED)) { + inv.assign(target.shared_offset, stack.load(inv, data_kind_t::shared_offsets, addr, width)); + inv.assign(target.shared_region_size, stack.load(inv, data_kind_t::shared_region_sizes, addr, width)); + } + if (type_inv.has_type(inv, target.type, T_STACK)) { + inv.assign(target.stack_offset, stack.load(inv, data_kind_t::stack_offsets, addr, width)); + inv.assign(target.stack_numeric_size, stack.load(inv, data_kind_t::stack_numeric_sizes, addr, width)); + } + } else { + havoc_register(inv, target_reg); + } +} + +void ebpf_transformer::do_load_ctx(NumAbsDomain& inv, const Reg& target_reg, const linear_expression_t& addr_vague, + const int width) { + using namespace crab::dsl_syntax; + if (inv.is_bottom()) { + return; + } + + const ebpf_context_descriptor_t* desc = global_program_info->type.context_descriptor; + + const reg_pack_t& target = reg_pack(target_reg); + + if (desc->end < 0) { + havoc_register(inv, target_reg); + type_inv.assign_type(inv, target_reg, T_NUM); + return; + } + + const interval_t interval = inv.eval_interval(addr_vague); + const std::optional maybe_addr = interval.singleton(); + havoc_register(inv, target_reg); + + const bool 
may_touch_ptr = + interval.contains(desc->data) || interval.contains(desc->meta) || interval.contains(desc->end); + + if (!maybe_addr) { + if (may_touch_ptr) { + type_inv.havoc_type(inv, target_reg); + } else { + type_inv.assign_type(inv, target_reg, T_NUM); + } + return; + } + + const number_t addr = *maybe_addr; + + // We use offsets for packet data, data_end, and meta during verification, + // but at runtime they will be 64-bit pointers. We can use the offset values + // for verification like we use map_fd's as a proxy for maps which + // at runtime are actually 64-bit memory pointers. + const int offset_width = desc->end - desc->data; + if (addr == desc->data) { + if (width == offset_width) { + inv.assign(target.packet_offset, 0); + } + } else if (addr == desc->end) { + if (width == offset_width) { + inv.assign(target.packet_offset, variable_t::packet_size()); + } + } else if (addr == desc->meta) { + if (width == offset_width) { + inv.assign(target.packet_offset, variable_t::meta_offset()); + } + } else { + if (may_touch_ptr) { + type_inv.havoc_type(inv, target_reg); + } else { + type_inv.assign_type(inv, target_reg, T_NUM); + } + return; + } + if (width == offset_width) { + type_inv.assign_type(inv, target_reg, T_PACKET); + inv += 4098 <= target.svalue; + inv += target.svalue <= PTR_MAX; + } +} + +void ebpf_transformer::do_load_packet_or_shared(NumAbsDomain& inv, const Reg& target_reg, + const linear_expression_t& addr, const int width) { + if (inv.is_bottom()) { + return; + } + const reg_pack_t& target = reg_pack(target_reg); + + type_inv.assign_type(inv, target_reg, T_NUM); + havoc_register(inv, target_reg); + + // A 1 or 2 byte copy results in a limited range of values that may be used as array indices. 
+ if (width == 1) { + const interval_t full = interval_t::full(); + inv.set(target.svalue, full); + inv.set(target.uvalue, full); + } else if (width == 2) { + const interval_t full = interval_t::full(); + inv.set(target.svalue, full); + inv.set(target.uvalue, full); + } +} + +void ebpf_transformer::do_load(const Mem& b, const Reg& target_reg) { + using namespace crab::dsl_syntax; + + const auto mem_reg = reg_pack(b.access.basereg); + const int width = b.access.width; + const int offset = b.access.offset; + + if (b.access.basereg.v == R10_STACK_POINTER) { + const linear_expression_t addr = mem_reg.stack_offset + offset; + do_load_stack(m_inv, target_reg, addr, width, b.access.basereg); + return; + } + + m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, type_encoding_t type) { + switch (type) { + case T_UNINIT: return; + case T_MAP: return; + case T_MAP_PROGRAMS: return; + case T_NUM: return; + case T_CTX: { + linear_expression_t addr = mem_reg.ctx_offset + offset; + do_load_ctx(inv, target_reg, addr, width); + break; + } + case T_STACK: { + linear_expression_t addr = mem_reg.stack_offset + offset; + do_load_stack(inv, target_reg, addr, width, b.access.basereg); + break; + } + case T_PACKET: { + linear_expression_t addr = mem_reg.packet_offset + offset; + do_load_packet_or_shared(inv, target_reg, addr, width); + break; + } + default: { + linear_expression_t addr = mem_reg.shared_offset + offset; + do_load_packet_or_shared(inv, target_reg, addr, width); + break; + } + } + }); +} + +void ebpf_transformer::do_store_stack(NumAbsDomain& inv, const linear_expression_t& addr, const int width, + const linear_expression_t& val_type, const linear_expression_t& val_svalue, + const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { + { + const std::optional var = stack.store_type(inv, addr, width, val_type); + type_inv.assign_type(inv, var, val_type); + } + if (width == 8) { + inv.assign(stack.store(inv, data_kind_t::svalues, 
addr, width, val_svalue), val_svalue); + inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_CTX)) { + inv.assign(stack.store(inv, data_kind_t::ctx_offsets, addr, width, opt_val_reg->ctx_offset), + opt_val_reg->ctx_offset); + } else { + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + } + + if (opt_val_reg && + (type_inv.has_type(m_inv, val_type, T_MAP) || type_inv.has_type(m_inv, val_type, T_MAP_PROGRAMS))) { + inv.assign(stack.store(inv, data_kind_t::map_fds, addr, width, opt_val_reg->map_fd), opt_val_reg->map_fd); + } else { + stack.havoc(inv, data_kind_t::map_fds, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_PACKET)) { + inv.assign(stack.store(inv, data_kind_t::packet_offsets, addr, width, opt_val_reg->packet_offset), + opt_val_reg->packet_offset); + } else { + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_SHARED)) { + inv.assign(stack.store(inv, data_kind_t::shared_offsets, addr, width, opt_val_reg->shared_offset), + opt_val_reg->shared_offset); + inv.assign(stack.store(inv, data_kind_t::shared_region_sizes, addr, width, opt_val_reg->shared_region_size), + opt_val_reg->shared_region_size); + } else { + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + } + + if (opt_val_reg && type_inv.has_type(m_inv, val_type, T_STACK)) { + inv.assign(stack.store(inv, data_kind_t::stack_offsets, addr, width, opt_val_reg->stack_offset), + opt_val_reg->stack_offset); + inv.assign(stack.store(inv, data_kind_t::stack_numeric_sizes, addr, width, opt_val_reg->stack_numeric_size), + opt_val_reg->stack_numeric_size); + } else { + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); + } + } else { + if ((width == 1 || 
width == 2 || width == 4) && type_inv.get_type(m_inv, val_type) == T_NUM) { + // Keep track of numbers on the stack that might be used as array indices. + inv.assign(stack.store(inv, data_kind_t::svalues, addr, width, val_svalue), val_svalue); + inv.assign(stack.store(inv, data_kind_t::uvalues, addr, width, val_uvalue), val_uvalue); + } else { + stack.havoc(inv, data_kind_t::svalues, addr, width); + stack.havoc(inv, data_kind_t::uvalues, addr, width); + } + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + stack.havoc(inv, data_kind_t::map_fds, addr, width); + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + stack.havoc(inv, data_kind_t::stack_numeric_sizes, addr, width); + } + + // Update stack_numeric_size for any stack type variables. + // stack_numeric_size holds the number of continuous bytes starting from stack_offset that are known to be numeric. + auto updated_lb = m_inv.eval_interval(addr).lb(); + auto updated_ub = m_inv.eval_interval(addr).ub() + width; + for (const variable_t type_variable : variable_t::get_type_variables()) { + if (!type_inv.has_type(inv, type_variable, T_STACK)) { + continue; + } + const variable_t stack_offset_variable = variable_t::kind_var(data_kind_t::stack_offsets, type_variable); + const variable_t stack_numeric_size_variable = + variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); + + using namespace crab::dsl_syntax; + // See if the variable's numeric interval overlaps with changed bytes. 
+ if (m_inv.intersect(dsl_syntax::operator<=(addr, stack_offset_variable + stack_numeric_size_variable)) && + m_inv.intersect(operator>=(addr + width, stack_offset_variable))) { + havoc(stack_numeric_size_variable); + recompute_stack_numeric_size(m_inv, type_variable); + } + } +} + +void ebpf_transformer::operator()(const Mem& b) { + if (m_inv.is_bottom()) { + return; + } + if (const auto preg = std::get_if(&b.value)) { + if (b.is_load) { + do_load(b, *preg); + } else { + const auto data_reg = reg_pack(*preg); + do_mem_store(b, data_reg.type, data_reg.svalue, data_reg.uvalue, data_reg); + } + } else { + const uint64_t imm = std::get(b.value).v; + do_mem_store(b, T_NUM, to_signed(imm), imm, {}); + } +} + +void ebpf_transformer::do_mem_store(const Mem& b, const linear_expression_t& val_type, + const linear_expression_t& val_svalue, const linear_expression_t& val_uvalue, + const std::optional& opt_val_reg) { + if (m_inv.is_bottom()) { + return; + } + const int width = b.access.width; + const number_t offset{b.access.offset}; + if (b.access.basereg.v == R10_STACK_POINTER) { + const auto r10_stack_offset = reg_pack(b.access.basereg).stack_offset; + const auto r10_interval = m_inv.eval_interval(r10_stack_offset); + if (r10_interval.is_singleton()) { + const int32_t stack_offset = r10_interval.singleton()->cast_to(); + const number_t base_addr{stack_offset}; + do_store_stack(m_inv, base_addr + offset, width, val_type, val_svalue, val_uvalue, opt_val_reg); + } + return; + } + m_inv = type_inv.join_over_types(m_inv, b.access.basereg, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_STACK) { + const auto base_addr = linear_expression_t(dom.get_type_offset_variable(b.access.basereg, type).value()); + do_store_stack(inv, dsl_syntax::operator+(base_addr, offset), width, val_type, val_svalue, val_uvalue, + opt_val_reg); + } + // do nothing for any other type + }); +} + +// Construct a Bin operation that does the main operation that a given Atomic operation 
does atomically. +static Bin atomic_to_bin(const Atomic& a) { + Bin bin{.dst = Reg{R11_ATOMIC_SCRATCH}, .v = a.valreg, .is64 = a.access.width == sizeof(uint64_t), .lddw = false}; + switch (a.op) { + case Atomic::Op::ADD: bin.op = Bin::Op::ADD; break; + case Atomic::Op::OR: bin.op = Bin::Op::OR; break; + case Atomic::Op::AND: bin.op = Bin::Op::AND; break; + case Atomic::Op::XOR: bin.op = Bin::Op::XOR; break; + case Atomic::Op::XCHG: + case Atomic::Op::CMPXCHG: bin.op = Bin::Op::MOV; break; + default: throw std::exception(); + } + return bin; +} + +void ebpf_transformer::operator()(const Atomic& a) { + if (m_inv.is_bottom()) { + return; + } + if (!m_inv.entail(type_is_pointer(reg_pack(a.access.basereg))) || + !m_inv.entail(type_is_number(reg_pack(a.valreg)))) { + return; + } + if (m_inv.entail(type_is_not_stack(reg_pack(a.access.basereg)))) { + // Shared memory regions are volatile so we can just havoc + // any register that will be updated. + if (a.op == Atomic::Op::CMPXCHG) { + havoc_register(m_inv, Reg{R0_RETURN_VALUE}); + } else if (a.fetch) { + havoc_register(m_inv, a.valreg); + } + return; + } + + // Fetch the current value into the R11 pseudo-register. + constexpr Reg r11{R11_ATOMIC_SCRATCH}; + (*this)(Mem{.access = a.access, .value = r11, .is_load = true}); + + // Compute the new value in R11. + (*this)(atomic_to_bin(a)); + + if (a.op == Atomic::Op::CMPXCHG) { + // For CMPXCHG, store the original value in r0. + (*this)(Mem{.access = a.access, .value = Reg{R0_RETURN_VALUE}, .is_load = true}); + + // For the destination, there are 3 possibilities: + // 1) dst.value == r0.value : set R11 to valreg + // 2) dst.value != r0.value : don't modify R11 + // 3) dst.value may or may not == r0.value : set R11 to the union of R11 and valreg + // For now we just havoc the value of R11. + havoc_register(m_inv, r11); + } else if (a.fetch) { + // For other FETCH operations, store the original value in the src register. 
+ (*this)(Mem{.access = a.access, .value = a.valreg, .is_load = true}); + } + + // Store the new value back in the original shared memory location. + // Note that do_mem_store() currently doesn't track shared memory values, + // but stack memory values are tracked and are legal here. + (*this)(Mem{.access = a.access, .value = r11, .is_load = false}); + + // Clear the R11 pseudo-register. + havoc_register(m_inv, r11); + type_inv.havoc_type(m_inv, r11); +} + +void ebpf_transformer::operator()(const Call& call) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + std::optional maybe_fd_reg{}; + for (ArgSingle param : call.singles) { + switch (param.kind) { + case ArgSingle::Kind::MAP_FD: maybe_fd_reg = param.reg; break; + case ArgSingle::Kind::ANYTHING: + case ArgSingle::Kind::MAP_FD_PROGRAMS: + case ArgSingle::Kind::PTR_TO_MAP_KEY: + case ArgSingle::Kind::PTR_TO_MAP_VALUE: + case ArgSingle::Kind::PTR_TO_CTX: + // Do nothing. We don't track the content of relevant memory regions + break; + } + } + for (ArgPair param : call.pairs) { + switch (param.kind) { + case ArgPair::Kind::PTR_TO_READABLE_MEM_OR_NULL: + case ArgPair::Kind::PTR_TO_READABLE_MEM: + // Do nothing. No side effect allowed. + break; + + case ArgPair::Kind::PTR_TO_WRITABLE_MEM: { + bool store_numbers = true; + auto variable = dom.get_type_offset_variable(param.mem); + if (!variable.has_value()) { + // checked by the checker + break; + } + variable_t addr = variable.value(); + variable_t width = reg_pack(param.size).svalue; + + m_inv = type_inv.join_over_types(m_inv, param.mem, [&](NumAbsDomain& inv, const type_encoding_t type) { + if (type == T_STACK) { + // Pointer to a memory region that the called function may change, + // so we must havoc. 
+ stack.havoc(inv, data_kind_t::types, addr, width); + stack.havoc(inv, data_kind_t::svalues, addr, width); + stack.havoc(inv, data_kind_t::uvalues, addr, width); + stack.havoc(inv, data_kind_t::ctx_offsets, addr, width); + stack.havoc(inv, data_kind_t::map_fds, addr, width); + stack.havoc(inv, data_kind_t::packet_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_offsets, addr, width); + stack.havoc(inv, data_kind_t::stack_offsets, addr, width); + stack.havoc(inv, data_kind_t::shared_region_sizes, addr, width); + } else { + store_numbers = false; + } + }); + if (store_numbers) { + // Functions are not allowed to write sensitive data, + // and initialization is guaranteed + stack.store_numbers(m_inv, addr, width); + } + } + } + } + + constexpr Reg r0_reg{R0_RETURN_VALUE}; + const auto r0_pack = reg_pack(r0_reg); + havoc(r0_pack.stack_numeric_size); + if (call.is_map_lookup) { + // This is the only way to get a null pointer + if (maybe_fd_reg) { + if (const auto map_type = dom.get_map_type(*maybe_fd_reg)) { + if (global_program_info->platform->get_map_type(*map_type).value_type == EbpfMapValueType::MAP) { + if (const auto inner_map_fd = dom.get_map_inner_map_fd(*maybe_fd_reg)) { + do_load_mapfd(r0_reg, to_signed(*inner_map_fd), true); + goto out; + } + } else { + assign_valid_ptr(r0_reg, true); + assign(r0_pack.shared_offset, 0); + m_inv.set(r0_pack.shared_region_size, dom.get_map_value_size(*maybe_fd_reg)); + type_inv.assign_type(m_inv, r0_reg, T_SHARED); + } + } + } + assign_valid_ptr(r0_reg, true); + assign(r0_pack.shared_offset, 0); + type_inv.assign_type(m_inv, r0_reg, T_SHARED); + } else { + havoc(r0_pack.svalue); + havoc(r0_pack.uvalue); + havoc_offsets(r0_reg); + type_inv.assign_type(m_inv, r0_reg, T_NUM); + // assume(r0_pack.value < 0); for INTEGER_OR_NO_RETURN_IF_SUCCEED. 
+ } +out: + scratch_caller_saved_registers(); + if (call.reallocate_packet) { + forget_packet_pointers(); + } +} + +void ebpf_transformer::operator()(const CallLocal& call) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + save_callee_saved_registers(call.stack_frame_prefix); + + // Update r10. + constexpr Reg r10_reg{R10_STACK_POINTER}; + add(r10_reg, -EBPF_SUBPROGRAM_STACK_SIZE, 64); +} + +void ebpf_transformer::operator()(const Callx& callx) { + using namespace crab::dsl_syntax; + if (m_inv.is_bottom()) { + return; + } + + // Look up the helper function id. + const reg_pack_t& reg = reg_pack(callx.func); + const auto src_interval = m_inv.eval_interval(reg.svalue); + if (const auto sn = src_interval.singleton()) { + if (sn->fits()) { + // We can now process it as if the id was immediate. + const int32_t imm = sn->cast_to(); + if (!global_program_info->platform->is_helper_usable(imm)) { + return; + } + const Call call = make_call(imm, *global_program_info->platform); + (*this)(call); + } + } +} + +void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const bool maybe_null) { + const EbpfMapDescriptor& desc = global_program_info->platform->get_map_descriptor(mapfd); + const EbpfMapType& type = global_program_info->platform->get_map_type(desc.type); + if (type.value_type == EbpfMapValueType::PROGRAM) { + type_inv.assign_type(m_inv, dst_reg, T_MAP_PROGRAMS); + } else { + type_inv.assign_type(m_inv, dst_reg, T_MAP); + } + const reg_pack_t& dst = reg_pack(dst_reg); + assign(dst.map_fd, mapfd); + assign_valid_ptr(dst_reg, maybe_null); +} + +void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } + +void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { + using namespace crab::dsl_syntax; + const reg_pack_t& reg = reg_pack(dst_reg); + havoc(reg.svalue); + havoc(reg.uvalue); + if (maybe_null) { + m_inv += 0 <= reg.svalue; + } else { + m_inv += 0 < 
reg.svalue; + } + m_inv += reg.svalue <= PTR_MAX; + assign(reg.uvalue, reg.svalue); +} + +// If nothing is known of the stack_numeric_size, +// try to recompute the stack_numeric_size. +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const variable_t type_variable) const { + const variable_t stack_numeric_size_variable = + variable_t::kind_var(data_kind_t::stack_numeric_sizes, type_variable); + + if (!inv.eval_interval(stack_numeric_size_variable).is_top()) { + return; + } + + if (type_inv.has_type(inv, type_variable, T_STACK)) { + const int numeric_size = + stack.min_all_num_size(inv, variable_t::kind_var(data_kind_t::stack_offsets, type_variable)); + if (numeric_size > 0) { + inv.assign(stack_numeric_size_variable, numeric_size); + } + } +} + +void ebpf_transformer::recompute_stack_numeric_size(NumAbsDomain& inv, const Reg& reg) const { + recompute_stack_numeric_size(inv, reg_pack(reg).type); +} + +void ebpf_transformer::add(const Reg& reg, const int imm, const int finite_width) { + const auto dst = reg_pack(reg); + const auto offset = dom.get_type_offset_variable(reg); + add_overflow(dst.svalue, dst.uvalue, imm, finite_width); + if (offset.has_value()) { + add(offset.value(), imm); + if (imm > 0) { + // Since the start offset is increasing but + // the end offset is not, the numeric size decreases. + sub(dst.stack_numeric_size, imm); + } else if (imm < 0) { + havoc(dst.stack_numeric_size); + } + recompute_stack_numeric_size(m_inv, reg); + } +} + +void ebpf_transformer::shl(const Reg& dst_reg, int imm, const int finite_width) { + const reg_pack_t dst = reg_pack(dst_reg); + + // The BPF ISA requires masking the imm. 
+ imm &= finite_width - 1; + + if (m_inv.entail(type_is_number(dst))) { + const auto interval = m_inv.eval_interval(dst.uvalue); + if (interval.finite_size()) { + const number_t lb = interval.lb().number().value(); + const number_t ub = interval.ub().number().value(); + uint64_t lb_n = lb.cast_to(); + uint64_t ub_n = ub.cast_to(); + const uint64_t uint_max = finite_width == 64 ? uint64_t{std::numeric_limits::max()} + : uint64_t{std::numeric_limits::max()}; + if (lb_n >> (finite_width - imm) != ub_n >> (finite_width - imm)) { + // The bits that will be shifted out to the left are different, + // which means all combinations of remaining bits are possible. + lb_n = 0; + ub_n = uint_max << imm & uint_max; + } else { + // The bits that will be shifted out to the left are identical + // for all values in the interval, so we can safely shift left + // to get a new interval. + lb_n = lb_n << imm & uint_max; + ub_n = ub_n << imm & uint_max; + } + m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.svalue, dst.uvalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + } + shl_overflow(dst.svalue, dst.uvalue, imm); + havoc_offsets(dst_reg); +} + +void ebpf_transformer::lshr(const Reg& dst_reg, int imm, int finite_width) { + reg_pack_t dst = reg_pack(dst_reg); + + // The BPF ISA requires masking the imm. 
+ imm &= finite_width - 1; + + if (m_inv.entail(type_is_number(dst))) { + auto interval = m_inv.eval_interval(dst.uvalue); + number_t lb_n{0}; + number_t ub_n{std::numeric_limits::max() >> imm}; + if (interval.finite_size()) { + number_t lb = interval.lb().number().value(); + number_t ub = interval.ub().number().value(); + if (finite_width == 64) { + lb_n = lb.cast_to() >> imm; + ub_n = ub.cast_to() >> imm; + } else { + number_t lb_w = lb.cast_to_sint(finite_width); + number_t ub_w = ub.cast_to_sint(finite_width); + lb_n = lb_w.cast_to() >> imm; + ub_n = ub_w.cast_to() >> imm; + + // The interval must be valid since a signed range crossing 0 + // was earlier converted to a full unsigned range. + assert(lb_n <= ub_n); + } + } + m_inv.set(dst.uvalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.svalue, dst.uvalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(dst_reg); +} + +static int _movsx_bits(const Bin::Op op) { + switch (op) { + case Bin::Op::MOVSX8: return 8; + case Bin::Op::MOVSX16: return 16; + case Bin::Op::MOVSX32: return 32; + default: throw std::exception(); + } +} + +void ebpf_transformer::sign_extend(const Reg& dst_reg, const linear_expression_t& right_svalue, const int finite_width, + const Bin::Op op) { + using namespace crab; + + const int bits = _movsx_bits(op); + const reg_pack_t dst = reg_pack(dst_reg); + interval_t right_interval = m_inv.eval_interval(right_svalue); + type_inv.assign_type(m_inv, dst_reg, T_NUM); + havoc_offsets(dst_reg); + const int64_t span = 1ULL << bits; + if (right_interval.ub() - right_interval.lb() >= span) { + // Interval covers the full space. + if (bits == 64) { + havoc(dst.svalue); + return; + } + right_interval = interval_t::signed_int(bits); + } + const int64_t mask = 1ULL << (bits - 1); + + // Sign extend each bound. 
+ int64_t lb = right_interval.lb().number().value().cast_to(); + lb &= span - 1; + lb = (lb ^ mask) - mask; + int64_t ub = right_interval.ub().number().value().cast_to(); + ub &= span - 1; + ub = (ub ^ mask) - mask; + m_inv.set(dst.svalue, interval_t{lb, ub}); + + if (finite_width) { + m_inv.assign(dst.uvalue, dst.svalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + } +} + +void ebpf_transformer::ashr(const Reg& dst_reg, const linear_expression_t& right_svalue, int finite_width) { + using namespace crab; + + reg_pack_t dst = reg_pack(dst_reg); + if (m_inv.entail(type_is_number(dst))) { + interval_t left_interval = interval_t::bottom(); + interval_t right_interval = interval_t::bottom(); + interval_t left_interval_positive = interval_t::bottom(); + interval_t left_interval_negative = interval_t::bottom(); + get_signed_intervals(m_inv, finite_width == 64, dst.svalue, dst.uvalue, right_svalue, left_interval, + right_interval, left_interval_positive, left_interval_negative); + if (auto sn = right_interval.singleton()) { + // The BPF ISA requires masking the imm. 
+ int64_t imm = sn->cast_to() & (finite_width - 1); + + int64_t lb_n = std::numeric_limits::min() >> imm; + int64_t ub_n = std::numeric_limits::max() >> imm; + if (left_interval.finite_size()) { + const auto [lb, ub] = left_interval.pair_number(); + if (finite_width == 64) { + lb_n = lb.cast_to() >> imm; + ub_n = ub.cast_to() >> imm; + } else { + number_t lb_w = lb.cast_to_sint(finite_width) >> gsl::narrow(imm); + number_t ub_w = ub.cast_to_sint(finite_width) >> gsl::narrow(imm); + if (lb_w.cast_to() <= ub_w.cast_to()) { + lb_n = lb_w.cast_to(); + ub_n = ub_w.cast_to(); + } + } + } + m_inv.set(dst.svalue, interval_t{lb_n, ub_n}); + m_inv.assign(dst.uvalue, dst.svalue); + overflow_signed(m_inv, dst.svalue, finite_width); + overflow_unsigned(m_inv, dst.uvalue, finite_width); + return; + } + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(dst_reg); +} + +static void apply(NumAbsDomain& inv, const binop_t& op, const variable_t x, const variable_t y, const variable_t z) { + inv.apply(op, x, y, z, 0); +} + +void ebpf_transformer::operator()(const Bin& bin) { + using namespace crab::dsl_syntax; + + auto dst = reg_pack(bin.dst); + int finite_width = bin.is64 ? 64 : 32; + + if (auto pimm = std::get_if(&bin.v)) { + // dst += K + int64_t imm; + if (bin.is64) { + // Use the full signed value. + imm = to_signed(pimm->v); + } else { + // Use only the low 32 bits of the value. + imm = gsl::narrow_cast(pimm->v); + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + // If this is a 32-bit operation and the destination is not a number, forget everything about the register. + if (!type_inv.has_type(m_inv, bin.dst, T_NUM)) { + havoc_register(m_inv, bin.dst); + havoc_offsets(bin.dst); + havoc(dst.type); + } + } + switch (bin.op) { + case Bin::Op::MOV: + assign(dst.svalue, imm); + assign(dst.uvalue, imm); + overflow_unsigned(m_inv, dst.uvalue, bin.is64 ? 
64 : 32); + type_inv.assign_type(m_inv, bin.dst, T_NUM); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: CRAB_ERROR("Unsupported operation"); + case Bin::Op::ADD: + if (imm == 0) { + return; + } + add(bin.dst, gsl::narrow(imm), finite_width); + break; + case Bin::Op::SUB: + if (imm == 0) { + return; + } + add(bin.dst, gsl::narrow(-imm), finite_width); + break; + case Bin::Op::MUL: + mul(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UDIV: + udiv(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UMOD: + urem(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SDIV: + sdiv(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SMOD: + srem(dst.svalue, dst.uvalue, imm, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::OR: + bitwise_or(dst.svalue, dst.uvalue, imm); + havoc_offsets(bin.dst); + break; + case Bin::Op::AND: + // FIX: what to do with ptr&-8 as in counter/simple_loop_unrolled? + bitwise_and(dst.svalue, dst.uvalue, imm); + if (gsl::narrow(imm) > 0) { + // AND with immediate is only a 32-bit operation so svalue and uvalue are the same. 
+ assume(dst.svalue <= imm); + assume(dst.uvalue <= imm); + assume(0 <= dst.svalue); + assume(0 <= dst.uvalue); + } + havoc_offsets(bin.dst); + break; + case Bin::Op::LSH: shl(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::RSH: lshr(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::ARSH: ashr(bin.dst, gsl::narrow(imm), finite_width); break; + case Bin::Op::XOR: + bitwise_xor(dst.svalue, dst.uvalue, imm); + havoc_offsets(bin.dst); + break; + } + } else { + // dst op= src + auto src_reg = std::get(bin.v); + auto src = reg_pack(src_reg); + switch (bin.op) { + case Bin::Op::ADD: { + if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { + // both must be numbers + add_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); + } else { + // Here we're not sure that lhs and rhs are the same type; they might be. + // But previous assertions should fail unless we know that exactly one of lhs or rhs is a pointer. + m_inv = + type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t dst_type) { + inv = type_inv.join_over_types( + inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t src_type) { + if (dst_type == T_NUM && src_type != T_NUM) { + // num += ptr + type_inv.assign_type(inv, bin.dst, src_type); + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, src_type)) { + crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst.svalue, + dom.get_type_offset_variable(src_reg, src_type).value()); + } + if (src_type == T_SHARED) { + inv.assign(dst.shared_region_size, src.shared_region_size); + } + } else if (dst_type != T_NUM && src_type == T_NUM) { + // ptr += num + type_inv.assign_type(inv, bin.dst, dst_type); + if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, dst_type)) { + crab::apply(inv, arith_binop_t::ADD, dst_offset.value(), dst_offset.value(), + src.svalue); + if (dst_type == T_STACK) { + // Reduce the numeric size. 
+ using namespace crab::dsl_syntax; + if (m_inv.intersect(src.svalue < 0)) { + inv -= dst.stack_numeric_size; + recompute_stack_numeric_size(inv, dst.type); + } else { + apply_signed(inv, arith_binop_t::SUB, dst.stack_numeric_size, + dst.stack_numeric_size, dst.stack_numeric_size, src.svalue, + 0); + } + } + } + } else if (dst_type == T_NUM && src_type == T_NUM) { + // dst and src don't necessarily have the same type, but among the possibilities + // enumerated is the case where they are both numbers. + apply_signed(inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, + src.svalue, finite_width); + } else { + // We ignore the cases here that do not match the assumption described + // above. Joining bottom with another results will leave the other + // results unchanged. + inv.set_to_bottom(); + } + }); + }); + // careful: change dst.value only after dealing with offset + apply_signed(m_inv, arith_binop_t::ADD, dst.svalue, dst.uvalue, dst.svalue, src.svalue, finite_width); + } + break; + } + case Bin::Op::SUB: { + if (type_inv.same_type(m_inv, bin.dst, std::get(bin.v))) { + // src and dest have the same type. + m_inv = type_inv.join_over_types(m_inv, bin.dst, [&](NumAbsDomain& inv, const type_encoding_t type) { + switch (type) { + case T_NUM: + // This is: sub_overflow(inv, dst.value, src.value, finite_width); + apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst.svalue, src.svalue, + finite_width); + type_inv.assign_type(inv, bin.dst, T_NUM); + crab::havoc_offsets(inv, bin.dst); + break; + default: + // ptr -= ptr + // Assertions should make sure we only perform this on non-shared pointers. 
+ if (const auto dst_offset = dom.get_type_offset_variable(bin.dst, type)) { + apply_signed(inv, arith_binop_t::SUB, dst.svalue, dst.uvalue, dst_offset.value(), + dom.get_type_offset_variable(src_reg, type).value(), finite_width); + inv -= dst_offset.value(); + } + crab::havoc_offsets(inv, bin.dst); + type_inv.assign_type(inv, bin.dst, T_NUM); + break; + } + }); + } else { + // We're not sure that lhs and rhs are the same type. + // Either they're different, or at least one is not a singleton. + if (type_inv.get_type(m_inv, std::get(bin.v)) != T_NUM) { + type_inv.havoc_type(m_inv, bin.dst); + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + } else { + sub_overflow(dst.svalue, dst.uvalue, src.svalue, finite_width); + if (auto dst_offset = dom.get_type_offset_variable(bin.dst)) { + sub(dst_offset.value(), src.svalue); + if (type_inv.has_type(m_inv, dst.type, T_STACK)) { + // Reduce the numeric size. + using namespace crab::dsl_syntax; + if (m_inv.intersect(src.svalue > 0)) { + m_inv -= dst.stack_numeric_size; + recompute_stack_numeric_size(m_inv, dst.type); + } else { + crab::apply(m_inv, arith_binop_t::ADD, dst.stack_numeric_size, dst.stack_numeric_size, + src.svalue); + } + } + } + } + } + break; + } + case Bin::Op::MUL: + mul(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UDIV: + udiv(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::UMOD: + urem(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SDIV: + sdiv(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::SMOD: + srem(dst.svalue, dst.uvalue, src.svalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::OR: + bitwise_or(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::AND: + bitwise_and(dst.svalue, dst.uvalue, 
src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::LSH: + if (m_inv.entail(type_is_number(src_reg))) { + auto src_interval = m_inv.eval_interval(src.uvalue); + if (std::optional sn = src_interval.singleton()) { + // truncate to uint64? + uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); + if (imm <= std::numeric_limits::max()) { + if (!bin.is64) { + // Use only the low 32 bits of the value. + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } + shl(bin.dst, gsl::narrow_cast(imm), finite_width); + break; + } + } + } + shl_overflow(dst.svalue, dst.uvalue, src.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::RSH: + if (m_inv.entail(type_is_number(src_reg))) { + auto src_interval = m_inv.eval_interval(src.uvalue); + if (std::optional sn = src_interval.singleton()) { + uint64_t imm = sn->cast_to() & (bin.is64 ? 63 : 31); + if (imm <= std::numeric_limits::max()) { + if (!bin.is64) { + // Use only the low 32 bits of the value. + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } + lshr(bin.dst, gsl::narrow_cast(imm), finite_width); + break; + } + } + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::ARSH: + if (m_inv.entail(type_is_number(src_reg))) { + ashr(bin.dst, src.svalue, finite_width); + break; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::XOR: + bitwise_xor(dst.svalue, dst.uvalue, src.uvalue, finite_width); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOVSX8: + case Bin::Op::MOVSX16: + case Bin::Op::MOVSX32: + // Keep relational information if operation is a no-op. 
+ if (dst.svalue == src.svalue && + m_inv.eval_interval(dst.svalue) <= interval_t::signed_int(_movsx_bits(bin.op))) { + return; + } + if (m_inv.entail(type_is_number(src_reg))) { + sign_extend(bin.dst, src.svalue, finite_width, bin.op); + break; + } + havoc(dst.svalue); + havoc(dst.uvalue); + havoc_offsets(bin.dst); + break; + case Bin::Op::MOV: + // Keep relational information if operation is a no-op. + if (dst.svalue == src.svalue && + m_inv.eval_interval(dst.uvalue) <= interval_t::unsigned_int(bin.is64 ? 64 : 32)) { + return; + } + assign(dst.svalue, src.svalue); + assign(dst.uvalue, src.uvalue); + havoc_offsets(bin.dst); + m_inv = type_inv.join_over_types(m_inv, src_reg, [&](NumAbsDomain& inv, const type_encoding_t type) { + switch (type) { + case T_CTX: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.ctx_offset, src.ctx_offset); + } + break; + case T_MAP: + case T_MAP_PROGRAMS: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.map_fd, src.map_fd); + } + break; + case T_PACKET: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.packet_offset, src.packet_offset); + } + break; + case T_SHARED: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.shared_region_size, src.shared_region_size); + inv.assign(dst.shared_offset, src.shared_offset); + } + break; + case T_STACK: + if (bin.is64) { + inv.assign(dst.type, type); + inv.assign(dst.stack_offset, src.stack_offset); + inv.assign(dst.stack_numeric_size, src.stack_numeric_size); + } + break; + default: inv.assign(dst.type, type); break; + } + }); + if (bin.is64) { + // Add dst.type=src.type invariant. + if (bin.dst.v != std::get(bin.v).v || type_inv.get_type(m_inv, dst.type) == T_UNINIT) { + // Only forget the destination type if we're copying from a different register, + // or from the same uninitialized register. 
+ havoc(dst.type); + } + type_inv.assign_type(m_inv, bin.dst, std::get(bin.v)); + } + break; + } + } + if (!bin.is64) { + bitwise_and(dst.svalue, dst.uvalue, std::numeric_limits::max()); + } +} + +void ebpf_transformer::initialize_loop_counter(const label_t& label) { + m_inv.assign(variable_t::loop_counter(to_string(label)), 0); +} + +void ebpf_transformer::operator()(const IncrementLoopCounter& ins) { + const auto counter = variable_t::loop_counter(to_string(ins.name)); + this->add(counter, 1); +} + +void ebpf_domain_initialize_loop_counter(ebpf_domain_t& dom, const label_t& label) { + ebpf_transformer{dom}.initialize_loop_counter(label); +} + +} // namespace crab diff --git a/src/crab/fwd_analyzer.cpp b/src/crab/fwd_analyzer.cpp index 992ae1964..f77e8e90a 100644 --- a/src/crab/fwd_analyzer.cpp +++ b/src/crab/fwd_analyzer.cpp @@ -68,8 +68,16 @@ class interleaved_fwd_fixpoint_iterator_t final { void set_pre(const label_t& label, const ebpf_domain_t& v) { _pre[label] = v; } void transform_to_post(const label_t& label, ebpf_domain_t pre) { - const basic_block_t& bb = _cfg.get_node(label); - pre(bb); + const GuardedInstruction& ins = _cfg.at(label); + + if (thread_local_options.assume_assertions) { + for (const auto& assertion : ins.preconditions) { + // avoid redundant errors + ebpf_domain_assume(pre, assertion); + } + } + ebpf_domain_transform(pre, ins.cmd); + _post[label] = std::move(pre); } @@ -129,7 +137,8 @@ std::pair run_forward_analyzer(const cfg_t // This enables enforcement of upper bounds on loop iterations // during program verification. // TODO: Consider making this an instruction instead of an explicit call. 
- analyzer._wto.for_each_loop_head([&](const label_t& label) { entry_inv.initialize_loop_counter(label); }); + analyzer._wto.for_each_loop_head( + [&](const label_t& label) { ebpf_domain_initialize_loop_counter(entry_inv, label); }); } analyzer.set_pre(cfg.entry_label(), entry_inv); for (const auto& component : analyzer._wto) { diff --git a/src/crab/interval.hpp b/src/crab/interval.hpp index c7de6dda6..c4a2c0a7d 100644 --- a/src/crab/interval.hpp +++ b/src/crab/interval.hpp @@ -104,7 +104,8 @@ class interval_t final { template [[nodiscard]] std::tuple bound(T elb, T eub) const { - auto [lb, ub] = bound(static_cast>(elb), static_cast>(eub)); + using C = std::underlying_type_t; + auto [lb, ub] = bound(static_cast(elb), static_cast(eub)); return {static_cast(lb), static_cast(ub)}; } diff --git a/src/crab/label.hpp b/src/crab/label.hpp new file mode 100644 index 000000000..db8c55ea5 --- /dev/null +++ b/src/crab/label.hpp @@ -0,0 +1,94 @@ +// Copyright (c) Prevail Verifier contributors. +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "crab_utils/num_safety.hpp" + +constexpr char STACK_FRAME_DELIMITER = '/'; + +namespace crab { +struct label_t { + std::string stack_frame_prefix; ///< Variable prefix when calling this label. + int from{}; ///< Jump source, or simply index of instruction + int to{}; ///< Jump target or -1 + std::string special_label; ///< Special label for special instructions. + + explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept + : stack_frame_prefix(std::move(stack_frame_prefix)), from(index), to(to) {} + + static label_t make_jump(const label_t& src_label, const label_t& target_label) { + return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; + } + + static label_t make_increment_counter(const label_t& label) { + // XXX: This is a hack to increment the loop counter. 
+ label_t res{label.from, label.to, label.stack_frame_prefix}; + res.special_label = "counter"; + return res; + } + + std::strong_ordering operator<=>(const label_t& other) const = default; + + // no hash; intended for use in ordered containers. + + [[nodiscard]] + constexpr bool isjump() const { + return to != -1; + } + + [[nodiscard]] + int call_stack_depth() const { + // The call stack depth is the number of '/' separated components in the label, + // which is one more than the number of '/' separated components in the prefix, + // hence two more than the number of '/' in the prefix, if any. + if (stack_frame_prefix.empty()) { + return 1; + } + return gsl::narrow(2 + std::ranges::count(stack_frame_prefix, STACK_FRAME_DELIMITER)); + } + + static const label_t entry; + static const label_t exit; +}; + +inline const label_t label_t::entry{-1}; +inline const label_t label_t::exit{INT_MAX}; + +std::ostream& operator<<(std::ostream& os, const label_t& label); +std::string to_string(label_t const& label); + +// cpu=v4 supports 32-bit PC offsets so we need a large enough type. +using pc_t = uint32_t; + +// We use a 16-bit offset whenever it fits in 16 bits. +inline std::function label_to_offset16(const pc_t pc) { + return [=](const label_t& label) { + const int64_t offset = label.from - gsl::narrow(pc) - 1; + const bool is16 = + std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); + return gsl::narrow(is16 ? offset : 0); + }; +} + +// We use the JA32 opcode with the offset in 'imm' when the offset +// of an unconditional jump doesn't fit in an int16_t. +inline std::function label_to_offset32(const pc_t pc) { + return [=](const label_t& label) { + const int64_t offset = label.from - gsl::narrow(pc) - 1; + const bool is16 = + std::numeric_limits::min() <= offset && offset <= std::numeric_limits::max(); + return is16 ? 
0 : gsl::narrow(offset); + }; +} + +} // namespace crab diff --git a/src/crab/split_dbm.cpp b/src/crab/split_dbm.cpp index 1c8620718..b8cf2ce50 100644 --- a/src/crab/split_dbm.cpp +++ b/src/crab/split_dbm.cpp @@ -1143,9 +1143,8 @@ string_invariant SplitDBM::to_set() const { if (!this->g.elem(0, v) && !this->g.elem(v, 0)) { continue; } - interval_t v_out = - interval_t(this->g.elem(v, 0) ? -number_t(this->g.edge_val(v, 0)) : extended_number::minus_infinity(), - this->g.elem(0, v) ? number_t(this->g.edge_val(0, v)) : extended_number::plus_infinity()); + interval_t v_out{this->g.elem(v, 0) ? -number_t(this->g.edge_val(v, 0)) : extended_number::minus_infinity(), + this->g.elem(0, v) ? number_t(this->g.edge_val(0, v)) : extended_number::plus_infinity()}; assert(!v_out.is_bottom()); variable_t variable = *this->rev_map[v]; @@ -1153,7 +1152,7 @@ string_invariant SplitDBM::to_set() const { std::stringstream elem; elem << variable; if (variable.is_type()) { - auto [lb, ub] = v_out.bound(T_MIN, T_MAX); + auto [lb, ub] = v_out.bound(T_UNINIT, T_MAX); if (lb == ub) { if (variable.is_in_stack() && lb == T_NUM) { // no need to show this @@ -1206,8 +1205,7 @@ string_invariant SplitDBM::to_set() const { std::ostream& operator<<(std::ostream& o, const SplitDBM& dom) { return o << dom.to_set(); } bool SplitDBM::eval_expression_overflow(const linear_expression_t& e, Weight& out) const { - [[maybe_unused]] - const bool overflow = convert_NtoW_overflow(e.constant_term(), out); + [[maybe_unused]] const bool overflow = convert_NtoW_overflow(e.constant_term(), out); assert(!overflow); for (const auto& [variable, coefficient] : e.variable_terms()) { Weight coef; diff --git a/src/crab/thresholds.cpp b/src/crab/thresholds.cpp index 88e3e689e..5cf436d30 100644 --- a/src/crab/thresholds.cpp +++ b/src/crab/thresholds.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "crab/thresholds.hpp" #include "crab/cfg.hpp" +#include "crab/label.hpp" namespace crab { @@ -48,7 +49,7 @@ 
std::ostream& operator<<(std::ostream& o, const thresholds_t& t) { return o; } -void wto_thresholds_t::get_thresholds(const basic_block_t& bb, thresholds_t& thresholds) const {} +void wto_thresholds_t::get_thresholds(const value_t& bb, thresholds_t& thresholds) const {} void wto_thresholds_t::operator()(const label_t& vertex) { if (m_stack.empty()) { @@ -59,7 +60,7 @@ void wto_thresholds_t::operator()(const label_t& vertex) { const auto it = m_head_to_thresholds.find(head); if (it != m_head_to_thresholds.end()) { thresholds_t& thresholds = it->second; - const basic_block_t& bb = m_cfg.get_node(vertex); + const value_t& bb = m_cfg.get_node(vertex); get_thresholds(bb, thresholds); } else { CRAB_ERROR("No head found while gathering thresholds"); @@ -73,7 +74,7 @@ void wto_thresholds_t::operator()(const std::shared_ptr& cycle) { // XXX: if we want to consider constants from loop // initializations - for (const auto& pre : boost::make_iterator_range(bb.prev_blocks())) { + for (const auto& pre : boost::make_iterator_range(bb.prev_labels())) { if (pre != cycle->head()) { auto& pred_bb = m_cfg.get_node(pre); get_thresholds(pred_bb, thresholds); @@ -90,7 +91,7 @@ void wto_thresholds_t::operator()(const std::shared_ptr& cycle) { std::ostream& operator<<(std::ostream& o, const wto_thresholds_t& t) { for (const auto& [label, th] : t.m_head_to_thresholds) { - o << label << "=" << th << "\n"; + o << to_string(label) << "=" << th << "\n"; } return o; } diff --git a/src/crab/thresholds.hpp b/src/crab/thresholds.hpp index fe5a2e079..1e3d67635 100644 --- a/src/crab/thresholds.hpp +++ b/src/crab/thresholds.hpp @@ -58,7 +58,7 @@ class wto_thresholds_t final { // the top of the stack is the current wto head std::vector m_stack; - void get_thresholds(const basic_block_t& bb, thresholds_t& thresholds) const; + void get_thresholds(const value_t& bb, thresholds_t& thresholds) const; public: wto_thresholds_t(cfg_t& cfg, const size_t max_size) : m_cfg(cfg), m_max_size(max_size) {} diff --git 
a/src/crab/var_factory.cpp b/src/crab/var_factory.cpp index f4c243229..00ace7328 100644 --- a/src/crab/var_factory.cpp +++ b/src/crab/var_factory.cpp @@ -4,7 +4,7 @@ * Factories for variable names. */ -#include "asm_syntax.hpp" +#include "crab/label.hpp" #include "crab/variable.hpp" #include "crab_utils/lazy_allocator.hpp" diff --git a/src/crab_utils/debug.hpp b/src/crab_utils/debug.hpp index e70d15c54..e71baa424 100644 --- a/src/crab_utils/debug.hpp +++ b/src/crab_utils/debug.hpp @@ -43,13 +43,14 @@ void ___print___(std::ostream& os, ArgTypes... args) { (void)expand_variadic_pack{0, ((os << args), void(), 0)...}; } -#define CRAB_ERROR(...) \ - do { \ - std::ostringstream os; \ - os << "CRAB ERROR: "; \ - crab::___print___(os, __VA_ARGS__); \ - os << "\n"; \ - throw std::runtime_error(os.str()); \ +#define CRAB_ERROR(...) \ + do { \ + std::ostringstream os; \ + os << "CRAB ERROR: "; \ + crab::___print___(os, __VA_ARGS__); \ + crab::___print___(os, "; function ", __func__, ", line ", __LINE__); \ + os << "\n"; \ + throw std::runtime_error(os.str()); \ } while (0) extern bool CrabWarningFlag; diff --git a/src/crab_utils/stats.cpp b/src/crab_utils/stats.cpp index f257819ae..8e9b24002 100644 --- a/src/crab_utils/stats.cpp +++ b/src/crab_utils/stats.cpp @@ -94,7 +94,6 @@ void CrabStats::reset() { sw.clear(); } -void CrabStats::count(const std::string& name) { ++(*counters)[name]; } void CrabStats::count_max(const std::string& name, const unsigned v) { (*counters)[name] = std::max((*counters)[name], v); } @@ -102,9 +101,6 @@ void CrabStats::count_max(const std::string& name, const unsigned v) { unsigned CrabStats::uset(const std::string& n, const unsigned v) { return (*counters)[n] = v; } unsigned CrabStats::get(const std::string& n) { return (*counters)[n]; } -void CrabStats::start(const std::string& name) { (*sw)[name].start(); } -void CrabStats::stop(const std::string& name) { (*sw)[name].stop(); } -void CrabStats::resume(const std::string& name) { 
(*sw)[name].resume(); } /** Outputs all statistics to std output */ void CrabStats::Print(std::ostream& OS) { diff --git a/src/crab_utils/stats.hpp b/src/crab_utils/stats.hpp index f402318f3..d60a1c49f 100644 --- a/src/crab_utils/stats.hpp +++ b/src/crab_utils/stats.hpp @@ -33,6 +33,9 @@ inline std::ostream& operator<<(std::ostream& OS, const Stopwatch& sw) { } class CrabStats { + /// Controls whether statistics collection is active. + /// When false, all statistics methods become no-ops for better performance. + static constexpr bool enabled = false; static thread_local lazy_allocator> counters; static thread_local lazy_allocator> sw; @@ -44,13 +47,29 @@ class CrabStats { /* counters */ static unsigned get(const std::string& n); static unsigned uset(const std::string& n, unsigned v); - static void count(const std::string& name); + static void count(const std::string& name) { + if constexpr (enabled) { + ++(*counters)[name]; + } + } static void count_max(const std::string& name, unsigned v); /* stop watch */ - static void start(const std::string& name); - static void stop(const std::string& name); - static void resume(const std::string& name); + static void start(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].start(); + } + } + static void stop(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].stop(); + } + } + static void resume(const std::string& name) { + if constexpr (enabled) { + (*sw)[name].resume(); + } + } /** Outputs all statistics to std output */ static void Print(std::ostream& OS); diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index a5af14653..00695c18e 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -60,37 +60,19 @@ static checks_db generate_report(const cfg_t& cfg, const crab::invariant_table_t const crab::invariant_table_t& post_invariants) { checks_db m_db; for (const label_t& label : cfg.sorted_labels()) { - const basic_block_t& bb = cfg.get_node(label); - ebpf_domain_t 
from_inv(pre_invariants.at(label)); - from_inv.set_require_check( - [&m_db, label](auto& inv, const crab::linear_constraint_t& cst, const std::string& s) { - if (inv.is_bottom()) { - return true; - } - if (cst.is_contradiction()) { - m_db.add_warning(label, s); - return false; - } - - if (inv.entail(cst)) { - // add_redundant(s); - return true; - } else if (inv.intersect(cst)) { - // TODO: add_error() if imply negation - m_db.add_warning(label, s); - return false; - } else { - m_db.add_warning(label, s); - return false; - } - }); - + ebpf_domain_t from_inv{pre_invariants.at(label)}; const bool pre_bot = from_inv.is_bottom(); - from_inv(bb); + const GuardedInstruction& instruction = cfg.at(label); + for (const Assertion& assertion : instruction.preconditions) { + for (const auto& warning : ebpf_domain_check(from_inv, label, assertion)) { + m_db.add_warning(label, warning); + } + } + ebpf_domain_transform(from_inv, instruction.cmd); if (!pre_bot && from_inv.is_bottom()) { - m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(bb.label())); + m_db.add_unreachable(label, std::string("Code is unreachable after ") + to_string(label)); } } diff --git a/src/main/check.cpp b/src/main/check.cpp index ef6241418..f6874a99a 100644 --- a/src/main/check.cpp +++ b/src/main/check.cpp @@ -38,7 +38,7 @@ static const std::map _conformance_groups {"callx", bpf_conformance_groups_t::callx}, {"divmul32", bpf_conformance_groups_t::divmul32}, {"divmul64", bpf_conformance_groups_t::divmul64}, {"packet", bpf_conformance_groups_t::packet}}; -static std::optional _get_conformance_group_by_name(std::string group) { +static std::optional _get_conformance_group_by_name(const std::string& group) { if (!_conformance_groups.contains(group)) { return {}; } @@ -124,7 +124,7 @@ int main(int argc, char** argv) { ->expected(0, _conformance_groups.size()) ->check(CLI::IsMember(_get_conformance_group_names())); - app.add_flag("--simplify,!--no-simplify", 
ebpf_verifier_options.cfg_opts.simplify, + app.add_flag("--simplify,!--no-simplify", ebpf_verifier_options.simplify, "Simplify the CFG before analysis by merging chains of instructions into a single basic block. " "Default: enabled") ->group("Verbosity"); diff --git a/src/spec_type_descriptors.hpp b/src/spec_type_descriptors.hpp index 3da18504e..89a7d537f 100644 --- a/src/spec_type_descriptors.hpp +++ b/src/spec_type_descriptors.hpp @@ -38,8 +38,6 @@ struct EbpfProgramType { bool is_privileged{}; }; -void print_map_descriptors(const std::vector& descriptors, std::ostream& o); - // Represents the key characteristics that determine equivalence between eBPF maps. // Used to cache and compare map configurations across the program. struct EquivalenceKey { @@ -74,4 +72,8 @@ struct raw_program { std::vector line_info{}; }; +void print_map_descriptors(const std::vector& descriptors, std::ostream& o); + +std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info); + extern thread_local crab::lazy_allocator global_program_info; diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index 017d17806..ec02500c4 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -11,8 +11,8 @@ #include -#include "asm_ostream.hpp" #include "asm_parse.hpp" +#include "asm_syntax.hpp" #include "ebpf_verifier.hpp" #include "ebpf_yaml.hpp" #include "string_constraints.hpp" @@ -171,7 +171,7 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set& ra ebpf_verifier_options_t options{}; // Use ~simplify for YAML tests unless otherwise specified. - options.cfg_opts.simplify = false; + options.simplify = false; // All YAML tests use !setup_constraints. 
options.setup_constraints = false; @@ -193,7 +193,7 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set& ra } else if (name == "strict") { options.strict = true; } else if (name == "simplify") { - options.cfg_opts.simplify = true; + options.simplify = true; } else if (name == "big_endian") { options.big_endian = true; } else if (name == "!big_endian") { @@ -251,7 +251,7 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { if (debug) { test_case.options.print_failures = true; test_case.options.print_invariants = true; - test_case.options.cfg_opts.simplify = false; + test_case.options.simplify = false; } ebpf_context_descriptor_t context_descriptor{64, 0, 4, -1}; @@ -360,7 +360,7 @@ ConformanceTestResult run_conformance_test_case(const std::vector& me print(prog, std::cout, {}); options.print_failures = true; options.print_invariants = true; - options.cfg_opts.simplify = false; + options.simplify = false; } try { diff --git a/src/test/test_conformance.cpp b/src/test/test_conformance.cpp index 074bae979..9d5eebb3a 100644 --- a/src/test/test_conformance.cpp +++ b/src/test/test_conformance.cpp @@ -6,7 +6,7 @@ #define CONFORMANCE_TEST_PATH "external/bpf_conformance/tests/" -static void test_conformance(const std::string& filename, bpf_conformance_test_result_t expected_result, +static void test_conformance(const std::string& filename, const bpf_conformance_test_result_t& expected_result, const std::string& expected_reason) { std::vector test_files = {CONFORMANCE_TEST_PATH + filename}; boost::filesystem::path test_path = boost::dll::program_location(); diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 230d045c9..82756292c 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -3,7 +3,6 @@ #include #include "asm_marshal.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" // Below we define a tample of instruction templates that specify diff --git a/src/test/test_print.cpp 
b/src/test/test_print.cpp index bcf1675f1..515c92ef6 100644 --- a/src/test/test_print.cpp +++ b/src/test/test_print.cpp @@ -12,7 +12,6 @@ #endif #include "asm_files.hpp" -#include "asm_ostream.hpp" #include "asm_unmarshal.hpp" #define TEST_OBJECT_FILE_DIRECTORY "ebpf-samples/build/" diff --git a/src/test/test_wto.cpp b/src/test/test_wto.cpp index 4e5ac738a..091efebc3 100644 --- a/src/test/test_wto.cpp +++ b/src/test/test_wto.cpp @@ -12,22 +12,22 @@ TEST_CASE("wto figure 1", "[wto]") { // Add nodes. for (int i = 1; i <= 8; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(7)); - cfg.get_node(label_t(5)) >> cfg.get_node(label_t(6)); - cfg.get_node(label_t(6)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(6)) >> cfg.get_node(label_t(7)); - cfg.get_node(label_t(7)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(7)) >> cfg.get_node(label_t(8)); - cfg.get_node(label_t(8)) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{7}); + cfg.get_node(label_t{5}) >> cfg.get_node(label_t{6}); + cfg.get_node(label_t{6}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{6}) >> cfg.get_node(label_t{7}); + cfg.get_node(label_t{7}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{7}) >> cfg.get_node(label_t{8}); + cfg.get_node(label_t{8}) >> cfg.get_node(label_t::exit); const wto_t wto(cfg); @@ 
-44,18 +44,18 @@ TEST_CASE("wto figure 2a", "[wto]") { // Add nodes. for (int i = 1; i <= 5; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. - cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(5)); - cfg.get_node(label_t(5)) >> cfg.get_node(label_t(4)); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{5}); + cfg.get_node(label_t{5}) >> cfg.get_node(label_t{4}); const wto_t wto(cfg); @@ -72,17 +72,17 @@ TEST_CASE("wto figure 2b", "[wto]") { // Add nodes. for (int i = 1; i <= 4; i++) { - cfg.insert(label_t(i)); + cfg.insert(label_t{i}, Undefined{}); } // Add edges. 
- cfg.get_node(label_t::entry) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(2)); - cfg.get_node(label_t(1)) >> cfg.get_node(label_t(4)); - cfg.get_node(label_t(2)) >> cfg.get_node(label_t(3)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t(1)); - cfg.get_node(label_t(3)) >> cfg.get_node(label_t::exit); - cfg.get_node(label_t(4)) >> cfg.get_node(label_t(3)); + cfg.get_node(label_t::entry) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{2}); + cfg.get_node(label_t{1}) >> cfg.get_node(label_t{4}); + cfg.get_node(label_t{2}) >> cfg.get_node(label_t{3}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t{1}); + cfg.get_node(label_t{3}) >> cfg.get_node(label_t::exit); + cfg.get_node(label_t{4}) >> cfg.get_node(label_t{3}); const wto_t wto(cfg); diff --git a/test-data/calllocal.yaml b/test-data/calllocal.yaml index 5f09446ae..74e558ef3 100644 --- a/test-data/calllocal.yaml +++ b/test-data/calllocal.yaml @@ -347,14 +347,14 @@ code: r0 += r1 ; r0 = 1 + 2 + 3 = 6 exit : | - *(u8 *)(r10 - 513) = 2 + *(u8 *)(r10 - 1) = 2 call - r1 = *(u8 *)(r10 - 513) + r1 = *(u8 *)(r10 - 1) r0 += r1 ; r0 = 2 + 3 = 5 exit : | - *(u8 *)(r10 - 1025) = 3 - r0 = *(u8 *)(r10 - 1025) + *(u8 *)(r10 - 1) = 3 + r0 = *(u8 *)(r10 - 1) exit post: diff --git a/test-data/jump.yaml b/test-data/jump.yaml index 12319afb5..decbd2910 100644 --- a/test-data/jump.yaml +++ b/test-data/jump.yaml @@ -1,5 +1,23 @@ # Copyright (c) Prevail Verifier contributors. 
# SPDX-License-Identifier: MIT +--- +test-case: jump to fallthrough + +pre: [] + +code: + : | + r0 = 0 + if r0 != 0 goto + : | + exit + +post: + - r0.type=number + - r0.svalue=0 + - r0.uvalue=0 +messages: [] + --- test-case: simple conditional jump forward @@ -873,22 +891,17 @@ pre: code: : | - if r1 <= 0 goto + if r1 > 0 goto r0 = 1 : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx - - r1.uvalue=[1, +oo] +post: [] messages: - - "0:2: Code is unreachable after 0:2" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JGE with imm 0 and pointer @@ -900,22 +913,17 @@ pre: code: : | - if r1 <= 0 goto + if r1 >= 0 goto r0 = 1 : | exit -post: - - r0.svalue=1 - - r0.type=number - - r0.uvalue=1 - - r1.ctx_offset=0 - - r1.svalue=[1, 2147418112] - - r1.type=ctx - - r1.uvalue=[1, +oo] +post: [] messages: - - "0:2: Code is unreachable after 0:2" + - "0:1: Code is unreachable after 0:1" + - "2: Code is unreachable after 2" + - "2: Invalid type (r0.type == number)" --- test-case: JSLT with imm 0 and pointer @@ -969,7 +977,7 @@ pre: code: : | - if r1 s<= 0 goto + if r1 s> 0 goto r0 = 1 : | exit @@ -990,7 +998,7 @@ pre: code: : | - if r1 s<= 0 goto + if r1 s>= 0 goto r0 = 1 : | exit @@ -1137,7 +1145,7 @@ pre: code: : | - if w1 <= 0 goto + if w1 > 0 goto r0 = 1 : | exit @@ -1158,7 +1166,7 @@ pre: code: : | - if w1 <= 0 goto + if w1 >= 0 goto r0 = 1 : | exit @@ -1221,7 +1229,7 @@ pre: code: : | - if w1 s<= 0 goto + if w1 s> 0 goto r0 = 1 : | exit @@ -1242,7 +1250,7 @@ pre: code: : | - if w1 s<= 0 goto + if w1 s>= 0 goto r0 = 1 : | exit diff --git a/test-data/loop.yaml b/test-data/loop.yaml index c40af4a87..aa65bab13 100644 --- a/test-data/loop.yaml +++ b/test-data/loop.yaml @@ -186,7 +186,7 @@ post: - "pc[1]=[1, +oo]" messages: - - "1: Loop counter is too large (pc[1] < 100000)" + - "1 (counter): Loop counter is too 
large (pc[1] < 100000)" --- test-case: realistic forward loop @@ -207,8 +207,8 @@ code: r2 -= r1 r3 = 0 r0 = 0 -# r2 <<= 32; this fails with "11: Upper bound must be at most packet_size (valid_access(r4.offset, width=1) for read)" -# r2 >>= 32 + # r2 <<= 32; this fails with "11: Upper bound must be at most packet_size (valid_access(r4.offset, width=1) for read)" + # r2 >>= 32 : | r4 = r1 r4 += r3 @@ -283,7 +283,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, less than or equal options: ["termination"] @@ -300,7 +300,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, equal @@ -318,7 +318,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, greater than @@ -336,7 +336,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: simple infinite loop, greater than or equal @@ -354,7 +354,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: infinite loop with multiple exits options: ["termination"] @@ -371,7 +371,7 @@ post: [] messages: - "1:2: Code is unreachable after 1:2" - "3:4: Code is unreachable after 3:4" - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- # Note: This test case terminates after 1000001 iterations, but the verifier assumes that the 
loop is infinite @@ -394,7 +394,7 @@ post: - "r0.type=number" - "r0.uvalue=1000001" messages: - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- test-case: possible infinite loop @@ -413,7 +413,7 @@ post: - "r0.type=number" messages: - - "0: Loop counter is too large (pc[0] < 100000)" + - "0 (counter): Loop counter is too large (pc[0] < 100000)" --- # Check for case where there are backwards jumps that don't form a loop. @@ -461,7 +461,7 @@ post: [] messages: - "1:3: Code is unreachable after 1:3" - - "2: Loop counter is too large (pc[2] < 100000)" + - "2 (counter): Loop counter is too large (pc[2] < 100000)" - "2:3: Code is unreachable after 2:3" --- diff --git a/test-data/packet.yaml b/test-data/packet.yaml index 682665dee..fd7e46bee 100644 --- a/test-data/packet.yaml +++ b/test-data/packet.yaml @@ -1,5 +1,34 @@ # Copyright (c) Prevail Verifier contributors. # SPDX-License-Identifier: MIT +--- +test-case: read 8bit is unsigned + +pre: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]"] + +code: + : | + r2 = *(u8 *)(r1 + 0) + +post: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]", + "r2.type=number", "r2.svalue=[0, 255]", "r2.uvalue=[0, 255]"] +messages: [] +--- +test-case: read 16bit is unsigned + +pre: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]"] + +code: + : | + r2 = *(u16 *)(r1 + 0) + +post: ["meta_offset=0", "packet_size=20", + "r1.type=packet", "r1.packet_offset=0", "r1.svalue=[4098, 2147418112]", "r1.uvalue=[4098, 2147418112]", + "r2.type=number", "r2.svalue=[0, 65535]", "r2.uvalue=[0, 65535]"] +messages: [] + --- test-case: simple invalid write diff --git a/test-data/stack.yaml b/test-data/stack.yaml index 
456097c4c..8df80ca1e 100644 --- a/test-data/stack.yaml +++ b/test-data/stack.yaml @@ -594,4 +594,4 @@ post: - s[511].uvalue=0 messages: - - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE * call_stack_depth (valid_access(r10.offset-513, width=1) for write)" + - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE (valid_access(r10.offset-513, width=1) for write)" diff --git a/test-data/subtract.yaml b/test-data/subtract.yaml index 3c7047d96..a065a6b84 100644 --- a/test-data/subtract.yaml +++ b/test-data/subtract.yaml @@ -159,3 +159,4 @@ post: - r10.stack_offset=4096 messages: - "0: Upper bound must be at most EBPF_TOTAL_STACK_SIZE (r2.type == number or r1.type == r2.type in {ctx, stack, packet})" + - "0: Lower bound must be at least r10.stack_offset - EBPF_SUBPROGRAM_STACK_SIZE (r2.type == number or r1.type == r2.type in {ctx, stack, packet})" diff --git a/test-data/uninit.yaml b/test-data/uninit.yaml index 9dcb18a44..aada2a9d1 100644 --- a/test-data/uninit.yaml +++ b/test-data/uninit.yaml @@ -12,11 +12,11 @@ code: : | r0 += r3 -post: [] +post: + - "r0.type=uninit" messages: - "0: Invalid type (r3.type in {number, ctx, stack, packet, shared})" - - "CRAB ERROR: Cannot convert bottom to tuple" --- test-case: subtraction of a number and an uninitialized register