Stack memory tracking improvements (vbpf#614)

* Stack memory tracking improvements, fixes vbpf#566 * Expand byteswap tests to test big and little endian platforms * Update conformance test expected results to be more precise --------- Signed-off-by: Dave Thaler <[email protected]>
dthaler · May 12, 2024 · 78a6c82 · 78a6c82
1 parent ef17f7a
commit 78a6c82
Show file tree

Hide file tree

Showing 9 changed files with 677 additions and 39 deletions.
diff --git a/src/config.cpp b/src/config.cpp
@@ -13,4 +13,5 @@ const ebpf_verifier_options_t ebpf_verifier_default_options = {
     .print_line_info = false,
     .allow_division_by_zero = true,
     .setup_constraints = true,
+    .big_endian = false,
 };
diff --git a/src/config.hpp b/src/config.hpp
@@ -18,6 +18,7 @@ struct ebpf_verifier_options_t {
     bool print_line_info;
     bool allow_division_by_zero;
     bool setup_constraints;
+    bool big_endian;
 
     bool dump_btf_types_json;
 };

diff --git a/src/crab/array_domain.cpp b/src/crab/array_domain.cpp
@@ -8,10 +8,13 @@
 #include <utility>
 #include <vector>
 
+#include "boost/endian/conversion.hpp"
+
 #include "radix_tree/radix_tree.hpp"
 #include "crab/array_domain.hpp"
 
 #include "asm_ostream.hpp"
+#include "config.hpp"
 #include "dsl_syntax.hpp"
 #include "spec_type_descriptors.hpp"
 
@@ -110,9 +113,9 @@ class cell_t final {
         return {number_t{static_cast<int>(o)}, number_t{static_cast<int>(o)} + number_t{static_cast<int>(size - 1)}};
     }
 
+  public:
     [[nodiscard]] interval_t to_interval() const { return to_interval(_offset, _size); }
 
-  public:
     [[nodiscard]] bool is_null() const { return _offset == 0 && _size == 0; }
 
     [[nodiscard]] offset_t get_offset() const { return _offset; }
@@ -447,6 +450,68 @@ std::ostream& operator<<(std::ostream& o, offset_map_t& m) {
     return o;
 }
 
+// Create a new cell that is a subset of an existing cell.
+void array_domain_t::split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_start_index, unsigned int len) {
+    assert(kind == data_kind_t::svalues || kind == data_kind_t::uvalues);
+
+    // Get the values from the indicated stack range.
+    std::optional<linear_expression_t> svalue = load(inv, data_kind_t::svalues, number_t(cell_start_index), len);
+    std::optional<linear_expression_t> uvalue = load(inv, data_kind_t::uvalues, number_t(cell_start_index), len);
+
+    // Create a new cell for that range.
+    offset_map_t& offset_map = lookup_array_map(kind);
+    cell_t new_cell = offset_map.mk_cell(cell_start_index, len);
+    inv.assign(new_cell.get_scalar(data_kind_t::svalues), svalue);
+    inv.assign(new_cell.get_scalar(data_kind_t::uvalues), uvalue);
+}
+
+// Prepare to havoc bytes in the middle of a cell by potentially splitting the cell if it is numeric,
+// into the part to the left of the havoced portion, and the part to the right of the havoced portion.
+void array_domain_t::split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i,
+                                      const linear_expression_t& elem_size) {
+    assert(kind == data_kind_t::svalues || kind == data_kind_t::uvalues);
+    offset_map_t& offset_map = lookup_array_map(kind);
+    interval_t ii = inv.eval_interval(i);
+    std::optional<number_t> n = ii.singleton();
+    if (!n) {
+        // We can only split a singleton offset.
+        return;
+    }
+    interval_t i_elem_size = inv.eval_interval(elem_size);
+    std::optional<number_t> n_bytes = i_elem_size.singleton();
+    if (!n_bytes) {
+        // We can only split a singleton size.
+        return;
+    }
+    auto size = static_cast<unsigned int>(*n_bytes);
+    offset_t o((uint64_t)*n);
+
+    std::vector<cell_t> cells = offset_map.get_overlap_cells(o, size);
+    for (cell_t const& c : cells) {
+        interval_t intv = c.to_interval();
+        int cell_start_index = (int)*intv.lb().number();
+        int cell_end_index = (int)*intv.ub().number();
+
+        if (!this->num_bytes.all_num(cell_start_index, cell_end_index + 1) || (cell_end_index + 1 < cell_start_index + sizeof(int64_t))) {
+            // We can only split numeric cells of size 8 or less.
+            continue;
+        }
+
+        if (!inv.eval_interval(c.get_scalar(kind)).is_singleton()) {
+            // We can only split cells with a singleton value.
+            continue;
+        }
+        if (cell_start_index < o) {
+            // Use the bytes to the left of the specified range.
+            split_cell(inv, kind, cell_start_index, (unsigned int)(o - cell_start_index));
+        }
+        if (o + size - 1 < cell_end_index) {
+            // Use the bytes to the right of the specified range.
+            split_cell(inv, kind, (int)(o + size), (unsigned int)(cell_end_index - (o + size - 1)));
+        }
+    }
+}
+
 // we can only treat this as non-member because we use global state
 static std::optional<std::pair<offset_t, unsigned>>
 kill_and_find_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i, const linear_expression_t& elem_size) {
@@ -522,6 +587,28 @@ std::optional<uint8_t> get_value_byte(NumAbsDomain& inv, offset_t o, int width)
         return {};
     }
     uint64_t n = t->cast_to_uint64();
+
+    // Convert value to bytes of the appropriate endian-ness.
+    switch (width) {
+    case sizeof(uint16_t):
+        if (thread_local_options.big_endian)
+            n = boost::endian::native_to_big<uint16_t>(n);
+        else
+            n = boost::endian::native_to_little<uint16_t>(n);
+        break;
+    case sizeof(uint32_t):
+        if (thread_local_options.big_endian)
+            n = boost::endian::native_to_big<uint32_t>(n);
+        else
+            n = boost::endian::native_to_little<uint32_t>(n);
+        break;
+    case sizeof(uint64_t):
+        if (thread_local_options.big_endian)
+            n = boost::endian::native_to_big<uint64_t>(n);
+        else
+            n = boost::endian::native_to_little<uint64_t>(n);
+        break;
+    }
     uint8_t* bytes = (uint8_t*)&n;
     return bytes[o % width];
 }
@@ -578,14 +665,26 @@ std::optional<linear_expression_t> array_domain_t::load(NumAbsDomain& inv, data_
                 }
                 if (size == 2) {
                     uint16_t b = *(uint16_t*)result_buffer;
+                    if (thread_local_options.big_endian)
+                        b = boost::endian::native_to_big<uint16_t>(b);
+                    else
+                        b = boost::endian::native_to_little<uint16_t>(b);
                     return number_t{b};
                 }
                 if (size == 4) {
                     uint32_t b = *(uint32_t*)result_buffer;
+                    if (thread_local_options.big_endian)
+                        b = boost::endian::native_to_big<uint32_t>(b);
+                    else
+                        b = boost::endian::native_to_little<uint32_t>(b);
                     return number_t{b};
                 }
                 if (size == 8) {
                     uint64_t b = *(uint64_t*)result_buffer;
+                    if (thread_local_options.big_endian)
+                        b = boost::endian::native_to_big<uint64_t>(b);
+                    else
+                        b = boost::endian::native_to_little<uint64_t>(b);
                     return (kind == data_kind_t::uvalues) ? number_t(b) : number_t((int64_t)b);
                 }
             }
@@ -629,11 +728,22 @@ std::optional<linear_expression_t> array_domain_t::load(NumAbsDomain& inv, data_
     return {};
 }
 
+// We are about to write to a given range of bytes on the stack.
+// Any cells covering that range need to be removed, and any cells that only
+// partially cover that range can be split such that any non-covered portions become new cells.
+static std::optional<std::pair<offset_t, unsigned>> split_and_find_var(array_domain_t& array_domain, NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) {
+    if (kind == data_kind_t::svalues || kind == data_kind_t::uvalues) {
+        array_domain.split_number_var(inv, kind, idx, elem_size);
+    }
+    return kill_and_find_var(inv, kind, idx, elem_size);
+}
+
+
 std::optional<variable_t> array_domain_t::store(NumAbsDomain& inv, data_kind_t kind,
                                                 const linear_expression_t& idx,
                                                 const linear_expression_t& elem_size,
                                                 const linear_expression_t& val) {
-    auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
+    auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
     if (maybe_cell) {
         // perform strong update
         auto [offset, size] = *maybe_cell;
@@ -655,7 +765,7 @@ std::optional<variable_t> array_domain_t::store_type(NumAbsDomain& inv,
                                                      const linear_expression_t& elem_size,
                                                      const linear_expression_t& val) {
     auto kind = data_kind_t::types;
-    auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
+    auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
     if (maybe_cell) {
         // perform strong update
         auto [offset, size] = *maybe_cell;
@@ -678,7 +788,7 @@ std::optional<variable_t> array_domain_t::store_type(NumAbsDomain& inv,
 }
 
 void array_domain_t::havoc(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) {
-    auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
+    auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
     if (maybe_cell && kind == data_kind_t::types) {
         auto [offset, size] = *maybe_cell;
         num_bytes.havoc(offset, size);

diff --git a/src/crab/array_domain.hpp b/src/crab/array_domain.hpp
@@ -87,6 +87,10 @@ class array_domain_t final {
     // Perform array stores over an array segment
     void store_numbers(NumAbsDomain& inv, variable_t _idx, variable_t _width);
 
+    void split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i,
+                          const linear_expression_t& elem_size);
+    void split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_start_index, unsigned int len);
+
     void initialize_numbers(int lb, int width);
 };
 

diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp
@@ -1447,6 +1447,12 @@ void ebpf_domain_t::operator()(const Assume& s) {
 
 void ebpf_domain_t::operator()(const Undefined& a) {}
 
+// Simple truncation function usable with swap_endianness().
+template <class T>
+BOOST_CONSTEXPR T truncate(T x) BOOST_NOEXCEPT {
+    return x;
+}
+
 void ebpf_domain_t::operator()(const Un& stmt) {
     auto dst = reg_pack(stmt.dst);
     auto swap_endianness = [&](variable_t v, auto input, const auto& be_or_le) {
@@ -1464,33 +1470,57 @@ void ebpf_domain_t::operator()(const Un& stmt) {
         havoc(v);
         havoc_offsets(stmt.dst);
     };
-    // Swap bytes.  For 64-bit types we need the weights to fit in a
+    // Swap bytes if needed.  For 64-bit types we need the weights to fit in a
     // signed int64, but for smaller types we don't want sign extension,
     // so we use unsigned which still fits in a signed int64.
     switch (stmt.op) {
     case Un::Op::BE16:
-        swap_endianness(dst.svalue, uint16_t(0), boost::endian::native_to_big<uint16_t>);
-        swap_endianness(dst.uvalue, uint16_t(0), boost::endian::native_to_big<uint16_t>);
+        if (!thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+            swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+        } else {
+            swap_endianness(dst.svalue, uint16_t(0), truncate<uint16_t>);
+            swap_endianness(dst.uvalue, uint16_t(0), truncate<uint16_t>);
+        }
         break;
     case Un::Op::BE32:
-        swap_endianness(dst.svalue, uint32_t(0), boost::endian::native_to_big<uint32_t>);
-        swap_endianness(dst.uvalue, uint32_t(0), boost::endian::native_to_big<uint32_t>);
+        if (!thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+            swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+        } else {
+            swap_endianness(dst.svalue, uint32_t(0), truncate<uint32_t>);
+            swap_endianness(dst.uvalue, uint32_t(0), truncate<uint32_t>);
+        }
         break;
     case Un::Op::BE64:
-        swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_big<int64_t>);
-        swap_endianness(dst.uvalue, uint64_t(0), boost::endian::native_to_big<uint64_t>);
+        if (!thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse<int64_t>);
+            swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse<uint64_t>);
+        }
         break;
     case Un::Op::LE16:
-        swap_endianness(dst.svalue, uint16_t(0), boost::endian::native_to_little<uint16_t>);
-        swap_endianness(dst.uvalue, uint16_t(0), boost::endian::native_to_little<uint16_t>);
+        if (thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+            swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+        } else {
+            swap_endianness(dst.svalue, uint16_t(0), truncate<uint16_t>);
+            swap_endianness(dst.uvalue, uint16_t(0), truncate<uint16_t>);
+        }
         break;
     case Un::Op::LE32:
-        swap_endianness(dst.svalue, uint32_t(0), boost::endian::native_to_little<uint32_t>);
-        swap_endianness(dst.uvalue, uint32_t(0), boost::endian::native_to_little<uint32_t>);
+        if (thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+            swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+        } else {
+            swap_endianness(dst.svalue, uint32_t(0), truncate<uint32_t>);
+            swap_endianness(dst.uvalue, uint32_t(0), truncate<uint32_t>);
+        }
         break;
     case Un::Op::LE64:
-        swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_little<int64_t>);
-        swap_endianness(dst.svalue, uint64_t(0), boost::endian::native_to_little<uint64_t>);
+        if (thread_local_options.big_endian) {
+            swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse<int64_t>);
+            swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse<uint64_t>);
+        }
         break;
     case Un::Op::SWAP16:
         swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);

diff --git a/src/ebpf_yaml.cpp b/src/ebpf_yaml.cpp
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: MIT
 
 #include <algorithm>
+#include <bit>
 #include <iostream>
 #include <set>
 #include <variant>
@@ -177,13 +178,20 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set<string>& ra
     options.no_simplify = true;
     options.setup_constraints = false;
 
+    // Default to the machine's native endianness.
+    options.big_endian = (std::endian::native == std::endian::big);
+
     for (const string& name : raw_options) {
         if (name == "!allow_division_by_zero") {
             options.allow_division_by_zero = false;
         } else if (name == "termination") {
             options.check_termination = true;
         } else if (name == "strict") {
             options.strict = true;
+        } else if (name == "big_endian") {
+            options.big_endian = true;
+        } else if (name == "!big_endian") {
+            options.big_endian = false;
         } else {
             throw std::runtime_error("Unknown option: " + name);
         }

diff --git a/src/test/test_conformance.cpp b/src/test/test_conformance.cpp
@@ -151,25 +151,25 @@ TEST_CONFORMANCE("le16.data")
 TEST_CONFORMANCE("le32.data")
 TEST_CONFORMANCE("le64.data")
 TEST_CONFORMANCE("lock_add.data")
-TEST_CONFORMANCE_RANGE("lock_add32.data", "[0, 1311768467463790321]")
+TEST_CONFORMANCE("lock_add32.data")
 TEST_CONFORMANCE("lock_and.data")
-TEST_CONFORMANCE_TOP("lock_and32.data")
+TEST_CONFORMANCE("lock_and32.data")
 TEST_CONFORMANCE_TOP("lock_cmpxchg.data")
 TEST_CONFORMANCE_TOP("lock_cmpxchg32.data")
 TEST_CONFORMANCE("lock_fetch_add.data")
-TEST_CONFORMANCE_RANGE("lock_fetch_add32.data", "[0, 1311768467463790321]")
+TEST_CONFORMANCE("lock_fetch_add32.data")
 TEST_CONFORMANCE("lock_fetch_and.data")
-TEST_CONFORMANCE_TOP("lock_fetch_and32.data")
+TEST_CONFORMANCE("lock_fetch_and32.data")
 TEST_CONFORMANCE("lock_fetch_or.data")
-TEST_CONFORMANCE_TOP("lock_fetch_or32.data")
+TEST_CONFORMANCE("lock_fetch_or32.data")
 TEST_CONFORMANCE("lock_fetch_xor.data")
-TEST_CONFORMANCE_TOP("lock_fetch_xor32.data")
+TEST_CONFORMANCE("lock_fetch_xor32.data")
 TEST_CONFORMANCE("lock_or.data")
-TEST_CONFORMANCE_TOP("lock_or32.data")
+TEST_CONFORMANCE("lock_or32.data")
 TEST_CONFORMANCE("lock_xchg.data")
 TEST_CONFORMANCE("lock_xchg32.data")
 TEST_CONFORMANCE("lock_xor.data")
-TEST_CONFORMANCE_TOP("lock_xor32.data")
+TEST_CONFORMANCE("lock_xor32.data")
 TEST_CONFORMANCE("lsh32-imm.data")
 TEST_CONFORMANCE("lsh32-imm-high.data")
 TEST_CONFORMANCE("lsh32-imm-neg.data")