Skip to content

Commit

Permalink
Stack memory tracking improvements (vbpf#614)
Browse files Browse the repository at this point in the history
* Stack memory tracking improvements, fixes vbpf#566
* Expand byteswap tests to test big and little endian platforms
* Update conformance test expected results to be more precise
---------

Signed-off-by: Dave Thaler <[email protected]>
  • Loading branch information
dthaler authored May 12, 2024
1 parent ef17f7a commit 78a6c82
Show file tree
Hide file tree
Showing 9 changed files with 677 additions and 39 deletions.
1 change: 1 addition & 0 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ const ebpf_verifier_options_t ebpf_verifier_default_options = {
.print_line_info = false,
.allow_division_by_zero = true,
.setup_constraints = true,
.big_endian = false,
};
1 change: 1 addition & 0 deletions src/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct ebpf_verifier_options_t {
bool print_line_info;
bool allow_division_by_zero;
bool setup_constraints;
bool big_endian;

bool dump_btf_types_json;
};
Expand Down
118 changes: 114 additions & 4 deletions src/crab/array_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@
#include <utility>
#include <vector>

#include "boost/endian/conversion.hpp"

#include "radix_tree/radix_tree.hpp"
#include "crab/array_domain.hpp"

#include "asm_ostream.hpp"
#include "config.hpp"
#include "dsl_syntax.hpp"
#include "spec_type_descriptors.hpp"

Expand Down Expand Up @@ -110,9 +113,9 @@ class cell_t final {
return {number_t{static_cast<int>(o)}, number_t{static_cast<int>(o)} + number_t{static_cast<int>(size - 1)}};
}

public:
[[nodiscard]] interval_t to_interval() const { return to_interval(_offset, _size); }

public:
[[nodiscard]] bool is_null() const { return _offset == 0 && _size == 0; }

[[nodiscard]] offset_t get_offset() const { return _offset; }
Expand Down Expand Up @@ -447,6 +450,68 @@ std::ostream& operator<<(std::ostream& o, offset_map_t& m) {
return o;
}

// Create a new cell that is a subset of an existing cell.
void array_domain_t::split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_start_index, unsigned int len) {
assert(kind == data_kind_t::svalues || kind == data_kind_t::uvalues);

// Get the values from the indicated stack range.
std::optional<linear_expression_t> svalue = load(inv, data_kind_t::svalues, number_t(cell_start_index), len);
std::optional<linear_expression_t> uvalue = load(inv, data_kind_t::uvalues, number_t(cell_start_index), len);

// Create a new cell for that range.
offset_map_t& offset_map = lookup_array_map(kind);
cell_t new_cell = offset_map.mk_cell(cell_start_index, len);
inv.assign(new_cell.get_scalar(data_kind_t::svalues), svalue);
inv.assign(new_cell.get_scalar(data_kind_t::uvalues), uvalue);
}

// Prepare to havoc bytes in the middle of a cell by potentially splitting the cell if it is numeric,
// into the part to the left of the havoced portion, and the part to the right of the havoced portion.
void array_domain_t::split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i,
const linear_expression_t& elem_size) {
assert(kind == data_kind_t::svalues || kind == data_kind_t::uvalues);
offset_map_t& offset_map = lookup_array_map(kind);
interval_t ii = inv.eval_interval(i);
std::optional<number_t> n = ii.singleton();
if (!n) {
// We can only split a singleton offset.
return;
}
interval_t i_elem_size = inv.eval_interval(elem_size);
std::optional<number_t> n_bytes = i_elem_size.singleton();
if (!n_bytes) {
// We can only split a singleton size.
return;
}
auto size = static_cast<unsigned int>(*n_bytes);
offset_t o((uint64_t)*n);

std::vector<cell_t> cells = offset_map.get_overlap_cells(o, size);
for (cell_t const& c : cells) {
interval_t intv = c.to_interval();
int cell_start_index = (int)*intv.lb().number();
int cell_end_index = (int)*intv.ub().number();

if (!this->num_bytes.all_num(cell_start_index, cell_end_index + 1) || (cell_end_index + 1 < cell_start_index + sizeof(int64_t))) {
// We can only split numeric cells of size 8 or less.
continue;
}

if (!inv.eval_interval(c.get_scalar(kind)).is_singleton()) {
// We can only split cells with a singleton value.
continue;
}
if (cell_start_index < o) {
// Use the bytes to the left of the specified range.
split_cell(inv, kind, cell_start_index, (unsigned int)(o - cell_start_index));
}
if (o + size - 1 < cell_end_index) {
// Use the bytes to the right of the specified range.
split_cell(inv, kind, (int)(o + size), (unsigned int)(cell_end_index - (o + size - 1)));
}
}
}

// we can only treat this as non-member because we use global state
static std::optional<std::pair<offset_t, unsigned>>
kill_and_find_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i, const linear_expression_t& elem_size) {
Expand Down Expand Up @@ -522,6 +587,28 @@ std::optional<uint8_t> get_value_byte(NumAbsDomain& inv, offset_t o, int width)
return {};
}
uint64_t n = t->cast_to_uint64();

// Convert value to bytes of the appropriate endian-ness.
switch (width) {
case sizeof(uint16_t):
if (thread_local_options.big_endian)
n = boost::endian::native_to_big<uint16_t>(n);
else
n = boost::endian::native_to_little<uint16_t>(n);
break;
case sizeof(uint32_t):
if (thread_local_options.big_endian)
n = boost::endian::native_to_big<uint32_t>(n);
else
n = boost::endian::native_to_little<uint32_t>(n);
break;
case sizeof(uint64_t):
if (thread_local_options.big_endian)
n = boost::endian::native_to_big<uint64_t>(n);
else
n = boost::endian::native_to_little<uint64_t>(n);
break;
}
uint8_t* bytes = (uint8_t*)&n;
return bytes[o % width];
}
Expand Down Expand Up @@ -578,14 +665,26 @@ std::optional<linear_expression_t> array_domain_t::load(NumAbsDomain& inv, data_
}
if (size == 2) {
uint16_t b = *(uint16_t*)result_buffer;
if (thread_local_options.big_endian)
b = boost::endian::native_to_big<uint16_t>(b);
else
b = boost::endian::native_to_little<uint16_t>(b);
return number_t{b};
}
if (size == 4) {
uint32_t b = *(uint32_t*)result_buffer;
if (thread_local_options.big_endian)
b = boost::endian::native_to_big<uint32_t>(b);
else
b = boost::endian::native_to_little<uint32_t>(b);
return number_t{b};
}
if (size == 8) {
uint64_t b = *(uint64_t*)result_buffer;
if (thread_local_options.big_endian)
b = boost::endian::native_to_big<uint64_t>(b);
else
b = boost::endian::native_to_little<uint64_t>(b);
return (kind == data_kind_t::uvalues) ? number_t(b) : number_t((int64_t)b);
}
}
Expand Down Expand Up @@ -629,11 +728,22 @@ std::optional<linear_expression_t> array_domain_t::load(NumAbsDomain& inv, data_
return {};
}

// We are about to write to a given range of bytes on the stack.
// Any cells covering that range need to be removed, and any cells that only
// partially cover that range can be split such that any non-covered portions become new cells.
static std::optional<std::pair<offset_t, unsigned>> split_and_find_var(array_domain_t& array_domain, NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) {
if (kind == data_kind_t::svalues || kind == data_kind_t::uvalues) {
array_domain.split_number_var(inv, kind, idx, elem_size);
}
return kill_and_find_var(inv, kind, idx, elem_size);
}


std::optional<variable_t> array_domain_t::store(NumAbsDomain& inv, data_kind_t kind,
const linear_expression_t& idx,
const linear_expression_t& elem_size,
const linear_expression_t& val) {
auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
if (maybe_cell) {
// perform strong update
auto [offset, size] = *maybe_cell;
Expand All @@ -655,7 +765,7 @@ std::optional<variable_t> array_domain_t::store_type(NumAbsDomain& inv,
const linear_expression_t& elem_size,
const linear_expression_t& val) {
auto kind = data_kind_t::types;
auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
if (maybe_cell) {
// perform strong update
auto [offset, size] = *maybe_cell;
Expand All @@ -678,7 +788,7 @@ std::optional<variable_t> array_domain_t::store_type(NumAbsDomain& inv,
}

void array_domain_t::havoc(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& idx, const linear_expression_t& elem_size) {
auto maybe_cell = kill_and_find_var(inv, kind, idx, elem_size);
auto maybe_cell = split_and_find_var(*this, inv, kind, idx, elem_size);
if (maybe_cell && kind == data_kind_t::types) {
auto [offset, size] = *maybe_cell;
num_bytes.havoc(offset, size);
Expand Down
4 changes: 4 additions & 0 deletions src/crab/array_domain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ class array_domain_t final {
// Perform array stores over an array segment
void store_numbers(NumAbsDomain& inv, variable_t _idx, variable_t _width);

void split_number_var(NumAbsDomain& inv, data_kind_t kind, const linear_expression_t& i,
const linear_expression_t& elem_size);
void split_cell(NumAbsDomain& inv, data_kind_t kind, int cell_start_index, unsigned int len);

void initialize_numbers(int lb, int width);
};

Expand Down
56 changes: 43 additions & 13 deletions src/crab/ebpf_domain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,12 @@ void ebpf_domain_t::operator()(const Assume& s) {

void ebpf_domain_t::operator()(const Undefined& a) {}

// Simple truncation function usable with swap_endianness().
template <class T>
BOOST_CONSTEXPR T truncate(T x) BOOST_NOEXCEPT {
return x;
}

void ebpf_domain_t::operator()(const Un& stmt) {
auto dst = reg_pack(stmt.dst);
auto swap_endianness = [&](variable_t v, auto input, const auto& be_or_le) {
Expand All @@ -1464,33 +1470,57 @@ void ebpf_domain_t::operator()(const Un& stmt) {
havoc(v);
havoc_offsets(stmt.dst);
};
// Swap bytes. For 64-bit types we need the weights to fit in a
// Swap bytes if needed. For 64-bit types we need the weights to fit in a
// signed int64, but for smaller types we don't want sign extension,
// so we use unsigned which still fits in a signed int64.
switch (stmt.op) {
case Un::Op::BE16:
swap_endianness(dst.svalue, uint16_t(0), boost::endian::native_to_big<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), boost::endian::native_to_big<uint16_t>);
if (!thread_local_options.big_endian) {
swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
} else {
swap_endianness(dst.svalue, uint16_t(0), truncate<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), truncate<uint16_t>);
}
break;
case Un::Op::BE32:
swap_endianness(dst.svalue, uint32_t(0), boost::endian::native_to_big<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), boost::endian::native_to_big<uint32_t>);
if (!thread_local_options.big_endian) {
swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
} else {
swap_endianness(dst.svalue, uint32_t(0), truncate<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), truncate<uint32_t>);
}
break;
case Un::Op::BE64:
swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_big<int64_t>);
swap_endianness(dst.uvalue, uint64_t(0), boost::endian::native_to_big<uint64_t>);
if (!thread_local_options.big_endian) {
swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse<int64_t>);
swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse<uint64_t>);
}
break;
case Un::Op::LE16:
swap_endianness(dst.svalue, uint16_t(0), boost::endian::native_to_little<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), boost::endian::native_to_little<uint16_t>);
if (thread_local_options.big_endian) {
swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
} else {
swap_endianness(dst.svalue, uint16_t(0), truncate<uint16_t>);
swap_endianness(dst.uvalue, uint16_t(0), truncate<uint16_t>);
}
break;
case Un::Op::LE32:
swap_endianness(dst.svalue, uint32_t(0), boost::endian::native_to_little<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), boost::endian::native_to_little<uint32_t>);
if (thread_local_options.big_endian) {
swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
} else {
swap_endianness(dst.svalue, uint32_t(0), truncate<uint32_t>);
swap_endianness(dst.uvalue, uint32_t(0), truncate<uint32_t>);
}
break;
case Un::Op::LE64:
swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_little<int64_t>);
swap_endianness(dst.svalue, uint64_t(0), boost::endian::native_to_little<uint64_t>);
if (thread_local_options.big_endian) {
swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse<int64_t>);
swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse<uint64_t>);
}
break;
case Un::Op::SWAP16:
swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
Expand Down
8 changes: 8 additions & 0 deletions src/ebpf_yaml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: MIT

#include <algorithm>
#include <bit>
#include <iostream>
#include <set>
#include <variant>
Expand Down Expand Up @@ -177,13 +178,20 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set<string>& ra
options.no_simplify = true;
options.setup_constraints = false;

// Default to the machine's native endianness.
options.big_endian = (std::endian::native == std::endian::big);

for (const string& name : raw_options) {
if (name == "!allow_division_by_zero") {
options.allow_division_by_zero = false;
} else if (name == "termination") {
options.check_termination = true;
} else if (name == "strict") {
options.strict = true;
} else if (name == "big_endian") {
options.big_endian = true;
} else if (name == "!big_endian") {
options.big_endian = false;
} else {
throw std::runtime_error("Unknown option: " + name);
}
Expand Down
16 changes: 8 additions & 8 deletions src/test/test_conformance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,25 @@ TEST_CONFORMANCE("le16.data")
TEST_CONFORMANCE("le32.data")
TEST_CONFORMANCE("le64.data")
TEST_CONFORMANCE("lock_add.data")
TEST_CONFORMANCE_RANGE("lock_add32.data", "[0, 1311768467463790321]")
TEST_CONFORMANCE("lock_add32.data")
TEST_CONFORMANCE("lock_and.data")
TEST_CONFORMANCE_TOP("lock_and32.data")
TEST_CONFORMANCE("lock_and32.data")
TEST_CONFORMANCE_TOP("lock_cmpxchg.data")
TEST_CONFORMANCE_TOP("lock_cmpxchg32.data")
TEST_CONFORMANCE("lock_fetch_add.data")
TEST_CONFORMANCE_RANGE("lock_fetch_add32.data", "[0, 1311768467463790321]")
TEST_CONFORMANCE("lock_fetch_add32.data")
TEST_CONFORMANCE("lock_fetch_and.data")
TEST_CONFORMANCE_TOP("lock_fetch_and32.data")
TEST_CONFORMANCE("lock_fetch_and32.data")
TEST_CONFORMANCE("lock_fetch_or.data")
TEST_CONFORMANCE_TOP("lock_fetch_or32.data")
TEST_CONFORMANCE("lock_fetch_or32.data")
TEST_CONFORMANCE("lock_fetch_xor.data")
TEST_CONFORMANCE_TOP("lock_fetch_xor32.data")
TEST_CONFORMANCE("lock_fetch_xor32.data")
TEST_CONFORMANCE("lock_or.data")
TEST_CONFORMANCE_TOP("lock_or32.data")
TEST_CONFORMANCE("lock_or32.data")
TEST_CONFORMANCE("lock_xchg.data")
TEST_CONFORMANCE("lock_xchg32.data")
TEST_CONFORMANCE("lock_xor.data")
TEST_CONFORMANCE_TOP("lock_xor32.data")
TEST_CONFORMANCE("lock_xor32.data")
TEST_CONFORMANCE("lsh32-imm.data")
TEST_CONFORMANCE("lsh32-imm-high.data")
TEST_CONFORMANCE("lsh32-imm-neg.data")
Expand Down
Loading

0 comments on commit 78a6c82

Please sign in to comment.