From 79ab7b93c214401386f15c28847aba218a594cd7 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sun, 13 Oct 2024 14:58:19 -0700 Subject: [PATCH] Verifier integration with libfuzzer tests. Signed-off-by: Alan Jowett --- .github/workflows/fuzzing.yml | 15 +- .github/workflows/main.yml | 5 +- .gitmodules | 3 + CMakeLists.txt | 1 + cmake/options.cmake | 1 + custom_tests/srcs/ubpf_test_debug_function.cc | 4 +- external/ebpf-verifier | 1 + libfuzzer/CMakeLists.txt | 54 +- libfuzzer/libfuzz_harness.cc | 687 ++++++++++++++++-- libfuzzer/libfuzzer_config.h.inc | 11 + vm/inc/ubpf.h | 43 +- vm/ubpf_vm.c | 129 ++-- 12 files changed, 832 insertions(+), 122 deletions(-) create mode 160000 external/ebpf-verifier create mode 100644 libfuzzer/libfuzzer_config.h.inc diff --git a/.github/workflows/fuzzing.yml b/.github/workflows/fuzzing.yml index f5e60dc83..b6bb40542 100644 --- a/.github/workflows/fuzzing.yml +++ b/.github/workflows/fuzzing.yml @@ -10,13 +10,14 @@ on: schedule: # Run every day at 21:00 UTC - cron: '00 21 * * *' workflow_dispatch: # Run manually + workflow_call: jobs: build: strategy: matrix: platform: - - ubuntu-latest + - ubuntu-24.04 arch: - x86_64 @@ -48,7 +49,7 @@ jobs: ccache - name: Install system dependencies (Linux) - if: matrix.platform == 'ubuntu-latest' + if: matrix.platform == 'ubuntu-24.04' run: | sudo apt-get update @@ -60,7 +61,8 @@ jobs: libboost-dev \ libboost-program-options-dev \ libboost-filesystem-dev \ - libelf-dev + libelf-dev \ + libyaml-cpp-dev if [[ "${{ matrix.arch }}" == "arm64" ]] ; then sudo apt install -y \ @@ -70,7 +72,7 @@ jobs: fi - name: Build/install libbpf From Source - if: matrix.platform == 'ubuntu-latest' + if: matrix.platform == 'ubuntu-24.04' run: ./.github/scripts/build-libbpf.sh shell: bash @@ -92,11 +94,11 @@ jobs: -G Ninja \ -S . 
\ -B build \ - -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ -DUBPF_ENABLE_LIBFUZZER=1 \ - -DCMAKE_BUILD_TYPE=Debug + -DVERIFIER_ENABLE_TESTS=false \ ${arch_flags} - name: Build uBPF @@ -136,6 +138,7 @@ jobs: ./ubpf_fuzzer new_corpus -artifact_prefix=artifacts/ -use_value_profile=1 -max_total_time=300 - name: Merge corpus into fuzz/corpus + if: ${{ github.event_name == 'schedule' }} run: | ./ubpf_fuzzer -merge=1 fuzz/corpus new_corpus git add fuzz/corpus diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8cf335379..9ab152dc4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ name: Main permissions: - contents: read + contents: write # Required by fuzzing task. security-events: write # Required by codeql task. actions: read @@ -340,6 +340,9 @@ jobs: build_codeql: true disable_retpolines: true + linux_release_fuzzing: + uses: ./.github/workflows/fuzzing.yml + # Disabled until https://github.com/iovisor/ubpf/issues/155 is resolved. 
# linux_debug_arm64_sanitizers: # uses: ./.github/workflows/posix.yml diff --git a/.gitmodules b/.gitmodules index f0b96ce4e..032b3142a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "external/bpf_conformance"] path = external/bpf_conformance url = https://github.com/Alan-Jowett/bpf_conformance.git +[submodule "external/ebpf-verifier"] + path = external/ebpf-verifier + url = https://github.com/vbpf/ebpf-verifier.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8858af4e7..cf3c69eaf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,4 +43,5 @@ endif() if (UBPF_ENABLE_LIBFUZZER) add_subdirectory("libfuzzer") + add_subdirectory("external/ebpf-verifier") endif() diff --git a/cmake/options.cmake b/cmake/options.cmake index 645631fcd..c1bd80f21 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -10,6 +10,7 @@ if(PLATFORM_LINUX OR PLATFORM_MACOS) option(UBPF_ENABLE_COVERAGE "Set to true to enable coverage flags") option(UBPF_ENABLE_SANITIZERS "Set to true to enable the address and undefined sanitizers") option(UBPF_ENABLE_LIBFUZZER "Set to true to enable the libfuzzer") + option(UBPF_ENABLE_LIBFUZZER_CONSTRAINT_CHECK "Set to true to enable the libfuzzer constraint check") endif() option(UBPF_DISABLE_RETPOLINES "Disable retpoline security on indirect calls and jumps") diff --git a/custom_tests/srcs/ubpf_test_debug_function.cc b/custom_tests/srcs/ubpf_test_debug_function.cc index 5b234e41e..2d06e1a77 100644 --- a/custom_tests/srcs/ubpf_test_debug_function.cc +++ b/custom_tests/srcs/ubpf_test_debug_function.cc @@ -22,8 +22,10 @@ typedef struct _vm_state { } vm_state_t; void -debug_callout(void* context, int program_counter, const uint64_t registers[16], const uint8_t* stack_start, size_t stack_length) +debug_callout(void* context, int program_counter, const uint64_t registers[16], const uint8_t* stack_start, size_t stack_length, uint64_t register_mask, const uint8_t* stack_mask) { + UNREFERENCED_PARAMETER(register_mask); + 
UNREFERENCED_PARAMETER(stack_mask); std::vector* vm_states = static_cast*>(context); vm_state_t vm_state{}; diff --git a/external/ebpf-verifier b/external/ebpf-verifier new file mode 160000 index 000000000..9f25cee94 --- /dev/null +++ b/external/ebpf-verifier @@ -0,0 +1 @@ +Subproject commit 9f25cee94f74108c19fa1cc88bc32c01d7ad758a diff --git a/libfuzzer/CMakeLists.txt b/libfuzzer/CMakeLists.txt index 41f21e1cd..39d8f5a61 100644 --- a/libfuzzer/CMakeLists.txt +++ b/libfuzzer/CMakeLists.txt @@ -6,24 +6,54 @@ if (UBPF_SKIP_EXTERNAL) return() endif() -set(CMAKE_CXX_STANDARD 20) - -add_executable( - ubpf_fuzzer - libfuzz_harness.cc -) - -target_include_directories("ubpf_fuzzer" PRIVATE - "${CMAKE_SOURCE_DIR}/vm" +set(UBPF_FUZZER_INCLUDES "${CMAKE_SOURCE_DIR}/vm" "${CMAKE_BINARY_DIR}/vm" + "${CMAKE_BINARY_DIR}/_deps/gsl-src/include" "${CMAKE_SOURCE_DIR}/vm/inc" "${CMAKE_BINARY_DIR}/vm/inc" "${CMAKE_SOURCE_DIR}/ubpf_plugin" -) + "${CMAKE_SOURCE_DIR}/external/ebpf-verifier/src" + "${CMAKE_SOURCE_DIR}/external/ebpf-verifier/src/crab" + "${CMAKE_SOURCE_DIR}/external/ebpf-verifier/src/crab_utils" + "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries( - ubpf_fuzzer +set(UBPF_FUZZER_LIBS ubpf ubpf_settings + ebpfverifier) + +# include(CheckCXXSymbolExists) + +set(CMAKE_REQUIRED_INCLUDES ${UBPF_FUZZER_INCLUDES}) + +# check_cxx_symbol_exists(ebpf_verifier_options_t::store_pre_invariants "config.hpp" HAVE_EBPF_CHECK_CONSTRAINTS_AT_LABEL) + +include(CheckCXXSourceCompiles) + +set(CHECK_CONFIG_STORE_PRE_INVARIANTS " +#include +int main() { + ebpf_verifier_options_t options; + options.store_pre_invariants = true; + return 0; +} +") + +check_cxx_source_compiles("${CHECK_CONFIG_STORE_PRE_INVARIANTS}" HAVE_EBPF_VERIFIER_CHECK_CONSTRAINTS_AT_LABEL) + +set(CMAKE_CXX_STANDARD 20) + +configure_file( + libfuzzer_config.h.inc + "${CMAKE_CURRENT_BINARY_DIR}/libfuzzer_config.h" +) + +add_executable( + ubpf_fuzzer + libfuzz_harness.cc ) +target_include_directories("ubpf_fuzzer" PRIVATE 
${UBPF_FUZZER_INCLUDES}) + +target_link_libraries(ubpf_fuzzer PRIVATE ${UBPF_FUZZER_LIBS}) + diff --git a/libfuzzer/libfuzz_harness.cc b/libfuzzer/libfuzz_harness.cc index 45f5be3fd..a0fffb516 100644 --- a/libfuzzer/libfuzz_harness.cc +++ b/libfuzzer/libfuzz_harness.cc @@ -7,41 +7,377 @@ #include #include #include +#include #include #include +#include "libfuzzer_config.h" + +#include "asm_unmarshal.hpp" +#include "crab_verifier.hpp" +#include "platform.hpp" + extern "C" { +#define ebpf_inst ebpf_inst_ubpf #include "ebpf.h" #include "ubpf.h" +#undef ebpf_inst } #include "test_helpers.h" #include -uint64_t test_helpers_dispatcher(uint64_t p0, uint64_t p1,uint64_t p2,uint64_t p3, uint64_t p4, unsigned int idx, void* cookie) { +/** + * @brief Class to read the options from the environment and provide them to + * the fuzzer. + */ +class _ubpf_fuzzer_options +{ +public: + _ubpf_fuzzer_options() { + for (auto& [key, value] : option) { + const char* env = std::getenv(key.c_str()); + if (env != nullptr) { + value = std::stoi(env) != 0; + } + } + } + + bool get(const std::string& key) const { + return option.at(key); + } + +private: + std::map option{ + // Cheap options enabled by default. + {"UBPF_FUZZER_JIT", true}, ///< Enable JIT compilation. + {"UBPF_FUZZER_INTERPRETER", true}, ///< Enable interpreter execution. + {"UBPF_FUZZER_VERIFY_BYTE_CODE", + true}, ///< Enable a verifier pass before running the byte code. If byte code is verified, then both bounds check + ///< and undefined behavior failures are fatal. + // CPU and memory intensive options disabled by default. + {"UBPF_FUZZER_CONSTRAINT_CHECK", false}, ///< Enable constraint check against the verifier state. Useful for exhaustive + ///< testing. + {"UBPF_FUZZER_PRINT_VERIFIER_REPORT", false}, ///< Print verifier report. Useful for debugging. + {"UBPF_FUZZER_PRINT_EXECUTION_TRACE", false}, ///< Print execution trace, with register state at each step. Useful for + ///< debugging. 
+ }; +} g_ubpf_fuzzer_options; + + +std::string g_verifier_report; + +/** + * @brief Context structure passed to the BPF program. Modeled after the context structure used by XDP. + */ +typedef struct _ubpf_context +{ + uint64_t data; + uint64_t data_end; + uint64_t stack_start; + uint64_t stack_end; +} ubpf_context_t; + +/** + * @brief Descriptor for the context structure. This is used by the verifier to determine the layout of the context + * structure in memory. + */ +ebpf_context_descriptor_t g_ebpf_context_descriptor_ubpf = { + .size = sizeof(ubpf_context_t), + .data = offsetof(ubpf_context_t, data), + .end = offsetof(ubpf_context_t, data_end), + .meta = -1, +}; + +/** + * @brief Description of the program type. This is used by the verifier to determine what context structure to use as + * well as the helper functions that are available. + */ +EbpfProgramType g_ubpf_program_type = { + .name = "ubpf", + .context_descriptor = &g_ebpf_context_descriptor_ubpf, + .platform_specific_data = 0, + .section_prefixes = {}, + .is_privileged = false, +}; + +/** + * @brief This function is called by the verifier when parsing an ELF file to determine the type of the program being + * loaded based on the section and path. + * + * @param[in] section The section name of the program. + * @param[in] path The path to the ELF file. + * @return The type of the program. + */ +EbpfProgramType +ubpf_get_program_type(const std::string& section, const std::string& path) +{ + UNREFERENCED_PARAMETER(section); + UNREFERENCED_PARAMETER(path); + return g_ubpf_program_type; +} + +/** + * @brief This function is called by the verifier to determine the type of a map given the platform specific type. + * + * @param[in] platform_specific_type The platform specific type of the map. + * @return The type of the map.
+ */ +EbpfMapType +ubpf_get_map_type(uint32_t platform_specific_type) +{ + // Once the fuzzer supports maps, this function should be implemented to return metadata about the map, primarily + // the key and value size. + UNREFERENCED_PARAMETER(platform_specific_type); + return {}; +} + +/** + * @brief This function is called by the verifier to determine the prototype of a helper function given the helper + * function number. + * + * @param[in] n The helper function number. + * @return The prototype of the helper function. + */ +EbpfHelperPrototype +ubpf_get_helper_prototype(int32_t n) +{ + // Once the fuzzer supports helper functions, this function should be implemented to return metadata about the + // helper function. + UNREFERENCED_PARAMETER(n); + return {}; +} + +/** + * @brief This function is called by the verifier to determine whether a helper function is usable given the helper + * function number. + * + * @param[in] n The helper function number. + * @retval true The helper function is usable. + * @retval false The helper function is not usable. + */ +bool +ubpf_is_helper_usable(int32_t n) +{ + // Once the fuzzer supports helper functions, this function should be implemented to return whether the helper + // function is usable. + UNREFERENCED_PARAMETER(n); + return false; +} + +/** + * @brief This function is called by the verifier to parse the maps section of the ELF file (if any). + * + * @param[in,out] map_descriptors The map descriptors to populate. + * @param[in] data The data in the maps section. + * @param[in] map_record_size The size of each map record. + * @param[in] map_count The number of maps in the maps section. + * @param[in] platform The platform specific data. + * @param[in] options Options for the verifier. 
+ */ +void +ubpf_parse_maps_section( + std::vector& map_descriptors, + const char* data, + size_t map_record_size, + int map_count, + const struct ebpf_platform_t* platform, + ebpf_verifier_options_t options) +{ + // Once the fuzzer supports maps, this function should be implemented to parse the maps section of the ELF file (if + // any). + UNREFERENCED_PARAMETER(map_descriptors); + UNREFERENCED_PARAMETER(data); + UNREFERENCED_PARAMETER(map_record_size); + UNREFERENCED_PARAMETER(map_count); + UNREFERENCED_PARAMETER(platform); + UNREFERENCED_PARAMETER(options); + throw std::runtime_error("parse_maps_section not implemented"); +} + +/** + * @brief Given a map descriptor, resolve any inner map references to other maps. + * + * @param[in,out] map_descriptors The map descriptors to resolve. + */ +void +ubpf_resolve_inner_map_references(std::vector& map_descriptors) +{ + // Once the fuzzer supports maps, this function should be implemented to resolve inner map references. + UNREFERENCED_PARAMETER(map_descriptors); + throw std::runtime_error("resolve_inner_map_references not implemented"); +} + +/** + * @brief The function is called by the verifier to get the map descriptor for a given map file descriptor. + * + * @param[in] map_fd The map file descriptor. + * @return The map descriptor. + */ +EbpfMapDescriptor& +ubpf_get_map_descriptor(int map_fd) +{ + // Once the fuzzer supports maps, this function should be implemented to return the map descriptor for the given map + // file descriptor. + UNREFERENCED_PARAMETER(map_fd); + throw std::runtime_error("get_map_descriptor not implemented"); +} + +/** + * @brief The platform abstraction for the verifier to call into the uBPF fuzzer platform. 
+ */ +ebpf_platform_t g_ebpf_platform_ubpf_fuzzer = { + .get_program_type = ubpf_get_program_type, + .get_helper_prototype = ubpf_get_helper_prototype, + .is_helper_usable = ubpf_is_helper_usable, + .map_record_size = 0, + .parse_maps_section = ubpf_parse_maps_section, + .get_map_descriptor = ubpf_get_map_descriptor, + .get_map_type = ubpf_get_map_type, + .resolve_inner_map_references = ubpf_resolve_inner_map_references, + .supported_conformance_groups = bpf_conformance_groups_t::default_groups, +}; + +/** + * @brief Dispatcher for the helper functions. + * + * @param[in] p0 First parameter to the helper function. + * @param[in] p1 Second parameter to the helper function. + * @param[in] p2 Third parameter to the helper function. + * @param[in] p3 Fourth parameter to the helper function. + * @param[in] p4 Fifth parameter to the helper function. + * @param[in] idx Index of the helper function to call. + * @param[in] cookie Cookie to pass to the helper function. + * @return Value returned by the helper function. + */ +uint64_t +test_helpers_dispatcher(uint64_t p0, uint64_t p1, uint64_t p2, uint64_t p3, uint64_t p4, unsigned int idx, void* cookie) +{ UNREFERENCED_PARAMETER(cookie); return helper_functions[idx](p0, p1, p2, p3, p4); } -bool test_helpers_validator(unsigned int idx, const struct ubpf_vm *vm) { +/** + * @brief Function to validate the helper function index. + * + * @param[in] idx Helper function index. + * @param[in] vm The VM instance. + * @retval true The helper function index is valid. + * @retval false The helper function index is invalid. + */ +bool +test_helpers_validator(unsigned int idx, const struct ubpf_vm* vm) +{ UNREFERENCED_PARAMETER(vm); return helper_functions.contains(idx); } -extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size); +extern "C" int +LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size); + +std::string g_error_message; -int null_printf(FILE* stream, const char* format, ...) 
+/** + * @brief Capture the output of printf to a string. + * + * @param[in,out] stream The stream to write to. + * @param[in] format The format string. + * @param[in] ... The arguments to the format string. + * + * @return The number of characters written. + */ +int capture_printf(FILE* stream, const char* format, ...) { - if (!stream) { - return 0; + // Format the message and append it to g_error_message. + + UNREFERENCED_PARAMETER(stream); + + va_list args; + va_start(args, format); + char buffer[1024]; + int ret = vsnprintf(buffer, sizeof(buffer), format, args); + va_end(args); + + if (ret < 0) { + return ret; + } + + g_error_message += buffer; + + return ret; +} + +/** + * @brief Invoke the verifier to verify the given BPF program. + * + * @param[in] program_code The program byte code to verify. + * @retval true The program is safe to run. + * @retval false The program might be unsafe to run. Note: The verifier is conservative and may reject safe programs. + */ +bool +verify_bpf_byte_code(const std::vector& program_code) +try { + std::ostringstream error; + auto instruction_array = reinterpret_cast(program_code.data()); + size_t instruction_count = program_code.size() / sizeof(ebpf_inst); + const ebpf_platform_t* platform = &g_ebpf_platform_ubpf_fuzzer; + std::vector instructions{instruction_array, instruction_array + instruction_count}; + program_info info{ + .platform = platform, + .type = g_ubpf_program_type, + }; + std::string section; + std::string file; + raw_program raw_prog{file, section, 0, {}, instructions, info}; + + // Unpack the program into a sequence of instructions that the verifier can understand. + std::variant prog_or_error = unmarshal(raw_prog); + if (!std::holds_alternative(prog_or_error)) { + return false; } - if (!format) { - return 0; + + // Extract the program instructions. + InstructionSeq& prog = std::get(prog_or_error); + + // Start with the default verifier options. 
+ ebpf_verifier_options_t options = ebpf_verifier_default_options; + + // Enable termination checking and pre-invariant storage. + options.check_termination = true; + options.print_invariants = g_ubpf_fuzzer_options.get("UBPF_FUZZER_PRINT_VERIFIER_REPORT"); + options.print_failures = g_ubpf_fuzzer_options.get("UBPF_FUZZER_PRINT_VERIFIER_REPORT"); +#if defined(HAVE_EBPF_VERIFIER_CHECK_CONSTRAINTS_AT_LABEL) + options.store_pre_invariants = g_ubpf_fuzzer_options.get("UBPF_FUZZER_CONSTRAINT_CHECK"); +#endif + + // Disable simplification so that the verifier can provide more fine grained invariant information for each + // instruction. + options.simplify = false; + + ebpf_verifier_stats_t stats; + + std::ostringstream error_stream; + + // Verify the program. This will return false or throw an exception if the program is invalid. + bool result = ebpf_verify_program(error_stream, prog, raw_prog.info, &options, &stats); + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_PRINT_VERIFIER_REPORT")) { + std::cout << "verifier stats:" << std::endl; + std::cout << "total_unreachable: " << stats.total_unreachable << std::endl; + std::cout << "total_warnings: " << stats.total_warnings << std::endl; + std::cout << "max_loop_count: " << stats.max_loop_count << std::endl; + std::cout << "result: " << result << std::endl; + std::cout << error_stream.str() << std::endl; } - return 0; + + return result; +} catch (const std::exception& ex) { + return false; } +/** + * @brief RAII wrapper for the ubpf_vm object. + */ typedef std::unique_ptr ubpf_vm_ptr; /** @@ -50,7 +386,8 @@ typedef std::unique_ptr ubpf_vm_ptr; * @param[in] program_code The program code to load into the VM. * @return A unique pointer to the ubpf_vm object or nullptr if the VM could not be created. */ -ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) +ubpf_vm_ptr +create_ubpf_vm(const std::vector& program_code) { // Automatically free the VM when it goes out of scope. 
std::unique_ptr vm(ubpf_create(), ubpf_destroy); @@ -66,16 +403,20 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) char* error_message = nullptr; - ubpf_set_error_print(vm.get(), null_printf); + // Capture any error messages from the uBPF library. + ubpf_set_error_print(vm.get(), capture_printf); if (ubpf_load(vm.get(), program_code.data(), program_code.size(), &error_message) != 0) { // The program failed to load, due to a validation error. // This is not interesting, as the fuzzer input is invalid. // Do not add it to the corpus. + g_error_message += error_message; free(error_message); return {nullptr, nullptr}; } + // Bounds checking is always active. Instead the behavior is if an out of bounds access is detected, the fuzzing either + // ignores the error or raises a fatal signal. ubpf_toggle_bounds_check(vm.get(), true); if (ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validator) != 0) { @@ -95,6 +436,221 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) return vm; } +/** + * @brief Classify the given address as packet, context, stack, map, or unknown. + */ +typedef enum class _address_type +{ + Packet, + Context, + Stack, + Map, + Unknown +} address_type_t; + +/** + * @brief Given a register value, classify it as packet, context, stack, or unknown. + * + * @param[in] context Pointer to the context structure. + * @param[in] register_value Register value to classify. + * @retval address_type_t::Packet The register value is within the packet data. + * @retval address_type_t::Context The register value is within the context structure. + * @retval address_type_t::Stack The register value is within the stack. + * @retval address_type_t::Unknown The register value is unknown. 
+ */ +address_type_t +ubpf_classify_address(const ubpf_context_t* context, uint64_t register_value) +{ + uintptr_t register_value_ptr = reinterpret_cast(register_value); + uintptr_t stack_start = reinterpret_cast(context->stack_start); + uintptr_t stack_end = reinterpret_cast(context->stack_end); + uintptr_t context_start = reinterpret_cast(context); + uintptr_t context_end = context_start + sizeof(ubpf_context_t); + uintptr_t packet_start = reinterpret_cast(context->data); + uintptr_t packet_end = reinterpret_cast(context->data_end); + + if (register_value_ptr >= stack_start && register_value_ptr < stack_end) { + return address_type_t::Stack; + } else if (register_value_ptr >= context_start && register_value_ptr < context_end) { + return address_type_t::Context; + } else if (register_value_ptr >= packet_start && register_value_ptr < packet_end) { + return address_type_t::Packet; + } else { + return address_type_t::Unknown; + } +} + +/** + * @brief Function invoked prior to executing each instruction in the program. + * + * @param[in] context Context passed to the program. + * @param[in] program_counter The program counter (the index of the instruction to execute). + * @param[in] registers The register values. + * @param[in] stack_start The start of the stack. + * @param[in] stack_length The length of the stack. + * @param[in] register_mask The set of registers that have been modified since the start of the program. + * @param[in] stack_mask The set of stack locations that have been modified since the start of the program. + */ +void +ubpf_debug_function( + void* context, + int program_counter, + const uint64_t registers[16], + const uint8_t* stack_start, + size_t stack_length, + uint64_t register_mask, + const uint8_t* stack_mask) +{ + // Print the program counter and register values. 
+ if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_PRINT_EXECUTION_TRACE")) { + std::cout << "Program Counter: " << program_counter << std::endl; + std::cout << "Registers: "; + for (int i = 0; i < 10; i++) { + if ((register_mask & (1 << i)) == 0) { + continue; + } + std::cout << "r" << i << "=" << registers[i] << " "; + } + std::cout << std::endl; + } + + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_CONSTRAINT_CHECK")) { +#if defined(HAVE_EBPF_VERIFIER_CHECK_CONSTRAINTS_AT_LABEL) + ubpf_context_t* ubpf_context = reinterpret_cast(context); + UNREFERENCED_PARAMETER(stack_start); + UNREFERENCED_PARAMETER(stack_length); + UNREFERENCED_PARAMETER(stack_mask); + + std::string label = std::to_string(program_counter) + ":-1"; + + if (program_counter == 0) { + return; + } + + // Build set of string constraints from the register values. + std::set constraints; + for (int i = 0; i < 10; i++) { + if ((register_mask & (1 << i)) == 0) { + continue; + } + uint64_t reg = registers[i]; + std::string register_name = "r" + std::to_string(i); + + // Given the register value, classify it as packet, context, stack, or unknown and add the appropriate + // constraint. 
+ address_type_t type = ubpf_classify_address(ubpf_context, reg); + switch (type) { + case address_type_t::Packet: + constraints.insert(register_name + ".type=packet"); + constraints.insert(register_name + ".packet_offset=" + std::to_string(reg - ubpf_context->data)); + constraints.insert( + register_name + ".packet_size=" + std::to_string(ubpf_context->data_end - ubpf_context->data)); + break; + + case address_type_t::Context: + constraints.insert(register_name + ".type=ctx"); + constraints.insert( + register_name + ".ctx_offset=" + std::to_string(reg - reinterpret_cast(ubpf_context))); + break; + + case address_type_t::Stack: + constraints.insert(register_name + ".type=stack"); + constraints.insert(register_name + ".stack_offset=" + std::to_string(reg - ubpf_context->stack_start)); + break; + + case address_type_t::Unknown: + constraints.insert("r" + std::to_string(i) + ".uvalue=" + std::to_string(registers[i])); + constraints.insert( + "r" + std::to_string(i) + ".svalue=" + std::to_string(static_cast(registers[i]))); + break; + case address_type_t::Map: + constraints.insert(register_name + ".type=shared"); + break; + } + } + + // Call ebpf_check_constraints_at_label with the set of string constraints at this label. + + std::ostringstream os; + + if (!ebpf_check_constraints_at_label(os, label, constraints)) { + std::cerr << "Label: " << label << std::endl; + std::cerr << os.str() << std::endl; + throw std::runtime_error("ebpf_check_constraints_at_label failed"); + } +#else + throw std::runtime_error("ebpf_check_constraints_at_label not supported"); + UNREFERENCED_PARAMETER(context); + UNREFERENCED_PARAMETER(stack_start); + UNREFERENCED_PARAMETER(stack_length); + UNREFERENCED_PARAMETER(stack_mask); +#endif + } +} + +/** + * @brief Helper function to create a ubpf_context_t object from the given memory and stack. + * + * @param[in] memory Vector containing the input memory. + * @param[in] ubpf_stack Vector containing the stack. + * @return The context object. 
+ */ +ubpf_context_t +ubpf_context_from(std::vector& memory, std::vector& ubpf_stack) +{ + ubpf_context_t context; + context.data = reinterpret_cast(memory.data()); + context.data_end = context.data + memory.size(); + context.stack_start = reinterpret_cast(ubpf_stack.data()); + context.stack_end = context.stack_start + ubpf_stack.size(); + return context; +} + +/** + * @brief Function to check if the given address and size are within the bounds of the memory or stack. + * + * @param[in] context The context passed to ubpf_register_data_bounds_check. + * @param[in] addr The address to check. + * @param[in] size The size of the memory to check. + * @retval true The address and size are within the bounds of the memory or stack. + * @retval false The address and size are not within the bounds of the memory or stack. + */ +bool bounds_check(void* context, uint64_t addr, uint64_t size) +{ + ubpf_context_t* ubpf_context = reinterpret_cast(context); + + // Check if the lower bound of the address is within the bounds of the memory or stack. + if (ubpf_classify_address(ubpf_context, addr) == address_type_t::Unknown) { + std::cerr << "Address out of bounds: " << std::hex << addr << std::endl; + std::cerr << "Memory start: " << std::hex << ubpf_context->data << std::endl; + std::cerr << "Memory end: " << std::hex << ubpf_context->data_end << std::endl; + std::cerr << "Stack start: " << std::hex << ubpf_context->stack_start << std::endl; + std::cerr << "Stack end: " << std::hex << ubpf_context->stack_end << std::endl; + std::cerr << "Context start:" << std::hex << reinterpret_cast(ubpf_context) << std::endl; + std::cerr << "Context end:" << std::hex << reinterpret_cast(ubpf_context) + sizeof(ubpf_context_t) << std::endl; + return false; + } + + // Check if the upper bound of the address is within the bounds of the memory or stack. 
+ if (ubpf_classify_address(ubpf_context, addr + size - 1) == address_type_t::Unknown) { + std::cerr << "Address out of bounds: " << std::hex << addr << std::endl; + std::cerr << "Memory start: " << std::hex << ubpf_context->data << std::endl; + std::cerr << "Memory end: " << std::hex << ubpf_context->data_end << std::endl; + std::cerr << "Stack start: " << std::hex << ubpf_context->stack_start << std::endl; + std::cerr << "Stack end: " << std::hex << ubpf_context->stack_end << std::endl; + std::cerr << "Context start:" << std::hex << reinterpret_cast(ubpf_context) << std::endl; + std::cerr << "Context end:" << std::hex << reinterpret_cast(ubpf_context) + sizeof(ubpf_context_t) << std::endl; + return false; + } + + return true; +} + +const std::set g_error_message_to_ignore{ + "Call to local function at pc [0-9]+ is not from a call instruction.", + "Instruction limit exceeded", +}; + /** * @brief Invoke the ubpf interpreter with the given program code and input memory. * @@ -102,22 +658,41 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) * @param[in,out] memory The input memory to use when executing the program. May be modified by the program. * @param[in,out] ubpf_stack The stack to use when executing the program. May be modified by the program. * @param[out] interpreter_result The result of the program execution. - * @return true if the program executed successfully. - * @return false if the program failed to execute. + * @retval true The program executed successfully. + * @retval false The program failed to execute. */ -bool call_ubpf_interpreter(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& interpreter_result) +bool +call_ubpf_interpreter( + const std::vector& program_code, + std::vector& memory, + std::vector& ubpf_stack, + uint64_t& interpreter_result) { auto vm = create_ubpf_vm(program_code); + ubpf_context_t context = ubpf_context_from(memory, ubpf_stack); + if (vm == nullptr) { // VM creation failed. 
return false; } + ubpf_register_debug_fn(vm.get(), &context, ubpf_debug_function); + ubpf_register_data_bounds_check(vm.get(), &context, bounds_check); + // Execute the program using the input memory. - if (ubpf_exec_ex(vm.get(), memory.data(), memory.size(), &interpreter_result, ubpf_stack.data(), ubpf_stack.size()) != 0) { - // VM execution failed. - return false; + if (ubpf_exec_ex(vm.get(), &context, sizeof(context), &interpreter_result, ubpf_stack.data(), ubpf_stack.size()) != 0) { + // Check if the error is being suppressed by one of the known error messages regex. + for (const auto& error_message : g_error_message_to_ignore) { + if (std::regex_search(g_error_message, std::regex(error_message))) { + return false; + } + } + + // If the byte code was verified, then both bounds check and undefined behavior failures are fatal. + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_VERIFY_BYTE_CODE")) { + throw std::runtime_error("Failed to execute program with error: " + g_error_message); + } } // VM execution succeeded. @@ -131,13 +706,20 @@ bool call_ubpf_interpreter(const std::vector& program_code, std::vector * @param[in,out] memory The input memory to use when executing the program. May be modified by the program. * @param[in,out] ubpf_stack The stack to use when executing the program. May be modified by the program. * @param[out] interpreter_result The result of the program execution. - * @return true if the program executed successfully. - * @return false if the program failed to execute. + * @retval true The program executed successfully. + * @retval false The program failed to execute. 
*/ -bool call_ubpf_jit(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& jit_result) +bool +call_ubpf_jit( + const std::vector& program_code, + std::vector& memory, + std::vector& ubpf_stack, + uint64_t& jit_result) { auto vm = create_ubpf_vm(program_code); + ubpf_context_t context = ubpf_context_from(memory, ubpf_stack); + char* error_message = nullptr; if (vm == nullptr) { @@ -148,13 +730,10 @@ bool call_ubpf_jit(const std::vector& program_code, std::vector& program_code, std::vector& program, std::vector& memory) +bool +split_input(const uint8_t* data, std::size_t size, std::vector& program, std::vector& memory) { if (size < 4) return false; @@ -221,10 +801,11 @@ bool split_input(const uint8_t* data, std::size_t size, std::vector& pr * * @param[in] data Pointer to the input buffer. * @param[in] size Size of the input buffer. - * @return -1 if the input is invalid - * @return 0 if the input is valid and processed. + * @retval -1 The input is invalid + * @retval 0 The input is valid and processed. */ -int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) +int +LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) { // Assume the fuzzer input is as follows: // 32-bit program length @@ -233,20 +814,30 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) std::vector program; std::vector memory; - std::vector ubpf_stack(3*4096); + std::vector ubpf_stack(3 * 4096); + g_error_message = ""; if (!split_input(data, size, program, memory)) { // The input is invalid. Not interesting. return -1; } + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_VERIFY_BYTE_CODE")) { + if (!verify_bpf_byte_code(program)) { + // The program failed verification. + return 0; + } + } + uint64_t interpreter_result = 0; uint64_t jit_result = 0; - if (!call_ubpf_interpreter(program, memory, ubpf_stack, interpreter_result)) { - // Failed to load or execute the program in the interpreter. 
- // This is not interesting, as the fuzzer input is invalid. - return 0; + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_INTERPRETER")) { + if (!call_ubpf_interpreter(program, memory, ubpf_stack, interpreter_result)) { + // Failed to load or execute the program in the interpreter. + // This is not interesting, as the fuzzer input is invalid. + return 0; + } } if (!split_input(data, size, program, memory)) { @@ -255,18 +846,22 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) assert(!"split_input failed"); } - if (!call_ubpf_jit(program, memory, ubpf_stack, jit_result)) { - // Failed to load or execute the program in the JIT. - // This is not interesting, as the fuzzer input is invalid. - return 0; + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_JIT")) { + if (!call_ubpf_jit(program, memory, ubpf_stack, jit_result)) { + // Failed to load or execute the program in the JIT. + // This is not interesting, as the fuzzer input is invalid. + return 0; + } } - // If interpreter_result is not equal to jit_result, raise a fatal signal - if (interpreter_result != jit_result) { - printf("%lx ubpf_stack\n", reinterpret_cast(ubpf_stack.data()) + ubpf_stack.size()); - printf("interpreter_result: %lx\n", interpreter_result); - printf("jit_result: %lx\n", jit_result); - throw std::runtime_error("interpreter_result != jit_result"); + if (g_ubpf_fuzzer_options.get("UBPF_FUZZER_JIT") && g_ubpf_fuzzer_options.get("UBPF_FUZZER_INTERPRETER")) { + // If interpreter_result is not equal to jit_result, raise a fatal signal + if (interpreter_result != jit_result) { + printf("%lx ubpf_stack\n", reinterpret_cast(ubpf_stack.data()) + ubpf_stack.size()); + printf("interpreter_result: %lx\n", interpreter_result); + printf("jit_result: %lx\n", jit_result); + throw std::runtime_error("interpreter_result != jit_result"); + } } // Program executed successfully. 
diff --git a/libfuzzer/libfuzzer_config.h.inc b/libfuzzer/libfuzzer_config.h.inc new file mode 100644 index 000000000..2d25059b6 --- /dev/null +++ b/libfuzzer/libfuzzer_config.h.inc @@ -0,0 +1,11 @@ +/* + Copyright (c) 2022-present, IO Visor Project + All rights reserved. + + This source code is licensed in accordance with the terms specified in + the LICENSE file found in the root directory of this source tree. +*/ + +#pragma once + +#cmakedefine HAVE_EBPF_VERIFIER_CHECK_CONSTRAINTS_AT_LABEL diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h index 81ee39843..d03875aab 100644 --- a/vm/inc/ubpf.h +++ b/vm/inc/ubpf.h @@ -356,7 +356,7 @@ extern "C" * * @param[in] vm The VM to compile the program in. * @param[out] errmsg The error message, if any. This should be freed by the caller. - * @return ubpf_jit_fn A pointer to the compiled program, or NULL on failure. + * @return A pointer to the compiled program, or NULL on failure. */ ubpf_jit_fn ubpf_compile(struct ubpf_vm* vm, char** errmsg); @@ -375,7 +375,7 @@ extern "C" * @param[in] vm The VM to compile the program in. * @param[out] errmsg The error message, if any. This should be freed by the caller. * @param[in] jit_mode The mode in which to execute the JITer -- basic or extended. - * @return ubpf_jit_fn A pointer to the compiled program, or NULL on failure. + * @return A pointer to the compiled program, or NULL on failure. */ ubpf_jit_ex_fn ubpf_compile_ex(struct ubpf_vm* vm, char** errmsg, enum JitMode jit_mode); @@ -391,7 +391,7 @@ extern "C" * * @param[in] vm The VM of the already JIT'd program. * @param[out] errmsg The error message, if any. This should be freed by the caller. - * @return ubpf_jit_fn A pointer to the compiled program (the same as buffer), or + * @return A pointer to the compiled program (the same as buffer), or * NULL on failure. */ ubpf_jit_fn @@ -461,7 +461,7 @@ extern "C" * @brief Retrieve the storage location for the BPF registers in the VM. * * @param[in] vm The VM to get the register storage from. 
- * @return uint64_t* A pointer to the register storage. + * @return A pointer to the register storage. */ uint64_t* ubpf_get_registers(const struct ubpf_vm* vm); @@ -487,7 +487,7 @@ extern "C" * @param[in] symbol_name Name of the symbol that is referenced. * @param[in] symbol_offset Offset of the symbol relative to the start of the map section. * @param[in] symbol_size Size of the symbol. - * @return uint64_t The value to insert into the BPF program. + * @return The value to insert into the BPF program. */ typedef uint64_t (*ubpf_data_relocation)( void* user_context, @@ -502,11 +502,20 @@ extern "C" * * @param[in] vm The VM to set the relocation function for. * @param[in] relocation The relocation function. - * @return int The value to insert into the BPF program. + * @return The value to insert into the BPF program. */ int ubpf_register_data_relocation(struct ubpf_vm* vm, void* user_context, ubpf_data_relocation relocation); + /** + * @brief Function that is called by the VM to check if a memory access is within bounds. + * + * @param[in] context The user context that was passed to ubpf_register_data_bounds_check. + * @param[in] addr The address to check. + * @param[in] size The size of the memory access. + * @retval True The memory access is within bounds. + * @retval False The memory access is out of bounds. + */ typedef bool (*ubpf_bounds_check)(void* context, uint64_t addr, uint64_t size); /** @@ -544,8 +553,8 @@ extern "C" * @param[in] vm The VM to set the instruction limit for. * @param[in] limit The maximum number of instructions that a program may execute or 0 for no limit. * @param[out] previous_limit Optional pointer to store the previous instruction limit. - * @return 0 Success. - * @return -1 Failure. + * @retval 0 Success. + * @retval -1 Failure. */ int ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit); @@ -557,8 +566,8 @@ extern "C" * * @param[in] vm VM to enable or disable undefined behavior checks on. 
* @param[in] enable Enable undefined behavior checks if true, disable if false. - * @return true if undefined behavior checks were previously enabled. - * @return false if undefined behavior checks were previously disabled. + * @retval true Undefined behavior checks were previously enabled. + * @retval false Undefined behavior checks were previously disabled. */ bool ubpf_toggle_undefined_behavior_check(struct ubpf_vm* vm, bool enable); @@ -571,13 +580,21 @@ extern "C" * @param[in] registers Array of 11 registers representing the VM state. * @param[in] stack_start Pointer to the beginning of the stack. * @param[in] stack_length Size of the stack in bytes. + * @param[in] register_mask Bitmask of registers that have been modified since the start of the program. + * Each set bit represents 1 modified register. LSB corresponds to register 0 and so on. + * @param[in] stack_mask_start Bitmask of the stack that has been modified since the start of the program. + * Each set bit represents 1 byte of the stack that has been modified. LSB corresponds to the first byte relative + * to stack_start and the MSB corresponds to the last byte. Note that the stack grows downwards, so the byte + * corresponding to the MSB is the first byte of the stack from the POV of the program and LSB is the last byte. */ typedef void (*ubpf_debug_fn)( void* context, int program_counter, const uint64_t registers[16], const uint8_t* stack_start, - size_t stack_length); + size_t stack_length, + uint64_t register_mask, + const uint8_t* stack_mask_start); /** * @brief Add option to invoke a debug function before each instruction. @@ -585,8 +602,8 @@ extern "C" * * @param[in] vm VM to add the option to. * @param[in] debug_fn Function to invoke before each instruction. Pass NULL to remove the function. - * @return 0 on success. - * @return -1 on failure. + * @retval 0 Success. + * @retval -1 Failure. 
*/ int ubpf_register_debug_fn(struct ubpf_vm* vm, void* context, ubpf_debug_fn debug_function); diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c index a5e37d0c3..d56954b4b 100644 --- a/vm/ubpf_vm.c +++ b/vm/ubpf_vm.c @@ -470,41 +470,57 @@ ubpf_check_shadow_stack( * @return false - The registers are not initialized - an error message has been printed. */ static inline bool -ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registers, struct ebpf_inst inst) +ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint32_t pc, uint16_t* shadow_registers, struct ebpf_inst inst) { if (!vm->undefined_behavior_check_enabled) { return true; } - bool src_register_required = false; - bool dst_register_required = false; - bool dst_register_initialized = false; + // Determine which registers are valid before and after the instruction. + bool source_register_valid_before_instruction = (*shadow_registers) & REGISTER_TO_SHADOW_MASK(inst.src); + bool destination_register_valid_before_instruction = (*shadow_registers) & REGISTER_TO_SHADOW_MASK(inst.dst); + bool destination_register_valid_after_instruction = destination_register_valid_before_instruction; switch (inst.opcode & EBPF_CLS_MASK) { // Load instructions initialize the destination register. case EBPF_CLS_LD: - dst_register_initialized = true; + // Load of immediate values makes the destination register valid. + destination_register_valid_after_instruction = true; break; // Load indirect instructions initialize the destination register and require the source register to be initialized. case EBPF_CLS_LDX: - src_register_required = true; - dst_register_initialized = true; + if (!source_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Source register r%d is not initialized.\n", pc, inst.src); + return false; + } + destination_register_valid_after_instruction = true; break; - // Store instructions require the destination register to be initialized. 
+ // Store immediate instructions require the destination register to be initialized, but have no source register. case EBPF_CLS_ST: - dst_register_required = true; + if (inst.dst != BPF_REG_10 && !destination_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Destination register r%d is not initialized.\n", pc, inst.dst); + return false; + } break; - // Store indirect instructions require both the source and destination registers to be initialized. + // Store indirect instructions require both the source and destination registers to be initialized, except for + // writes to the stack. case EBPF_CLS_STX: - dst_register_required = true; - src_register_required = true; + if (inst.dst != BPF_REG_10 && !source_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Source register r%d is not initialized.\n", pc, inst.src); + return false; + } + if (inst.dst != BPF_REG_10 && !destination_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Destination register r%d is not initialized.\n", pc, inst.dst); + return false; + } break; + // ALU operations either use an immediate value or a source register. + // If the source register is used, its initialized state is transferred to the destination register. + // If it's a unary operation, the initialized state of the source register is unchanged. case EBPF_CLS_ALU: case EBPF_CLS_ALU64: - // Source register is required if the EBPF_SRC_REG bit is set. - src_register_required = inst.opcode & EBPF_SRC_REG; - dst_register_initialized = true; switch (inst.opcode & EBPF_ALU_OP_MASK) { + // Binary ops. 
case 0x00: // EBPF_OP_ADD case 0x10: // EBPF_OP_SUB case 0x20: // EBPF_OP_MUL @@ -513,28 +529,36 @@ ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registe case 0x50: // EBPF_OP_AND case 0x60: // EBPF_OP_LSH case 0x70: // EBPF_OP_RSH - case 0x80: // EBPF_OP_NEG case 0x90: // EBPF_OP_MOD case 0xa0: // EBPF_OP_XOR case 0xc0: // EBPF_OP_ARSH - case 0xd0: // EBPF_OP_LE - dst_register_required = true; - break; case 0xb0: // EBPF_OP_MOV - // Destination register is initialized. + // Permit operations on uninitialized registers, but mark the destination register as uninitialized when the source register is uninitialized. + if (inst.opcode & EBPF_SRC_REG) { + destination_register_valid_after_instruction = source_register_valid_before_instruction; + } else { + destination_register_valid_after_instruction = true; + } + break; + // Unary ops + case 0x80: // EBPF_OP_NEG + case 0xd0: // EBPF_OP_LE + // Doesn't change the initialized state of either register. break; + default: + vm->error_printf(stderr, "Error: %d: Unknown ALU opcode %x.\n", pc, inst.opcode); + return false; } break; case EBPF_CLS_JMP: case EBPF_CLS_JMP32: - // Source register is required if the EBPF_SRC_REG bit is set. - src_register_required = inst.opcode & EBPF_SRC_REG; switch (inst.opcode & EBPF_JMP_OP_MASK) { - case EBPF_MODE_JA: + // Unconditional jumps don't require any registers to be initialized. case EBPF_MODE_CALL: + case EBPF_MODE_JA: case EBPF_MODE_EXIT: - src_register_required = false; break; + // Conditional jumps require the destination register to be initialized and also the source register if the EBPF_SRC_REG flag is set. case EBPF_MODE_JEQ: case EBPF_MODE_JGT: case EBPF_MODE_JGE: @@ -546,24 +570,34 @@ ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registe case EBPF_MODE_JLE: case EBPF_MODE_JSLT: case EBPF_MODE_JSLE: - dst_register_required = true; + // If the jump offset is 0, then this is a no-op. 
+ if (inst.offset == 0) { + break; + } + if (!destination_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Destination register r%d is not initialized.\n", pc, inst.dst); + return false; + } + if (inst.opcode & EBPF_SRC_REG && !source_register_valid_before_instruction) { + vm->error_printf(stderr, "Error: %d: Source register r%d is not initialized.\n", pc, inst.src); + return false; + } break; + default: + vm->error_printf(stderr, "Error: %d: Unknown JMP opcode %x.\n", pc, inst.opcode); + return false; } - break; - } - - if (src_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.src))) { - vm->error_printf(stderr, "Error: Source register r%d is not initialized.\n", inst.src); - return false; - } - - if (dst_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.dst))) { - vm->error_printf(stderr, "Error: Destination register r%d is not initialized.\n", inst.dst); + break; + default: + vm->error_printf(stderr, "Error: %d: Unknown opcode %x.\n", pc, inst.opcode); return false; } - if (dst_register_initialized) { + // Update the shadow register state. + if (destination_register_valid_after_instruction) { *shadow_registers |= REGISTER_TO_SHADOW_MASK(inst.dst); + } else { + *shadow_registers &= ~REGISTER_TO_SHADOW_MASK(inst.dst); } if (inst.opcode == EBPF_OP_CALL) { @@ -582,7 +616,7 @@ ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registe if (inst.opcode == EBPF_OP_EXIT) { if (!(*shadow_registers & REGISTER_TO_SHADOW_MASK(0))) { - vm->error_printf(stderr, "Error: Return value register r0 is not initialized.\n"); + vm->error_printf(stderr, "Error: %d: Return value register r0 is not initialized.\n", pc); return false; } // Mark r1-r5 as uninitialized. 
@@ -658,6 +692,7 @@ ubpf_exec_ex( } if (vm->instruction_limit && instruction_limit-- <= 0) { return_value = -1; + vm->error_printf(stderr, "Error: Instruction limit exceeded.\n"); goto cleanup; } @@ -677,16 +712,24 @@ ubpf_exec_ex( struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++); - // Invoke the debug function to allow the user to inspect the state of the VM if it is enabled. - if (vm->debug_function) { - vm->debug_function(vm->debug_function_context, cur_pc, reg, stack_start, stack_length); - } - - if (!ubpf_validate_shadow_register(vm, &shadow_registers, inst)) { + if (!ubpf_validate_shadow_register(vm, cur_pc, &shadow_registers, inst)) { return_value = -1; goto cleanup; } + // Invoke the debug function to allow the user to inspect the state of the VM if it is enabled. + if (vm->debug_function) { + vm->debug_function( + vm->debug_function_context, // The user's context pointer that was passed to ubpf_register_debug_fn. + cur_pc, // The current instruction pointer. + reg, // The array of 11 registers representing the VM state. + stack_start, // Pointer to the beginning of the stack. + stack_length, // Size of the stack in bytes. + shadow_registers, // Bitmask of registers that have been modified since the start of the program. + (uint8_t*)shadow_stack // Bitmask of the stack that has been modified since the start of the program. + ); + } + switch (inst.opcode) { case EBPF_OP_ADD_IMM: reg[inst.dst] += inst.imm;