From 934c10ffefc1c3ce8a5a359e7b30aef17999974b Mon Sep 17 00:00:00 2001
From: Will Hawkins
Date: Sat, 11 May 2024 01:44:45 -0400
Subject: [PATCH] WIP: Support external stacks and local function calls using
 local memory

1. Add support for invoking the interpreter and JIT'd code with an
   external stack. This feature is generally useful and will also make
   it easier to fuzz the runtime and check for correctness.
2. Add support for local functions that use local (stack) memory. Prior
   to this commit, a local function could be called but could not use
   any local memory without overwriting the stack memory of another
   function.
---
 .../data/ubpf_test_frame_pointer.input       |   1 +
 .../descrs/ubpf_test_frame_pointer.md        |  25 +++
 custom_tests/srcs/ubpf_test_frame_pointer.cc | 103 ++++++++++
 tests/call-save.data                         |  55 +++++
 ubpf_plugin/ubpf_plugin.cc                   |  48 +++-
 vm/inc/ubpf.h                                |  34 ++-
 vm/test.c                                    |  20 ++
 vm/ubpf_int.h                                |  80 ++++++--
 vm/ubpf_jit.c                                |  25 ++-
 vm/ubpf_jit_arm64.c                          |  92 ++++-----
 vm/ubpf_jit_support.c                        |  19 +-
 vm/ubpf_jit_support.h                        |  25 ++-
 vm/ubpf_jit_x86_64.c                         |  96 ++++++---
 vm/ubpf_vm.c                                 | 193 ++++++++++++------
 14 files changed, 636 insertions(+), 180 deletions(-)
 create mode 100644 custom_tests/data/ubpf_test_frame_pointer.input
 create mode 100644 custom_tests/descrs/ubpf_test_frame_pointer.md
 create mode 100644 custom_tests/srcs/ubpf_test_frame_pointer.cc

diff --git a/custom_tests/data/ubpf_test_frame_pointer.input b/custom_tests/data/ubpf_test_frame_pointer.input
new file mode 100644
index 000000000..222f3f580
--- /dev/null
+++ b/custom_tests/data/ubpf_test_frame_pointer.input
@@ -0,0 +1 @@
+b7 06 00 00 0a 00 00 00 b7 07 00 00 0a 00 00 00 b7 08 00 00 0a 00 00 00 b7 09 00 00 0a 00 00 00 b7 01 00 00 05 00 00 00 7b 1a f8 ff 00 00 00 00 85 10 00 00 02 00 00 00 79 a0 f8 ff 00 00 00 00 95 00 00 00 00 00 00 00 b7 01 00 00 37 00 00 00 7b 1a f8 ff 00 00 00 00 95 00 00 00 00 00 00 00
diff --git a/custom_tests/descrs/ubpf_test_frame_pointer.md b/custom_tests/descrs/ubpf_test_frame_pointer.md
new file mode 100644
index 000000000..e0a872ad3
--- /dev/null
+++ b/custom_tests/descrs/ubpf_test_frame_pointer.md
@@ -0,0 +1,25 @@
+## Test Description
+
+This custom test program tests whether a local function called by an eBPF program gets its own
+stack space, sized according to the VM's registered stack usage calculator.
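+
+The program in ubpf_test_frame_pointer.input decodes (approximately) to:
+
+```
+mov %r6, 10
+mov %r7, 10
+mov %r8, 10
+mov %r9, 10
+mov %r1, 5
+stxdw [%r10-8], %r1   # save 0x5 in the caller's frame
+call +2               # local call (src=1) to the function below
+ldxdw %r0, [%r10-8]   # reload the caller's slot
+exit
+mov %r1, 0x37         # local function: store 0x37 at its own [%r10-8]
+stxdw [%r10-8], %r1
+exit
+```
+
+When the calculator reports 16 bytes of stack usage for the local function, the frames are
+disjoint and the program returns 0x5; when it reports 0, the frames overlap and the store of
+0x37 clobbers the caller's slot, so the program returns 0x37.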
diff --git a/custom_tests/srcs/ubpf_test_frame_pointer.cc b/custom_tests/srcs/ubpf_test_frame_pointer.cc
new file mode 100644
index 000000000..e75121861
--- /dev/null
+++ b/custom_tests/srcs/ubpf_test_frame_pointer.cc
@@ -0,0 +1,103 @@
+// Copyright (c) Will Hawkins
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+extern "C"
+{
+#include "ubpf.h"
+}
+
+#include "ubpf_custom_test_support.h"
+
+int
+stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie)
+{
+    UNREFERENCED_PARAMETER(vm);
+    UNREFERENCED_PARAMETER(pc);
+    UNREFERENCED_PARAMETER(cookie);
+    return 16;
+}
+
+int
+overwrite_stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie)
+{
+    UNREFERENCED_PARAMETER(vm);
+    UNREFERENCED_PARAMETER(pc);
+    UNREFERENCED_PARAMETER(cookie);
+    return 0;
+}
+
+int main(int argc, char** argv)
+{
+    std::vector<std::string> args(argv, argv + argc);
+    std::string program_string{};
+    ubpf_jit_fn jit_fn;
+
+    std::getline(std::cin, program_string);
+
+    uint64_t no_overwrite_interp_result = 0;
+    uint64_t no_overwrite_jit_result = 0;
+    uint64_t overwrite_interp_result = 0;
+    uint64_t overwrite_jit_result = 0;
+
+    {
+
+        std::unique_ptr<ubpf_vm, decltype(&ubpf_destroy)> vm(ubpf_create(), ubpf_destroy);
+        std::string error{};
+        if (!ubpf_setup_custom_test(
+                vm,
+                program_string,
+                [](ubpf_vm_up& vm, std::string& error) {
+                    if (ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr) < 0) {
+                        error = "Failed to register stack usage calculator.";
+                        return false;
+                    }
+                    return true;
+                },
+                jit_fn,
+                error)) {
+            std::cerr << "Problem setting up custom test: " << error << std::endl;
+            return 1;
+        }
+
+        no_overwrite_jit_result = jit_fn(nullptr, 0);
+        [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &no_overwrite_interp_result);
+    }
+
+    {
+
+        std::unique_ptr<ubpf_vm, decltype(&ubpf_destroy)> vm(ubpf_create(), ubpf_destroy);
+        std::string error{};
+        if (!ubpf_setup_custom_test(
+                vm,
+                program_string,
+                [](ubpf_vm_up& vm, std::string& error) {
+                    if (ubpf_register_stack_usage_calculator(vm.get(), overwrite_stack_usage_calculator, nullptr) < 0) {
+                        error = "Failed to register stack usage calculator.";
+                        return false;
+                    }
+                    return true;
+                },
+                jit_fn,
+                error)) {
+            std::cerr << "Problem setting up custom test: " << error << std::endl;
+            return 1;
+        }
+
+        overwrite_jit_result = jit_fn(nullptr, 0);
+
+        [[maybe_unused]] auto exec_result = ubpf_exec(vm.get(), NULL, 0, &overwrite_interp_result);
+    }
+    // With 16 bytes of reported callee stack usage, the caller's 0x5 survives the call;
+    // with 0 bytes, the callee's frame overlaps the caller's and the store of 0x37 clobbers it.
+    return !(no_overwrite_interp_result == no_overwrite_jit_result &&
+             no_overwrite_interp_result == 0x5 &&
+             overwrite_interp_result == overwrite_jit_result &&
+             overwrite_interp_result == 0x37);
+}
diff --git a/tests/call-save.data b/tests/call-save.data
index bbf5300a8..423947ff9 100644
--- a/tests/call-save.data
+++ b/tests/call-save.data
@@ -3,13 +3,68 @@ mov %r6, 0x0001
 mov %r7, 0x0020
 mov %r8, 0x0300
 mov %r9, 0x4000
+
+# r1 should contain a pointer to program memory.
+# Don't screw that up, because helper function 1 (memfrob)
+# needs it.
+mov %r2, 0x0001
+mov %r3, 0x0001
+mov %r4, 0x0001
+mov %r5, 0x0001
+call 1
+mov %r0, 0
+or %r0, %r6
+or %r0, %r7
+or %r0, %r8
+or %r0, %r9
+jeq %r0, 0x4321, +1
+exit
+
+# Call helper function 0 -- the memory pointer is
+# no longer needed by any other helper function, so
+# we don't have to worry about keeping it safe.
+mov %r1, 0x0001
+mov %r2, 0x0001
+mov %r3, 0x0001
+mov %r4, 0x0001
+mov %r5, 0x0001
+call 0
+mov %r0, 0
+or %r0, %r6
+or %r0, %r7
+or %r0, %r8
+or %r0, %r9
+jeq %r0, 0x4321, +1
+exit
+
+mov %r1, 0x0001
+mov %r2, 0x0001
+mov %r3, 0x0001
+mov %r4, 0x0001
+mov %r5, 0x0001
 call 2
 mov %r0, 0
 or %r0, %r6
 or %r0, %r7
 or %r0, %r8
 or %r0, %r9
+jeq %r0, 0x4321, +1
+exit
+
+mov %r1, 0x0001
+mov %r2, 0x0001
+mov %r3, 0x0001
+mov %r4, 0x0001
+mov %r5, 0x0001
+call 3
+mov %r0, 0
+or %r0, %r6
+or %r0, %r7
+or %r0, %r8
+or %r0, %r9
 exit
+-- mem
+01 02 03 04 05 06 07 08
 -- result
 0x4321
 -- no register offset
diff --git a/ubpf_plugin/ubpf_plugin.cc b/ubpf_plugin/ubpf_plugin.cc
index ec48ff175..48ed45490 100644
--- a/ubpf_plugin/ubpf_plugin.cc
+++ b/ubpf_plugin/ubpf_plugin.cc
@@ -73,6 +73,24 @@ bytes_to_ebpf_inst(std::vector<uint8_t> bytes)
     return instructions;
 }
 
+/**
+ * @brief The handler to determine the stack usage of local functions.
+ *
+ * @param[in] vm Pointer to the VM of which the local function at pc is a part.
+ * @param[in] pc The instruction address of the local function.
+ * @param[in] cookie A pointer to the context cookie given when this callback
+ * was registered.
+ * @return The amount of stack used by the local function starting at pc.
+ */
+int stack_usage_calculator(const struct ubpf_vm *vm, uint16_t pc, void *cookie) {
+    UNREFERENCED_PARAMETER(pc);
+    UNREFERENCED_PARAMETER(cookie);
+    UNREFERENCED_PARAMETER(vm);
+    // We will default to a conservative 32 bytes of stack usage for each local function.
+    // That should be enough for all the conformance tests.
+    return 32;
+}
+
 /**
  * @brief This program reads BPF instructions from stdin and memory contents from
  * the first argument. It then executes the BPF program and prints the
@@ -138,6 +156,8 @@ int main(int argc, char **argv)
 
     ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validater);
 
+    ubpf_register_stack_usage_calculator(vm.get(), stack_usage_calculator, nullptr);
+
     if (ubpf_set_unwind_function_index(vm.get(), 5) != 0)
     {
         std::cerr << "Failed to set unwind function index" << std::endl;
@@ -246,7 +266,7 @@ int main(int argc, char **argv)
         }
     }
 
-    // ... but first reset program memory.
+    // ... but first reset program memory ...
     usable_program_memory = memory;
     usable_program_memory_pointer = nullptr;
     if (usable_program_memory.size() != 0) {
@@ -260,6 +280,32 @@ int main(int argc, char **argv)
         return 1;
     }
 
+    // ... and, for the cherry on the sundae, execute the program by specifying a stack ...
+    uint8_t external_stack[UBPF_EBPF_STACK_SIZE] = {0, };
+
+    // ... but first, reset that pesky memory again ...
+    usable_program_memory = memory;
+    usable_program_memory_pointer = nullptr;
+    if (usable_program_memory.size() != 0) {
+        usable_program_memory_pointer = usable_program_memory.data();
+    }
+
+    uint64_t external_memory_index_helper_result;
+    if (ubpf_exec_ex(vm.get(), usable_program_memory_pointer, usable_program_memory.size(), &external_memory_index_helper_result, external_stack) != 0)
+    {
+        std::cerr << "Failed to execute program" << std::endl;
+        return 1;
+    }
+
+    if (external_memory_index_helper_result != index_helper_result)
+    {
+        std::cerr << "Execution of the interpreted code with an external stack gave a different result: 0x"
+                  << std::hex << external_memory_index_helper_result << std::endl;
+        return 1;
+    }
+
+    // ... and make sure the results are the same.
     if (external_dispatcher_result != index_helper_result)
     {
         std::cerr << "Execution of the interpreted code with external and indexed helpers gave different results: 0x"
diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h
index 6ee77cbe0..c0fd96df8 100644
--- a/vm/inc/ubpf.h
+++ b/vm/inc/ubpf.h
@@ -40,12 +40,15 @@ extern "C"
 #endif
 
 /**
- * @brief Default stack size for the VM. Must be divisible by 16.
+ * @brief Default stack size for the eBPF program. Must be divisible by 16.
  */
-#if !defined(UBPF_STACK_SIZE)
-#define UBPF_STACK_SIZE 512
+#if !defined(UBPF_EBPF_STACK_SIZE)
+#define UBPF_EBPF_STACK_SIZE 4096
 #endif
 
+#define UBPF_EBPF_NONVOLATILE_SIZE (sizeof(uint64_t) * 5)
+
 /**
  * @brief Default maximum number of nested calls in the VM.
  */
@@ -63,6 +66,16 @@
      */
     typedef uint64_t (*ubpf_jit_fn)(void* mem, size_t mem_len);
 
+    /**
+     * @brief Enum to describe the JIT mode. In BasicJitMode the generated code
+     * allocates its own eBPF stack (the classic behavior); in ExtendedJitMode
+     * the generated code uses an eBPF stack provided by the caller.
+     */
+    enum JitMode
+    {
+        ExtendedJitMode,
+        BasicJitMode
+    };
+
     /**
      * @brief Create a new uBPF VM.
      *
@@ -160,9 +173,12 @@
      */
     int
     ubpf_register_external_dispatcher(
-        struct ubpf_vm* vm,
-        external_function_dispatcher_t dispatcher,
-        external_function_validate_t validater);
+        struct ubpf_vm* vm, external_function_dispatcher_t dispatcher, external_function_validate_t validater);
+
+    /**
+     * @brief The type of a callback that reports the stack usage of the local
+     * function starting at pc. The reported usage must be a multiple of 16.
+     */
+    typedef int (*stack_usage_calculator_t)(const struct ubpf_vm* vm, uint16_t pc, void* cookie);
+
+    /**
+     * @brief Register a callback that the VM invokes during validation to
+     * learn the stack usage of each local function.
+     */
+    int
+    ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie);
 
     /**
      * @brief Load code into a VM.
@@ -268,6 +284,9 @@
     int
     ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value);
 
+    /**
+     * @brief Like ubpf_exec, but uses the given caller-allocated stack, which
+     * must be at least UBPF_EBPF_STACK_SIZE bytes.
+     */
+    int
+    ubpf_exec_ex(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value, uint8_t* stack);
+
     /**
      * @brief Compile a BPF program in the VM to native code.
      *
@@ -294,7 +313,7 @@
      * NULL on failure.
      */
     ubpf_jit_fn
-    ubpf_copy_jit(struct ubpf_vm* vm, void *buffer, size_t size, char** errmsg);
+    ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg);
 
     /**
      * @brief Translate the eBPF byte code to machine code.
@@ -428,7 +447,6 @@
     int
     ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit);
 
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/vm/test.c b/vm/test.c
index 80f4d1000..fa50b413d 100644
--- a/vm/test.c
+++ b/vm/test.c
@@ -176,6 +176,25 @@ map_relocation_bounds_check_function(void* user_context, uint64_t addr, uint64_t
     }
     return false;
 }
+/**
+ * @brief The handler to determine the stack usage of local functions.
+ *
+ * @param[in] vm Pointer to the VM of which the local function at pc is a part.
+ * @param[in] pc The instruction address of the local function.
+ * @param[in] cookie A pointer to the context cookie given when this callback
+ * was registered.
+ * @return The amount of stack used by the local function starting at pc.
+ */
+int
+stack_usage_calculator(const struct ubpf_vm* vm, uint16_t pc, void* cookie)
+{
+    (void)(pc);
+    (void)(cookie);
+    (void)(vm);
+    // This is sized large enough that the rel_64_32.bpf.c program has enough space
+    // for each local function!
+    return 32;
+}
 
 int
 main(int argc, char** argv)
@@ -283,6 +302,7 @@ main(int argc, char** argv)
 
     register_functions(vm);
 
+    ubpf_register_stack_usage_calculator(vm, stack_usage_calculator, NULL);
     /*
      * The ELF magic corresponds to an RSH instruction with an offset,
     * which is invalid.
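For context, the new external-stack entry point declared above is meant to be driven roughly
as follows. This is a minimal sketch of hypothetical host code, not part of this patch; it
assumes only the ubpf_exec_ex declaration and UBPF_EBPF_STACK_SIZE from vm/inc/ubpf.h:

    #include <stdint.h>
    #include <stdlib.h>
    #include "ubpf.h"

    /* Hypothetical helper: run an already-loaded program on a caller-owned stack. */
    static int
    run_with_external_stack(struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* ret)
    {
        /* The interpreter sets BPF r10 to stack + UBPF_EBPF_STACK_SIZE, so the
         * caller-provided buffer must be at least UBPF_EBPF_STACK_SIZE bytes. */
        uint8_t* stack = calloc(UBPF_EBPF_STACK_SIZE, 1);
        if (stack == NULL)
            return -1;
        int rv = ubpf_exec_ex(vm, mem, mem_len, ret, stack);
        free(stack);
        return rv;
    }

Because the caller owns the buffer, it can inspect the eBPF stack after execution, which is
the fuzzing/correctness motivation given in the commit message.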
diff --git a/vm/ubpf_int.h b/vm/ubpf_int.h
index 1389886b3..41a0da0b2 100644
--- a/vm/ubpf_int.h
+++ b/vm/ubpf_int.h
@@ -21,6 +21,7 @@
 #ifndef UBPF_INT_H
 #define UBPF_INT_H
 
+#include <stdbool.h>
 #include <ubpf.h>
 #include "ebpf.h"
@@ -29,16 +30,24 @@
 struct ebpf_inst;
 typedef uint64_t (*ext_func)(uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4);
 
-typedef enum {
+typedef enum
+{
     UBPF_JIT_COMPILE_SUCCESS,
     UBPF_JIT_COMPILE_FAILURE,
 } upbf_jit_result_t;
 
-struct ubpf_jit_result {
+struct ubpf_jit_result
+{
     uint32_t external_dispatcher_offset;
     uint32_t external_helper_offset;
     upbf_jit_result_t compile_result;
-    char *errmsg;
+    char* errmsg;
+};
+
+struct ubpf_stack_usage
+{
+    bool stack_usage_calculated;
+    uint16_t stack_usage;
 };
 
 #define MAX_EXT_FUNCS 64
@@ -56,14 +65,24 @@ struct ubpf_vm
     bool* int_funcs;
     const char** ext_func_names;
 
+    struct ubpf_stack_usage* local_func_stack_usage;
+    void* stack_usage_calculator_cookie;
+    stack_usage_calculator_t stack_usage_calculator;
+
     external_function_dispatcher_t dispatcher;
     external_function_validate_t dispatcher_validate;
 
     bool bounds_check_enabled;
     int (*error_printf)(FILE* stream, const char* format, ...);
-    struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size);
-    bool (*jit_update_dispatcher)(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
-    bool (*jit_update_helper)(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
+    struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode);
+    bool (*jit_update_dispatcher)(
+        struct ubpf_vm* vm,
+        external_function_dispatcher_t new_dispatcher,
+        uint8_t* buffer,
+        size_t size,
+        uint32_t offset);
+    bool (*jit_update_helper)(
+        struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
     int unwind_stack_extension_index;
     uint64_t pointer_secret;
     ubpf_data_relocation data_relocation_function;
@@ -78,29 +97,42 @@ struct ubpf_vm
 
 struct ubpf_stack_frame
 {
+    uint16_t stack_usage;
     uint16_t return_address;
-    uint64_t saved_registers[4];
+    uint64_t saved_registers[5];
 };
 
 /* The various JIT targets. */
 
 // arm64
 struct ubpf_jit_result
-ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size);
-bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
-bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
-
-//x86_64
+ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode);
+bool
+ubpf_jit_update_dispatcher_arm64(
+    struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
+bool
+ubpf_jit_update_helper_arm64(
+    struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
+
+// x86_64
 struct ubpf_jit_result
-ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size);
-bool ubpf_jit_update_dispatcher_x86_64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
-bool ubpf_jit_update_helper_x86_64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
-
-//uhm, hello?
+ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode);
+bool
+ubpf_jit_update_dispatcher_x86_64(
+    struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
+bool
+ubpf_jit_update_helper_x86_64(
+    struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
+
+// The null JIT target, used on platforms without JIT support.
 struct ubpf_jit_result
-ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size);
-bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
-bool ubpf_jit_update_helper_null(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
+ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode);
+bool
+ubpf_jit_update_dispatcher_null(
+    struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset);
+bool
+ubpf_jit_update_helper_null(
+    struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset);
 
 char*
 ubpf_error(const char* fmt, ...);
@@ -130,4 +162,10 @@ ubpf_fetch_instruction(const struct ubpf_vm* vm, uint16_t pc);
 void
 ubpf_store_instruction(const struct ubpf_vm* vm, uint16_t pc, struct ebpf_inst inst);
 
-#endif
+uint16_t
+ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc);
+
+bool
+ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg);
+
+#endif
\ No newline at end of file
diff --git a/vm/ubpf_jit.c b/vm/ubpf_jit.c
index cc534ade6..f94a22bcd 100644
--- a/vm/ubpf_jit.c
+++ b/vm/ubpf_jit.c
@@ -19,6 +19,7 @@
  * limitations under the License.
  */
 
+#include "ubpf.h"
 #define _GNU_SOURCE
 #include
 #include
@@ -29,11 +30,10 @@
 #include
 #include "ubpf_int.h"
 
-
 int
-ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg)
+ubpf_translate_ex(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg, enum JitMode jit_mode)
 {
-    struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size);
+    struct ubpf_jit_result jit_result = vm->jit_translate(vm, buffer, size, jit_mode);
     vm->jitted_result = jit_result;
     if (jit_result.errmsg) {
         *errmsg = jit_result.errmsg;
@@ -41,8 +41,14 @@ ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg)
     return jit_result.compile_result == UBPF_JIT_COMPILE_SUCCESS ? 0 : -1;
 }
 
+int
+ubpf_translate(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, char** errmsg)
+{
+    return ubpf_translate_ex(vm, buffer, size, errmsg, BasicJitMode);
+}
+
 struct ubpf_jit_result
-ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size)
+ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode)
 {
     struct ubpf_jit_result compile_result;
     compile_result.compile_result = UBPF_JIT_COMPILE_FAILURE;
@@ -52,11 +58,14 @@ ubpf_translate_null(struct ubpf_vm* vm, uint8_t* buffer, size_t* size)
     UNUSED_PARAMETER(vm);
     UNUSED_PARAMETER(buffer);
     UNUSED_PARAMETER(size);
+    UNUSED_PARAMETER(jit_mode);
 
     compile_result.errmsg = ubpf_error("Code cannot be JIT'd on this target.");
     return compile_result;
 }
 
-bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset)
+bool
+ubpf_jit_update_dispatcher_null(
+    struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset)
 {
     UNUSED_PARAMETER(vm);
     UNUSED_PARAMETER(new_dispatcher);
@@ -66,7 +75,9 @@ bool ubpf_jit_update_dispatcher_null(struct ubpf_vm* vm, external_function_dispa
     return false;
 }
 
-bool ubpf_jit_update_helper_null(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset)
+bool
+ubpf_jit_update_helper_null(
+    struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset)
 {
     UNUSED_PARAMETER(vm);
     UNUSED_PARAMETER(new_helper);
@@ -138,7 +149,7 @@ ubpf_compile(struct ubpf_vm* vm, char** errmsg)
 }
 
 ubpf_jit_fn
-ubpf_copy_jit(struct ubpf_vm *vm, void *buffer, size_t size, char **errmsg)
+ubpf_copy_jit(struct ubpf_vm* vm, void* buffer, size_t size, char** errmsg)
 {
     // If compilation was not successful or it has not even been attempted,
     // we cannot copy.
diff --git a/vm/ubpf_jit_arm64.c b/vm/ubpf_jit_arm64.c
index c67104069..0e67dbe0c 100644
--- a/vm/ubpf_jit_arm64.c
+++ b/vm/ubpf_jit_arm64.c
@@ -24,7 +24,6 @@
 #define _GNU_SOURCE
 #include
-#include
 #include
 #include
 #include
@@ -245,8 +244,7 @@ emit_loadstore_register(
 }
 
 static void
-emit_loadstore_literal(
-    struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target)
+emit_loadstore_literal(struct jit_state* state, enum LoadStoreOpcode op, enum Registers rt, uint32_t target)
 {
     note_load(state, target);
     const uint32_t reg_op_base = 0x08000000U;
@@ -254,7 +252,7 @@
 }
 
 static void
-emit_adr(struct jit_state *state, uint32_t offset, enum Registers rd)
+emit_adr(struct jit_state* state, uint32_t offset, enum Registers rd)
 {
     note_lea(state, offset);
     uint32_t instr = 0x10000000 | rd;
@@ -639,7 +637,7 @@ emit_jit_epilogue(struct jit_state* state)
 }
 
 static uint32_t
-emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispatcher_addr)
+emit_dispatched_external_helper_address(struct jit_state* state, uint64_t dispatcher_addr)
 {
     // We will assume that the buffer of memory holding the JIT'd code is 4-byte aligned.
     // And, because ARM is 32-bit instructions, we know that each instruction is 4-byte aligned.
@@ -658,10 +656,11 @@ emit_dispatched_external_helper_address(struct jit_state *state, uint64_t dispat
 }
 
 static uint32_t
-emit_helper_table(struct jit_state* state, struct ubpf_vm* vm) {
+emit_helper_table(struct jit_state* state, struct ubpf_vm* vm)
+{
     uint32_t helper_table_address_target = state->offset;
 
-    for (int i = 0; i<MAX_EXT_FUNCS; i++) {
+    for (int i = 0; i < MAX_EXT_FUNCS; i++) {
         emit_bytes(state, &vm->ext_funcs[i], sizeof(uint64_t));
     }
     return helper_table_address_target;
@@ -938,7 +937,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
 {
     int i;
 
-    emit_jit_prologue(state, UBPF_STACK_SIZE);
+    emit_jit_prologue(state, UBPF_EBPF_STACK_SIZE);
 
     for (i = 0; i < vm->num_insts; i++) {
 
@@ -1177,43 +1176,42 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
 
     if (state->jit_status != NoError) {
         switch (state->jit_status) {
-        case TooManyJumps: {
-            *errmsg = ubpf_error("Too many jump instructions.");
-            break;
-        }
-        case TooManyLoads: {
-            *errmsg = ubpf_error("Too many load instructions.");
-            break;
-        }
-        case TooManyLeas: {
-            *errmsg = ubpf_error("Too many LEA calculations.");
-            break;
-        }
-        case UnexpectedInstruction: {
-            // errmsg set at time the error was detected because the message requires
-            // information about the unexpected instruction.
-            break;
-        }
-        case UnknownInstruction: {
-            // errmsg set at time the error was detected because the message requires
-            // information about the unknown instruction.
-            break;
-        }
-        case NotEnoughSpace: {
-            *errmsg = ubpf_error("Target buffer too small");
-            break;
-        }
-        case NoError: {
-            assert(false);
-        }
+            case TooManyJumps: {
+                *errmsg = ubpf_error("Too many jump instructions.");
+                break;
+            }
+            case TooManyLoads: {
+                *errmsg = ubpf_error("Too many load instructions.");
+                break;
+            }
+            case TooManyLeas: {
+                *errmsg = ubpf_error("Too many LEA calculations.");
+                break;
+            }
+            case UnexpectedInstruction: {
+                // errmsg set at time the error was detected because the message requires
+                // information about the unexpected instruction.
+                break;
+            }
+            case UnknownInstruction: {
+                // errmsg set at time the error was detected because the message requires
+                // information about the unknown instruction.
+                break;
+            }
+            case NotEnoughSpace: {
+                *errmsg = ubpf_error("Target buffer too small");
+                break;
+            }
+            case NoError: {
+                assert(false);
+            }
         }
         return -1;
     }
 
-    emit_jit_epilogue(state);
-    state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher);
+    state->dispatcher_loc = emit_dispatched_external_helper_address(state, (uint64_t)vm->dispatcher);
     state->helper_table_loc = emit_helper_table(state, vm);
 
     return 0;
@@ -1277,7 +1275,6 @@ resolve_adr(struct jit_state* state, uint32_t instr_offset, int32_t immediate)
     memcpy(state->buf + instr_offset, &instr, sizeof(uint32_t));
 }
 
-
 static bool
 resolve_jumps(struct jit_state* state)
 {
@@ -1345,12 +1342,13 @@ resolve_leas(struct jit_state* state)
     return true;
 }
 
-
-bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset)
+bool
+ubpf_jit_update_dispatcher_arm64(
+    struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset)
 {
     UNUSED_PARAMETER(vm);
     uint64_t jit_upper_bound = (uint64_t)buffer + size;
-    void *dispatcher_address = (void*)((uint64_t)buffer + offset);
+    void* dispatcher_address = (void*)((uint64_t)buffer + offset);
     if ((uint64_t)dispatcher_address + sizeof(void*) < jit_upper_bound) {
         memcpy(dispatcher_address, &new_dispatcher, sizeof(void*));
         return true;
@@ -1359,7 +1357,9 @@ bool ubpf_jit_update_dispatcher_arm64(struct ubpf_vm* vm, external_function_disp
     return false;
 }
 
-bool ubpf_jit_update_helper_arm64(struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset)
+bool
+ubpf_jit_update_helper_arm64(
+    struct ubpf_vm* vm, ext_func new_helper, unsigned int idx, uint8_t* buffer, size_t size, uint32_t offset)
 {
     UNUSED_PARAMETER(vm);
     uint64_t jit_upper_bound = (uint64_t)buffer + size;
@@ -1373,12 +1373,12 @@
 }
 
 struct ubpf_jit_result
-ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size)
+ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode)
 {
     struct jit_state state;
     struct ubpf_jit_result compile_result;
 
-    if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) {
+    if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) {
         goto out;
     }
 
diff --git a/vm/ubpf_jit_support.c b/vm/ubpf_jit_support.c
index ba96b9be0..e4984067c 100644
--- a/vm/ubpf_jit_support.c
+++ b/vm/ubpf_jit_support.c
@@ -19,10 +19,18 @@
 
 #include "ubpf_jit_support.h"
 #include
+#include "ubpf.h"
 #include "ubpf_int.h"
 
 int
-initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg) {
+initialize_jit_state_result(
+    struct jit_state* state,
+    struct ubpf_jit_result* compile_result,
+    uint8_t* buffer,
+    uint32_t size,
+    enum JitMode jit_mode,
+    char** errmsg)
+{
     compile_result->compile_result = UBPF_JIT_COMPILE_FAILURE;
     compile_result->errmsg = NULL;
     compile_result->external_dispatcher_offset = 0;
@@ -38,6 +46,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com
     state->num_loads = 0;
     state->num_leas = 0;
     state->jit_status = NoError;
+    state->jit_mode = jit_mode;
 
     if (!state->pc_locs || !state->jumps || !state->loads || !state->leas) {
        *errmsg = ubpf_error("Could not allocate space needed to JIT compile eBPF program");
program"); @@ -48,7 +57,7 @@ initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *com } void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result) +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result) { UNUSED_PARAMETER(compile_result); free(state->pc_locs); @@ -62,7 +71,8 @@ release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compil } void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index) +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index) { struct patchable_relative* jump = &table[index]; jump->offset_loc = offset; @@ -82,9 +92,8 @@ note_lea(struct jit_state* state, uint32_t offset) emit_patchable_relative(state->offset, offset, 0, state->leas, state->num_leas++); } - void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t src_offset, uint32_t dest_offset) { for (size_t index = 0; index < table_size; index++) { if (table[index].offset_loc == src_offset) { diff --git a/vm/ubpf_jit_support.h b/vm/ubpf_jit_support.h index 197389752..659bcc85a 100644 --- a/vm/ubpf_jit_support.h +++ b/vm/ubpf_jit_support.h @@ -28,7 +28,8 @@ #include #include "ubpf_int.h" -enum JitProgress { +enum JitProgress +{ NoError, TooManyJumps, TooManyLoads, @@ -51,10 +52,10 @@ struct patchable_relative /* Special values for target_pc in struct jump */ #define TARGET_PC_EXIT ~UINT32_C(0) -#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) -#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) +#define TARGET_PC_ENTER (~UINT32_C(0) & 0x01) +#define TARGET_PC_RETPOLINE (~UINT32_C(0) & 0x0101) #define TARGET_PC_EXTERNAL_DISPATCHER (~UINT32_C(0) & 0x010101) -#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) +#define TARGET_LOAD_HELPER_TABLE (~UINT32_C(0) & 0x01010101) struct jit_state { @@ -85,6 +86,7 @@ struct jit_state */ uint32_t helper_table_loc; enum JitProgress jit_status; + enum JitMode jit_mode; struct patchable_relative* jumps; struct patchable_relative* loads; struct patchable_relative* leas; @@ -95,13 +97,20 @@ struct jit_state }; int -initialize_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result, uint8_t *buffer, uint32_t size, char **errmsg); +initialize_jit_state_result( + struct jit_state* state, + struct ubpf_jit_result* compile_result, + uint8_t* buffer, + uint32_t size, + enum JitMode jit_mode, + char** errmsg); void -release_jit_state_result(struct jit_state *state, struct ubpf_jit_result *compile_result); +release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compile_result); void -emit_patchable_relative(uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative *table, size_t index); +emit_patchable_relative( + uint32_t offset, uint32_t target_pc, uint32_t manual_target_offset, struct patchable_relative* table, size_t index); void note_load(struct jit_state* state, uint32_t target_pc); @@ -113,5 +122,5 @@ void emit_jump_target(struct jit_state* state, uint32_t jump_src); void -fixup_jump_target(struct patchable_relative *table, size_t table_size, uint32_t src_offset, uint32_t dest_offset); +fixup_jump_target(struct patchable_relative* table, size_t table_size, uint32_t 
 #endif
diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c
index 467d6daab..23d289e91 100644
--- a/vm/ubpf_jit_x86_64.c
+++ b/vm/ubpf_jit_x86_64.c
@@ -18,6 +18,7 @@
  * limitations under the License.
  */
 
+#include "ubpf.h"
 #include "ubpf_jit_support.h"
 #define _GNU_SOURCE
 
@@ -79,20 +80,24 @@ static int register_map[REGISTER_MAP_SIZE] = {
     RBP,
 };
 #else
-static int platform_nonvolatile_registers[] = {RBP, RBX, R13, R14, R15};
+static int platform_nonvolatile_registers[] = {RBP, RBX, R12, R13, R14, R15}; // Callee-saved registers.
+static int platform_volatile_registers[] = {
+    RAX, RDI, RSI, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed).
 static int platform_parameter_registers[] = {RDI, RSI, RDX, RCX, R8, R9};
 static int register_map[REGISTER_MAP_SIZE] = {
+    // Scratch registers
     RAX,
     RDI,
     RSI,
     RDX,
     R10,
     R8,
+    // Non-volatile registers
     RBX,
+    R12,
     R13,
     R14,
     R15,
-    RBP,
 };
 #endif
 
@@ -104,17 +109,39 @@ map_register(int r)
     return register_map[r % _BPF_REG_MAX];
 }
 
+#if 0
 static inline void
-emit_local_call(struct jit_state* state, uint32_t target_pc)
+emit_bpf_push(struct jit_state* state, int bpf_register)
 {
-    /*
-     * Pushing 4 * 8 = 32 bytes will maintain the invariant
-     * that the stack is 16-byte aligned.
-     */
+    emit_store(state, S64, map_register(bpf_register), map_register(BPF_REG_10), -8);
+    emit_alu64_imm32(state, 0x81, 5, map_register(BPF_REG_10), 8);
+}
+
+static inline void
+emit_bpf_pop(struct jit_state* state, int bpf_register)
+{
+    emit_alu64_imm32(state, 0x81, 0, map_register(BPF_REG_10), 8);
+    emit_load(state, S64, map_register(BPF_REG_10), map_register(bpf_register), -8);
+}
+#endif
+
+static inline void
+emit_local_call(struct ubpf_vm* vm, struct jit_state* state, uint32_t target_pc)
+{
+    UNUSED_PARAMETER(vm);
+    // Because the top of the stack holds the stack usage of the calling function,
+    // we adjust the base pointer down by that value!
+    // sub r15, [rsp]
+    emit1(state, 0x4c);
+    emit1(state, 0x2B);
+    emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b
+    emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b
+
     emit_push(state, map_register(BPF_REG_6));
     emit_push(state, map_register(BPF_REG_7));
     emit_push(state, map_register(BPF_REG_8));
     emit_push(state, map_register(BPF_REG_9));
+
 #if defined(_WIN32)
     /* Windows x64 ABI requires home register space */
     /* Allocate home register space - 4 registers */
@@ -122,6 +149,7 @@ emit_local_call(struct jit_state* state, uint32_t target_pc)
 #endif
     emit1(state, 0xe8); // e8 is the opcode for a CALL
     emit_jump_address_reloc(state, target_pc);
+
 #if defined(_WIN32)
     /* Deallocate home register space - 4 registers */
     emit_alu64_imm32(state, 0x81, 0, RSP, 4 * sizeof(uint64_t));
@@ -130,12 +158,19 @@ emit_local_call(struct jit_state* state, uint32_t target_pc)
     emit_pop(state, map_register(BPF_REG_8));
     emit_pop(state, map_register(BPF_REG_7));
     emit_pop(state, map_register(BPF_REG_6));
+
+    // Because the top of the stack holds the stack usage of the calling function,
+    // we adjust the base pointer back up by that value!
+    // add r15, [rsp]
+    emit1(state, 0x4c);
+    emit1(state, 0x03);
+    emit1(state, 0x3C); // Mod: 00b Reg: 111b RM: 100b
+    emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b
 }
 
 static uint32_t
 emit_dispatched_external_helper_address(struct jit_state* state, struct ubpf_vm* vm)
 {
-
     uint32_t external_helper_address_target = state->offset;
     emit8(state, (uint64_t)vm->dispatcher);
     return external_helper_address_target;
@@ -237,6 +272,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
 {
     int i;
 
+    (void)platform_volatile_registers;
     /* Save platform non-volatile registers */
     for (i = 0; i < _countof(platform_nonvolatile_registers); i++) {
         emit_push(state, platform_nonvolatile_registers[i]);
@@ -247,7 +283,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
         emit_mov(state, platform_parameter_registers[0], map_register(BPF_REG_1));
     }
 
-    /* Move the platform parameter register to the (volatile) register
+    /* Move the first platform parameter register to the (volatile) register
      * that holds the pointer to the context.
      */
     emit_mov(state, platform_parameter_registers[0], VOLATILE_CTXT);
@@ -256,7 +292,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
      * Assuming that the stack is 16-byte aligned right before
      * the call insn that brought us to this code, when
      * we start executing the jit'd code, we need to regain a 16-byte
-     * alignment. The UBPF_STACK_SIZE is guaranteed to be
+     * alignment. The UBPF_EBPF_STACK_SIZE is guaranteed to be
      * divisible by 16. However, if we pushed an even number of
      * registers on the stack when we are saving state (see above),
     * then we have to add an additional 8 bytes to get back
@@ -267,12 +303,23 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
     }
 
     /*
-     * Set BPF R10 (the way to access the frame in eBPF) to match RSP.
+     * Let's set RBP to RSP so that we can restore RSP later!
      */
-    emit_mov(state, RSP, map_register(BPF_REG_10));
-
-    /* Allocate stack space */
-    emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_STACK_SIZE);
+    emit_mov(state, RSP, RBP);
+
+    /* Configure eBPF program stack space */
+    if (state->jit_mode == BasicJitMode) {
+        /*
+         * Set BPF R10 (the way to access the frame in eBPF) to the beginning
+         * of the eBPF program's stack space.
+         */
+        emit_mov(state, RSP, map_register(BPF_REG_10));
+        /* Allocate eBPF program stack space */
+        emit_alu64_imm32(state, 0x81, 5, RSP, UBPF_EBPF_STACK_SIZE);
+    } else {
+        /* Use given eBPF program stack space */
+        emit_mov(state, platform_parameter_registers[2], map_register(BPF_REG_10));
+    }
 
 #if defined(_WIN32)
     /* Windows x64 ABI requires home register space */
@@ -308,10 +355,13 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
             uint32_t target_pc = i + inst.offset + 1;
 
             if (i == 0 || vm->int_funcs[i]) {
-                /* When we are the subject of a call, we have to properly align our
-                 * stack pointer.
-                 */
+                uint16_t stack_usage = ubpf_stack_usage_for_local_func(vm, i);
+                // Keep RSP 16-byte aligned after the CALL pushed the return address,
+                // and record this function's stack usage in the newly reserved slot;
+                // emit_local_call reads that slot via [rsp].
+                emit_alu64_imm32(state, 0x81, 5, RSP, 8); // sub rsp, 8
+                // mov qword [rsp], stack_usage
+                emit1(state, 0x48);
+                emit1(state, 0xC7);
+                emit1(state, 0x04); // Mod: 00b Reg: 000b RM: 100b
+                emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b
+                emit4(state, stack_usage);
             }
 
             switch (inst.opcode) {
@@ -662,7 +712,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
                     }
                 } else if (inst.src == 1) {
                     target_pc = i + inst.imm + 1;
-                    emit_local_call(state, target_pc);
+                    emit_local_call(vm, state, target_pc);
                 }
                 break;
             case EBPF_OP_EXIT:
@@ -768,8 +818,8 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
         emit_mov(state, map_register(BPF_REG_0), RAX);
     }
 
-    /* Deallocate stack space by restoring RSP from BPF R10. */
-    emit_mov(state, map_register(BPF_REG_10), RSP);
+    /* Deallocate stack space by restoring RSP from RBP. */
+    emit_mov(state, RBP, RSP);
 
     if (!(_countof(platform_nonvolatile_registers) % 2)) {
         emit_alu64_imm32(state, 0x81, 0, RSP, 0x8);
@@ -977,12 +1027,12 @@ resolve_patchable_relatives(struct jit_state* state)
 }
 
 struct ubpf_jit_result
-ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size)
+ubpf_translate_x86_64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode)
 {
     struct jit_state state;
     struct ubpf_jit_result compile_result;
 
-    if (initialize_jit_state_result(&state, &compile_result, buffer, *size, &compile_result.errmsg) < 0) {
+    if (initialize_jit_state_result(&state, &compile_result, buffer, *size, jit_mode, &compile_result.errmsg) < 0) {
         goto out;
     }
 
diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c
index e2d71ae00..112746ce3 100644
--- a/vm/ubpf_vm.c
+++ b/vm/ubpf_vm.c
@@ -65,7 +65,14 @@ ubpf_set_error_print(struct ubpf_vm* vm, int (*error_printf)(FILE* stream, const
 }
 
 static uint64_t
-ubpf_default_external_dispatcher(uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, unsigned int index, external_function_t *external_fns)
+ubpf_default_external_dispatcher(
+    uint64_t arg1,
+    uint64_t arg2,
+    uint64_t arg3,
+    uint64_t arg4,
+    uint64_t arg5,
+    unsigned int index,
+    external_function_t* external_fns)
 {
     return external_fns[index](arg1, arg2, arg3, arg4, arg5);
 }
@@ -90,6 +97,12 @@ ubpf_create(void)
         return NULL;
     }
 
+    vm->local_func_stack_usage = calloc(UBPF_MAX_INSTS, sizeof(struct ubpf_stack_usage));
+    if (vm->local_func_stack_usage == NULL) {
+        ubpf_destroy(vm);
+        return NULL;
+    }
+
     vm->bounds_check_enabled = true;
     vm->error_printf = fprintf;
 
@@ -118,6 +131,7 @@ ubpf_destroy(struct ubpf_vm* vm)
     free(vm->int_funcs);
     free(vm->ext_funcs);
     free(vm->ext_func_names);
+    free(vm->local_func_stack_usage);
     free(vm);
 }
 
@@ -127,7 +141,6 @@ as_external_function_t(void* f)
     return (external_function_t)f;
 };
 
-
 int
 ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_function_t fn)
 {
@@ -146,7 +159,8 @@ ubpf_register(struct ubpf_vm* vm, unsigned int idx, const char* name, external_f
     }
 
     // Now, update!
-    if (!vm->jit_update_helper(vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) {
+    if (!vm->jit_update_helper(
+            vm, fn, idx, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_helper_offset)) {
         // Can't immediately stop here because we have unprotected memory!
         success = -1;
     }
@@ -173,7 +187,8 @@ ubpf_register_external_dispatcher(
     }
 
     // Now, update!
-    if (!vm->jit_update_dispatcher(vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) {
+    if (!vm->jit_update_dispatcher(
+            vm, dispatcher, (uint8_t*)vm->jitted, vm->jitted_size, vm->jitted_result.external_dispatcher_offset)) {
         // Can't immediately stop here because we have unprotected memory!
         success = -1;
     }
@@ -215,8 +230,8 @@ ubpf_load(struct ubpf_vm* vm, const void* code, uint32_t code_len, char** errmsg
     const struct ebpf_inst* source_inst = code;
     *errmsg = NULL;
 
-    if (UBPF_STACK_SIZE % sizeof(uint64_t) != 0) {
-        *errmsg = ubpf_error("UBPF_STACK_SIZE must be a multiple of 8");
+    if (UBPF_EBPF_STACK_SIZE % sizeof(uint64_t) != 0) {
+        *errmsg = ubpf_error("UBPF_EBPF_STACK_SIZE must be a multiple of 8");
         return -1;
     }
 
@@ -357,44 +372,24 @@ ubpf_mem_store(uint64_t address, uint64_t value, size_t size)
 }
 
 int
-ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value)
+ubpf_exec_ex(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value, uint8_t* stack)
 {
     uint16_t pc = 0;
     const struct ebpf_inst* insts = vm->insts;
     uint64_t* reg;
     uint64_t _reg[16];
-    uint64_t ras_index = 0;
+    uint64_t stack_frame_index = 0;
     int return_value = -1;
-    void *external_dispatcher_cookie = mem;
-
-// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically.
-#if defined(NTDDI_VERSION) && defined(WINNT)
-    uint64_t* stack = NULL;
-    struct ubpf_stack_frame* stack_frames = NULL;
+    void* external_dispatcher_cookie = mem;
 
-    stack = calloc(UBPF_STACK_SIZE, 1);
-    if (!stack) {
-        return_value = -1;
-        goto cleanup;
-    }
-
-    stack_frames = calloc(UBPF_MAX_CALL_DEPTH, sizeof(struct ubpf_stack_frame));
-    if (!stack_frames) {
-        return_value = -1;
-        goto cleanup;
+    if (!insts) {
+        /* Code must be loaded before we can execute */
+        return -1;
     }
-#else
-    uint64_t stack[UBPF_STACK_SIZE / sizeof(uint64_t)];
+
     struct ubpf_stack_frame stack_frames[UBPF_MAX_CALL_DEPTH] = {
         0,
     };
-#endif
-
-    if (!insts) {
-        /* Code must be loaded before we can execute */
-        return -1;
-    }
 
 #ifdef DEBUG
     if (vm->regs)
@@ -407,7 +402,7 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
 
     reg[1] = (uintptr_t)mem;
     reg[2] = (uint64_t)mem_len;
-    reg[10] = (uintptr_t)stack + UBPF_STACK_SIZE;
+    reg[10] = (uintptr_t)stack + UBPF_EBPF_STACK_SIZE;
 
     int instruction_limit = vm->instruction_limit;
 
@@ -421,6 +416,11 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
             return_value = -1;
             goto cleanup;
         }
+
+        if (pc == 0 || vm->int_funcs[pc]) {
+            stack_frames[stack_frame_index].stack_usage = ubpf_stack_usage_for_local_func(vm, pc);
+        }
+
         struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++);
 
         switch (inst.opcode) {
@@ -916,13 +916,14 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
             }
             break;
         case EBPF_OP_EXIT:
-            if (ras_index > 0) {
-                ras_index--;
-                pc = stack_frames[ras_index].return_address;
-                reg[BPF_REG_6] = stack_frames[ras_index].saved_registers[0];
-                reg[BPF_REG_7] = stack_frames[ras_index].saved_registers[1];
-                reg[BPF_REG_8] = stack_frames[ras_index].saved_registers[2];
-                reg[BPF_REG_9] = stack_frames[ras_index].saved_registers[3];
+            if (stack_frame_index > 0) {
+                stack_frame_index--;
+                pc = stack_frames[stack_frame_index].return_address;
+                reg[BPF_REG_6] = stack_frames[stack_frame_index].saved_registers[0];
+                reg[BPF_REG_7] = stack_frames[stack_frame_index].saved_registers[1];
+                reg[BPF_REG_8] = stack_frames[stack_frame_index].saved_registers[2];
+                reg[BPF_REG_9] = stack_frames[stack_frame_index].saved_registers[3];
+                reg[BPF_REG_10] += stack_frames[stack_frame_index].stack_usage;
                 break;
             }
             *bpf_return_value = reg[0];
@@ -934,9 +935,11 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
             if (inst.src == 0) {
                 // Handle call by address to external function.
                 if (vm->dispatcher != NULL) {
-                    reg[0] = vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie);
+                    reg[0] =
+                        vm->dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, external_dispatcher_cookie);
                 } else {
-                    reg[0] = ubpf_default_external_dispatcher(reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs);
+                    reg[0] = ubpf_default_external_dispatcher(
+                        reg[1], reg[2], reg[3], reg[4], reg[5], inst.imm, vm->ext_funcs);
                 }
                 if (inst.imm == vm->unwind_stack_extension_index && reg[0] == 0) {
                     *bpf_return_value = reg[0];
@@ -944,22 +947,25 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
                     goto cleanup;
                 }
             } else if (inst.src == 1) {
-                if (ras_index >= UBPF_MAX_CALL_DEPTH) {
+                if (stack_frame_index >= UBPF_MAX_CALL_DEPTH) {
                     vm->error_printf(
                         stderr,
                         "uBPF error: number of nested function calls (%lu) exceeds max (%lu) at PC %u\n",
-                        ras_index + 1,
+                        stack_frame_index + 1,
                         UBPF_MAX_CALL_DEPTH,
                         cur_pc);
                     return_value = -1;
                     goto cleanup;
                 }
-                stack_frames[ras_index].saved_registers[0] = reg[BPF_REG_6];
-                stack_frames[ras_index].saved_registers[1] = reg[BPF_REG_7];
-                stack_frames[ras_index].saved_registers[2] = reg[BPF_REG_8];
-                stack_frames[ras_index].saved_registers[3] = reg[BPF_REG_9];
-                stack_frames[ras_index].return_address = pc;
-                ras_index++;
+                stack_frames[stack_frame_index].saved_registers[0] = reg[BPF_REG_6];
+                stack_frames[stack_frame_index].saved_registers[1] = reg[BPF_REG_7];
+                stack_frames[stack_frame_index].saved_registers[2] = reg[BPF_REG_8];
+                stack_frames[stack_frame_index].saved_registers[3] = reg[BPF_REG_9];
+                stack_frames[stack_frame_index].return_address = pc;
+
+                reg[BPF_REG_10] -= stack_frames[stack_frame_index].stack_usage;
+
+                stack_frame_index++;
                 pc += inst.imm;
                 break;
             } else if (inst.src == 2) {
@@ -976,11 +982,28 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
 
 cleanup:
-#if defined(NTDDI_VERSION) && defined(WINNT)
-    free(stack_frames);
-    free(stack);
-#endif
     return return_value;
 }
 
+int
+ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value)
+{
+// Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically.
+#if defined(NTDDI_VERSION) && defined(WINNT)
+    uint64_t* stack = calloc(UBPF_EBPF_STACK_SIZE, 1);
+    if (!stack) {
+        return -1;
+    }
+    int retval = ubpf_exec_ex(vm, mem, mem_len, bpf_return_value, (uint8_t*)stack);
+    free(stack);
+    return retval;
+#else
+    uint64_t stack[UBPF_EBPF_STACK_SIZE / sizeof(uint64_t)];
+    return ubpf_exec_ex(vm, mem, mem_len, bpf_return_value, (uint8_t*)stack);
+#endif
+}
+
 static bool
 validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_insts, char** errmsg)
 {
@@ -989,6 +1012,10 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i
         return false;
     }
 
+    if (!ubpf_calculate_stack_usage_for_local_func(vm, 0, errmsg)) {
+        return false;
+    }
+
     int i;
     for (i = 0; i < num_insts; i++) {
         struct ebpf_inst inst = insts[i];
@@ -1047,8 +1074,11 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i
         case EBPF_OP_MOD64_REG:
         case EBPF_OP_XOR64_IMM:
         case EBPF_OP_XOR64_REG:
+            break;
         case EBPF_OP_MOV64_IMM:
         case EBPF_OP_MOV64_REG:
+            store = true;
+            break;
         case EBPF_OP_ARSH64_IMM:
         case EBPF_OP_ARSH64_REG:
             break;
@@ -1159,6 +1189,9 @@ validate(const struct ubpf_vm* vm, const struct ebpf_inst* insts, uint32_t num_i
                     ubpf_error("call to local function (at PC %d) is out of bounds (target: %d)", i, call_target);
                 return false;
             }
+            if (!ubpf_calculate_stack_usage_for_local_func(vm, call_target, errmsg)) {
+                return false;
+            }
         } else if (inst.src == 2) {
             *errmsg = ubpf_error("call to external function by BTF ID (at PC %d) is not supported", i);
             return false;
@@ -1210,10 +1243,10 @@ bounds_check(
     if (!vm->bounds_check_enabled)
         return true;
 
-    uintptr_t access_start= (uintptr_t)addr;
+    uintptr_t access_start = (uintptr_t)addr;
     uintptr_t access_end = access_start + size;
     uintptr_t stack_start = (uintptr_t)stack;
-    uintptr_t stack_end = stack_start + UBPF_STACK_SIZE;
+    uintptr_t stack_end = stack_start + UBPF_EBPF_STACK_SIZE;
     uintptr_t mem_start = (uintptr_t)mem;
     uintptr_t mem_end = mem_start + mem_len;
 
@@ -1223,12 +1256,7 @@ bounds_check(
 
     if (access_start > access_end) {
         vm->error_printf(
-            stderr,
-            "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n",
-            type,
-            cur_pc,
-            addr,
-            size);
+            stderr, "uBPF error: invalid memory access %s at PC %u, addr %p, size %d\n", type, cur_pc, addr, size);
         return false;
     }
 
@@ -1249,7 +1277,8 @@ bounds_check(
     // The address may be invalid or it may be a region of memory that the caller
     // is aware of but that is not part of the stack or memory.
     // Call any registered bounds check function to determine if the access is valid.
-    if (vm->bounds_check_function != NULL && vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) {
+    if (vm->bounds_check_function != NULL &&
+        vm->bounds_check_function(vm->bounds_check_user_data, access_start, size)) {
        return true;
     }
 
@@ -1266,7 +1295,7 @@ bounds_check(
         mem,
         mem_len,
         stack,
-        UBPF_STACK_SIZE);
+        UBPF_EBPF_STACK_SIZE);
 
     return false;
 }
@@ -1388,3 +1417,45 @@ ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previou
     vm->instruction_limit = limit;
     return 0;
 }
+
+bool
+ubpf_calculate_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc, char** errmsg)
+{
+    // If there is a stack usage calculator and we have not invoked it before for the target,
+    // then now is the time to call it!
+    if (vm->stack_usage_calculator && !vm->local_func_stack_usage[pc].stack_usage_calculated) {
+        uint16_t stack_usage = (vm->stack_usage_calculator)(vm, pc, vm->stack_usage_calculator_cookie);
+        vm->local_func_stack_usage[pc].stack_usage = stack_usage;
+    }
+    vm->local_func_stack_usage[pc].stack_usage_calculated = true;
+    // Now that we are guaranteed to have a value for the amount of stack used by the function
+    // starting at pc, let's make sure that it is a multiple of 16. Note: the recorded usage may
+    // be 0 (when no stack usage calculator is registered). That is okay: a usage of 0 simply
+    // reproduces the old behavior in which caller and callee frames overlap.
+    if (vm->local_func_stack_usage[pc].stack_usage % 16) {
+        *errmsg = ubpf_error(
+            "local function (at PC %d) has improperly sized stack use (%d)",
+            pc,
+            vm->local_func_stack_usage[pc].stack_usage);
+        return false;
+    }
+    return true;
+}
+
+uint16_t
+ubpf_stack_usage_for_local_func(const struct ubpf_vm* vm, uint16_t pc)
+{
+    uint16_t stack_usage = 0;
+    if (vm->local_func_stack_usage[pc].stack_usage_calculated) {
+        stack_usage = vm->local_func_stack_usage[pc].stack_usage;
+    }
+    return stack_usage;
+}
+
+int
+ubpf_register_stack_usage_calculator(struct ubpf_vm* vm, stack_usage_calculator_t calculator, void* cookie)
+{
+    vm->stack_usage_calculator_cookie = cookie;
+    vm->stack_usage_calculator = calculator;
+    return 0;
+}
\ No newline at end of file
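Taken together, a host enables per-function stack accounting roughly as follows. This is
hypothetical host code, not part of this patch: the callback must be registered before
ubpf_load()/ubpf_load_elf(), because validation invokes it, and every reported size must be
a multiple of 16.

    #include <stdint.h>
    #include "ubpf.h"

    /* Hypothetical calculator: report how much stack the local function
     * starting at `pc` needs. A real host would derive this from its
     * compiler's metadata; a fixed 32 bytes is used here for simplicity. */
    static int
    calculate_stack_usage(const struct ubpf_vm* vm, uint16_t pc, void* cookie)
    {
        (void)vm;
        (void)pc;
        (void)cookie;
        return 32;
    }

    /* Call after ubpf_create() and before loading the program. */
    static int
    setup_stack_accounting(struct ubpf_vm* vm)
    {
        return ubpf_register_stack_usage_calculator(vm, calculate_stack_usage, NULL);
    }

With that in place, both the interpreter (reg[BPF_REG_10] -= stack_usage on a local call)
and the x86-64 JIT (sub r15, [rsp] in emit_local_call) give each local function its own
region of the eBPF stack.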