diff --git a/vm/CMakeLists.txt b/vm/CMakeLists.txt index f173aa5ba..5af6e6412 100644 --- a/vm/CMakeLists.txt +++ b/vm/CMakeLists.txt @@ -44,7 +44,6 @@ add_library("ubpf" ubpf_jit_support.c ubpf_jit_support.h ubpf_jit_x86_64.c - ubpf_jit_x86_64.h ubpf_loader.c ubpf_vm.c ) diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c index 0b0ef0bd3..ade57a080 100644 --- a/vm/ubpf_jit_x86_64.c +++ b/vm/ubpf_jit_x86_64.c @@ -29,89 +29,1086 @@ #include #include #include +#include #include #include "ubpf_int.h" -#include "ubpf_jit_x86_64.h" #if !defined(_countof) #define _countof(array) (sizeof(array) / sizeof(array[0])) #endif +#define RAX 0 +#define RCX 1 +#define RDX 2 +#define RBX 3 +#define RSP 4 +#define RBP 5 +#define RIP 5 +#define RSI 6 +#define RDI 7 +#define R8 8 +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 + +#define VOLATILE_CTXT 11 + +enum operand_size +{ + S8, + S16, + S32, + S64, +}; + +#define REGISTER_MAP_SIZE 11 + +/* + * There are two common x86-64 calling conventions, as discussed at + * https://en.wikipedia.org/wiki/X86_calling_conventions#x86-64_calling_conventions + * + * Please Note: R12 is special and we are *not* using it. As a result, it is omitted + * from the list of non-volatile registers for both platforms (even though it is, in + * fact, non-volatile). + * + * BPF R0-R4 are "volatile" + * BPF R5-R10 are "non-volatile" + * In general, we attempt to map BPF volatile registers to x64 volatile and BPF non- + * volatile to x64 non-volatile. + */ + +// Because of this designation and the way that the registers are mapped +// between native and BPF, the value in native R10 is always something +// the BPF program has to consider trashed across external function calls. +// Therefore, during invocation of external function calls, we can use +// native R10 for free. +#define RCX_ALT R10 + +#if defined(_WIN32) +static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = {RAX, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). +static int platform_parameter_registers[] = {RCX, RDX, R8, R9}; +static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers + RAX, + R10, + RDX, + R8, + R9, + R12, + // Non-volatile registers + RBX, + RDI, + RSI, + R14, + R15, // Until further notice, r15 must be mapped to eBPF register r10 +}; +#else +static int platform_nonvolatile_registers[] = {RBP, RBX, R12, R13, R14, R15}; // Callee-saved registers. +static int platform_volatile_registers[] = { + RAX, RDI, RSI, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). +static int platform_parameter_registers[] = {RDI, RSI, RDX, RCX, R8, R9}; +static int register_map[REGISTER_MAP_SIZE] = { + // Scratch registers + RAX, + RDI, + RSI, + RDX, + R10, + R8, + // Non-volatile registers + RBX, + R12, + R13, + R14, + R15, // Until further notice, r15 must be mapped to eBPF register r10 +}; +#endif + +/* Return the x86 register for the given eBPF register */ +static int +map_register(int r) +{ + assert(r < _BPF_REG_MAX); + return register_map[r % _BPF_REG_MAX]; +} + +static inline void +emit_bytes(struct jit_state* state, void* data, uint32_t len) +{ + // Never emit any bytes if there is an error! + if (state->jit_status != NoError) { + return; + } + + // If we are trying to emit bytes to a spot outside the buffer, + // then there is not enough space! + if ((state->offset + len) > state->size) { + state->jit_status = NotEnoughSpace; + return; + } + + memcpy(state->buf + state->offset, data, len); + state->offset += len; +} + +static inline void +emit1(struct jit_state* state, uint8_t x) +{ + emit_bytes(state, &x, sizeof(x)); +} + +static inline void +emit2(struct jit_state* state, uint16_t x) +{ + emit_bytes(state, &x, sizeof(x)); +} + +static inline void +emit4(struct jit_state* state, uint32_t x) +{ + emit_bytes(state, &x, sizeof(x)); +} + +static inline void +emit8(struct jit_state* state, uint64_t x) +{ + emit_bytes(state, &x, sizeof(x)); +} + +static void +emit_4byte_offset_placeholder(struct jit_state* state) +{ + emit4(state, 0); +} + +static uint32_t +emit_jump_address_reloc(struct jit_state* state, int32_t target_pc) +{ + if (state->num_jumps == UBPF_MAX_INSTS) { + state->jit_status = TooManyJumps; + return 0; + } + uint32_t target_address_offset = state->offset; + emit_patchable_relative(state->offset, target_pc, 0, state->jumps, state->num_jumps++); + emit_4byte_offset_placeholder(state); + return target_address_offset; +} + +static uint32_t +emit_near_jump_address_reloc(struct jit_state* state, int32_t target_pc) +{ + if (state->num_jumps == UBPF_MAX_INSTS) { + state->jit_status = TooManyJumps; + return 0; + } + uint32_t target_address_offset = state->offset; + emit_patchable_relative_ex(state->offset, target_pc, 0, state->jumps, state->num_jumps++, true /* near */); + emit1(state, 0x0); + return target_address_offset; +} + +static uint32_t +emit_local_call_address_reloc(struct jit_state* state, int32_t target_pc) +{ + if (state->num_local_calls == UBPF_MAX_INSTS) { + state->jit_status = TooManyLocalCalls; + return 0; + } + uint32_t target_address_offset = state->offset; + emit_patchable_relative(state->offset, target_pc, 0, state->local_calls, state->num_local_calls++); + emit_4byte_offset_placeholder(state); + return target_address_offset; +} + +static inline void +emit_modrm(struct jit_state* state, int mod, int r, int m) +{ + // Only the top 2 bits of the mod should be used. + assert(!(mod & ~0xc0)); + emit1(state, (mod & 0xc0) | ((r & 7) << 3) | (m & 7)); +} + +static inline void +emit_modrm_reg2reg(struct jit_state* state, int r, int m) +{ + emit_modrm(state, 0xc0, r, m); +} + +/** + * @brief Emit an ModRM byte and accompanying displacement. + * Special case for the situation where the displacement is 0. + * + * @param[in] state The JIT state in which to emit this instruction. + * @param[in] reg The value for the reg of the ModRM byte. + * @param[in] rm The value for the rm of the ModRM byte. + * @param[in] d The displacement value + */ +static inline void +emit_modrm_and_displacement(struct jit_state* state, int reg, int rm, int32_t d) +{ + rm &= 0xf; + reg &= 0xf; + + // Handle 0 displacement special (where we can!). + if (d == 0 && rm != RSP && rm != RBP && rm != R12 && rm != R13) { + emit_modrm(state, 0x00, reg, rm); + return; + } + + uint32_t near_disp = (d >= -128 && d <= 127); + uint8_t mod = near_disp ? 0x40 : 0x80; + + emit_modrm(state, mod, reg, rm); + if (rm == R12) { + // When using R12 as the rm in (rm + disp), the actual + // rm has to be put in an SIB. SIB value of 0x24 means: + // scale (of index): N/A (see below) + // index: no index + // base: R12 + // A SIB byte with this value means that the resulting + // encoded instruction will mimic the semantics when + // using any other register. + emit1(state, 0x24); + } + + if (near_disp) + emit1(state, d); + else + emit4(state, d); +} + +static inline void +emit_rex(struct jit_state* state, int w, int r, int x, int b) +{ + assert(!(w & ~1)); + assert(!(r & ~1)); + assert(!(x & ~1)); + assert(!(b & ~1)); + emit1(state, 0x40 | (w << 3) | (r << 2) | (x << 1) | b); +} + +/* + * Emits a REX prefix with the top bit of src and dst. + * Skipped if no bits would be set. + */ +static inline void +emit_basic_rex(struct jit_state* state, int w, int src, int dst) +{ + if (w || (src & 8) || (dst & 8)) { + emit_rex(state, w, !!(src & 8), 0, !!(dst & 8)); + } +} + +static inline void +emit_push(struct jit_state* state, int r) +{ + emit_basic_rex(state, 0, 0, r); + emit1(state, 0x50 | (r & 7)); +} + +static inline void +emit_pop(struct jit_state* state, int r) +{ + emit_basic_rex(state, 0, 0, r); + emit1(state, 0x58 | (r & 7)); +} + +/* REX prefix and ModRM byte */ +/* We use the MR encoding when there is a choice */ +/* 'src' is often used as an opcode extension */ +static inline void +emit_alu32(struct jit_state* state, int op, int src, int dst) +{ + emit_basic_rex(state, 0, src, dst); + emit1(state, op); + emit_modrm_reg2reg(state, src, dst); +} + +/* REX prefix, ModRM byte, and 32-bit immediate */ +static inline void +emit_alu32_imm32(struct jit_state* state, int op, int src, int dst, int32_t imm) +{ + emit_alu32(state, op, src, dst); + emit4(state, imm); +} + +/* REX prefix, ModRM byte, and 8-bit immediate */ +static inline void +emit_alu32_imm8(struct jit_state* state, int op, int src, int dst, int8_t imm) +{ + emit_alu32(state, op, src, dst); + emit1(state, imm); +} + +static inline void +emit_truncate_u32(struct jit_state* state, int destination) +{ + emit_alu32_imm32(state, 0x81, 4, destination, UINT32_MAX); +} + +/* REX.W prefix and ModRM byte */ +/* We use the MR encoding when there is a choice */ +/* 'src' is often used as an opcode extension */ +static inline void +emit_alu64(struct jit_state* state, int op, int src, int dst) +{ + emit_basic_rex(state, 1, src, dst); + emit1(state, op); + emit_modrm_reg2reg(state, src, dst); +} + +/* REX.W prefix, ModRM byte, and 32-bit immediate */ +static inline void +emit_alu64_imm32(struct jit_state* state, int op, int src, int dst, int32_t imm) +{ + emit_alu64(state, op, src, dst); + emit4(state, imm); +} + +/* REX.W prefix, ModRM byte, and 8-bit immediate */ +static inline void +emit_alu64_imm8(struct jit_state* state, int op, int src, int dst, int8_t imm) +{ + emit_alu64(state, op, src, dst); + emit1(state, imm); +} + +/* Register to register mov */ +static inline void +emit_mov(struct jit_state* state, int src, int dst) +{ + emit_alu64(state, 0x89, src, dst); +} + +static inline void +emit_cmp_imm32(struct jit_state* state, int dst, int32_t imm) +{ + emit_alu64_imm32(state, 0x81, 7, dst, imm); +} + +static inline void +emit_cmp32_imm32(struct jit_state* state, int dst, int32_t imm) +{ + emit_alu32_imm32(state, 0x81, 7, dst, imm); +} + +static inline void +emit_cmp(struct jit_state* state, int src, int dst) +{ + emit_alu64(state, 0x39, src, dst); +} + +static inline void +emit_cmp32(struct jit_state* state, int src, int dst) +{ + emit_alu32(state, 0x39, src, dst); +} + +static inline uint32_t +emit_jcc(struct jit_state* state, int code, int32_t target_pc) +{ + emit1(state, 0x0f); + emit1(state, code); + return emit_jump_address_reloc(state, target_pc); +} + +/* Load [src + offset] into dst */ +static inline void +emit_load(struct jit_state* state, enum operand_size size, int src, int dst, int32_t offset) +{ + emit_basic_rex(state, size == S64, dst, src); + + if (size == S8 || size == S16) { + /* movzx */ + emit1(state, 0x0f); + emit1(state, size == S8 ? 0xb6 : 0xb7); + } else if (size == S32 || size == S64) { + /* mov */ + emit1(state, 0x8b); + } + + emit_modrm_and_displacement(state, dst, src, offset); +} + +/* Load sign-extended immediate into register */ +static inline void +emit_load_imm(struct jit_state* state, int dst, int64_t imm) +{ + if (imm >= INT32_MIN && imm <= INT32_MAX) { + emit_alu64_imm32(state, 0xc7, 0, dst, imm); + } else { + /* movabs $imm,dst */ + emit_basic_rex(state, 1, 0, dst); + emit1(state, 0xb8 | (dst & 7)); + emit8(state, imm); + } +} + +static uint32_t +emit_rip_relative_load(struct jit_state* state, int dst, int relative_load_tgt) +{ + if (state->num_loads == UBPF_MAX_INSTS) { + state->jit_status = TooManyLoads; + return 0; + } + + emit_rex(state, 1, 0, 0, 0); + emit1(state, 0x8b); + emit_modrm(state, 0, dst, 0x05); + uint32_t load_target_offset = state->offset; + note_load(state, relative_load_tgt); + emit_4byte_offset_placeholder(state); + return load_target_offset; +} + +static void +emit_rip_relative_lea(struct jit_state* state, int dst, int lea_tgt) +{ + if (state->num_leas == UBPF_MAX_INSTS) { + state->jit_status = TooManyLeas; + return; + } + + // lea dst, [rip + HELPER TABLE ADDRESS] + emit_rex(state, 1, 1, 0, 0); + emit1(state, 0x8d); + emit_modrm(state, 0, dst, 0x05); + note_lea(state, lea_tgt); + emit_4byte_offset_placeholder(state); +} + +/* Store register src to [dst + offset] */ +static inline void +emit_store(struct jit_state* state, enum operand_size size, int src, int dst, int32_t offset) +{ + if (size == S16) { + emit1(state, 0x66); /* 16-bit override */ + } + int rexw = size == S64; + if (rexw || src & 8 || dst & 8 || size == S8) { + emit_rex(state, rexw, !!(src & 8), 0, !!(dst & 8)); + } + emit1(state, size == S8 ? 0x88 : 0x89); + emit_modrm_and_displacement(state, src, dst, offset); +} + +/* Store immediate to [dst + offset] */ +static inline void +emit_store_imm32(struct jit_state* state, enum operand_size size, int dst, int32_t offset, int32_t imm) +{ + if (size == S16) { + emit1(state, 0x66); /* 16-bit override */ + } + emit_basic_rex(state, size == S64, 0, dst); + emit1(state, size == S8 ? 0xc6 : 0xc7); + emit_modrm_and_displacement(state, 0, dst, offset); + if (size == S32 || size == S64) { + emit4(state, imm); + } else if (size == S16) { + emit2(state, imm); + } else if (size == S8) { + emit1(state, imm); + } +} + +static inline void +emit_ret(struct jit_state* state) +{ + emit1(state, 0xc3); +} + +/** @brief Emit a (32-bit) jump. + * + * @param[in] state The JIT state. + * @param[in] target_pc The PC to which to jump when this near + * jump is executed. + * @return The offset in the JIT'd code where the jump offset starts. + */ +static inline uint32_t +emit_jmp(struct jit_state* state, uint32_t target_pc) +{ + emit1(state, 0xe9); + return emit_jump_address_reloc(state, target_pc); +} + +/** @brief Emit a near jump. + * + * @param[in] state The JIT state. + * @param[in] target_pc The PC to which to jump when this near + * jump is executed. + * @return The offset in the JIT'd code where the jump offset starts. + */ +static inline uint32_t +emit_near_jmp(struct jit_state* state, uint32_t target_pc) +{ + emit1(state, 0xeb); + return emit_near_jump_address_reloc(state, target_pc); +} + +static inline uint32_t +emit_call(struct jit_state* state, uint32_t target_pc) +{ + emit1(state, 0xe8); + uint32_t call_src = state->offset; + emit_jump_address_reloc(state, target_pc); + return call_src; +} + +static inline void +emit_pause(struct jit_state* state) +{ + emit1(state, 0xf3); + emit1(state, 0x90); +} + +static inline void +emit_dispatched_external_helper_call(struct jit_state* state, unsigned int idx) +{ + /* + * Note: We do *not* have to preserve any x86-64 registers here ... + * ... according to the SystemV ABI: rbx (eBPF6), + * r13 (eBPF7), + * r14 (eBPF8), + * r15 (eBPF9), and + * rbp (eBPF10) are all preserved. + * ... according to the Windows ABI: r15 (eBPF6) + * rdi (eBPF7), + * rsi (eBPF8), + * rbx (eBPF9), and + * rbp (eBPF10) are all preserved. + * + * When we enter here, our stack is 16-byte aligned. Keep + * it that way! + */ + + /* + * There are two things that could happen: + * 1. The user has registered an external dispatcher and we need to + * send control there to invoke an external helper. + * 2. The user is relying on the default dispatcher to pass control + * to the registered external helper. + * To determine which action to take, we will first consider the 8 + * bytes at TARGET_PC_EXTERNAL_DISPATCHER. If those 8 bytes have an + * address, that represents the address of the user-registered external + * dispatcher and we pass control there. That function signature looks like + * uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, unsigned int index, void* cookie + * so we make sure that the arguments are done properly depending on the abi. + * + * If there is no external dispatcher registered, the user is expected + * to have registered a handler with us for the helper with index idx. + * There is a table of MAX_ function pointers starting at TARGET_LOAD_HELPER_TABLE. + * Each of those functions has a signature that looks like + * uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, void* cookie + * We load the appropriate function pointer by using idx to index it and then + * make sure that the arguments are set properly depending on the abi. + */ + + // Save register where volatile context is stored. + emit_push(state, VOLATILE_CTXT); + emit_push(state, VOLATILE_CTXT); + // ^^ Stack is aligned here. + +#if defined(_WIN32) + /* Because we may need 24 bytes on the stack but at least 16, we have to take 32 + * to keep alignment happy. We may ultimately need it all, but we certainly + * need 16! Later, though, there is a push that always happens (MARKER2), so + * we only allocate 24 here. + */ + emit_alu64_imm32(state, 0x81, 5, RSP, 3 * sizeof(uint64_t)); +#endif + + emit_rip_relative_load(state, RAX, TARGET_PC_EXTERNAL_DISPATCHER); + // cmp rax, 0 + emit_cmp_imm32(state, RAX, 0); + // jne skip_default_dispatcher_label + uint32_t skip_default_dispatcher_source = emit_jcc(state, 0x85, 0); + + // Default dispatcher: + + // Load the address of the helper function from the table. + // mov rax, idx + emit_alu32(state, 0xc7, 0, RAX); + emit4(state, idx); + // shl rax, 3 (i.e., multiply the index by 8 because addresses are that size on x86-64) + emit_alu64_imm8(state, 0xc1, 4, RAX, 3); + + // lea r10, [rip + HELPER TABLE ADDRESS] + emit_rip_relative_lea(state, R10, TARGET_LOAD_HELPER_TABLE); + + // add rax, r10 + emit_alu64(state, 0x01, R10, RAX); + // load rax, [rax] + emit_load(state, S64, RAX, RAX, 0); + + // There is no index for the registered helper function. They just get + // 5 arguments and a context, which becomes the 6th argument to the function ... +#if defined(_WIN32) + // and spills to the stack on Windows. + // mov qword [rsp], VOLATILE_CTXT + emit1(state, 0x4c); + emit1(state, 0x89); + emit1(state, 0x5c); + emit1(state, 0x24); + emit1(state, 0x00); +#else + // and goes in R9 on SystemV. + emit_mov(state, VOLATILE_CTXT, R9); +#endif + + // jmp call_label + uint32_t skip_external_dispatcher_source = emit_jmp(state, 0); + + // External dispatcher: + + // skip_default_dispatcher_label: + emit_jump_target(state, skip_default_dispatcher_source); + + // Using an external dispatcher. They get a total of 7 arguments. The + // 6th argument is the index of the function to call which ... + +#if defined(_WIN32) + // and spills to the stack on Windows. + + // mov qword [rsp + 8], VOLATILE_CTXT + emit1(state, 0x4c); + emit1(state, 0x89); + emit1(state, 0x5c); + emit1(state, 0x24); + emit1(state, 0x08); + + // To make it easier on ourselves, let's just use + // VOLATILE_CTXT register to load the immediate + // and push to the stack. + emit_load_imm(state, VOLATILE_CTXT, (uint64_t)idx); + + // mov qword [rsp + 0], VOLATILE_CTXT + emit1(state, 0x4c); + emit1(state, 0x89); + emit1(state, 0x5c); + emit1(state, 0x24); + emit1(state, 0x00); +#else + // and goes in R9 on SystemV. + emit_load_imm(state, R9, (uint64_t)idx); + // And the 7th is already spilled to the stack in the right spot because + // we wanted to save it -- cool (see MARKER1, above). + + // Intentional no-op for 7th argument. +#endif + + // Control flow converges for call: + + // call_label: + emit_jump_target(state, skip_external_dispatcher_source); + +#if defined(_WIN32) + /* Windows x64 ABI spills 5th parameter to stack (MARKER2) */ + emit_push(state, map_register(5)); + + /* Windows x64 ABI requires home register space. + * Allocate home register space - 4 registers. + */ + emit_alu64_imm32(state, 0x81, 5, RSP, 4 * sizeof(uint64_t)); +#endif + +#ifndef UBPF_DISABLE_RETPOLINES + emit_call(state, TARGET_PC_RETPOLINE); +#else + /* TODO use direct call when possible */ + /* callq *%rax */ + emit1(state, 0xff); + // ModR/M byte: b11010000b = xd + // ^ + // register-direct addressing. + // ^ + // opcode extension (2) + // ^ + // rax is register 0 + emit1(state, 0xd0); +#endif + + // The result is in RAX. Nothing to do there. + // Just rationalize the stack! + +#if defined(_WIN32) + /* Deallocate home register space + (up to ) 3 spilled parameters + alignment space */ + emit_alu64_imm32(state, 0x81, 0, RSP, (4 + 3 + 1) * sizeof(uint64_t)); +#endif + + emit_pop(state, VOLATILE_CTXT); // Restore register where volatile context is stored. + emit_pop(state, VOLATILE_CTXT); // Restore register where volatile context is stored. +} + +#define X64_ALU_ADD 0x01 +#define X64_ALU_OR 0x09 +#define X64_ALU_AND 0x21 +#define X64_ALU_XOR 0x31 + +#define IS_64BIT 1 +#define IS_32BIT 0 + +/** + * @brief Emit an atomic ALU operation without fetch. + * + * This applies the operation to the memory location identified by the destination register and offset, using the source + * register as the operand. + * + * @param[in,out] state The JIT state + * @param[in] opcode The x64 opcode for the ALU operation + * @param[in] is_64bit Whether the operation is 64-bit or 32-bit + * @param[in] src Source register + * @param[in] dst Destination register + * @param[in] offset Offset from the destination register + */ +static inline void +emit_atomic_alu(struct jit_state* state, int opcode, int is_64bit, int src, int dst, int offset) +{ + emit1(state, 0xf0); // lock prefix + emit_basic_rex(state, is_64bit, src, dst); + emit1(state, opcode); + emit_modrm_and_displacement(state, src, dst, offset); +} + +/** + * @brief Atomically compare and exchange. + * This writes the value stored in src into the memory location identified by dst and offset, if the value at that + * location is equal to the value in RAX. + * + * @param[in,out] state The JIT state + * @param[in] is_64bit Whether the operation is 64-bit or 32-bit + * @param[in] src The value to write. + * @param[in] dst The base address of the destination memory location. + * @param[in] offset The offset from dst. + * @note This implicitly uses RAX as the original value to compare against and stores the original value in [destination + * + offset] into RAX. + */ +static inline void +emit_atomic_cmp_exch_with_rax(struct jit_state* state, int is_64bit, int src, int dst, int offset) +{ + emit1(state, 0xf0); // lock prefix + emit_basic_rex(state, is_64bit, src, dst); + emit1(state, 0x0f); + emit1(state, 0xb1); + emit_modrm_and_displacement(state, src, dst, offset); +} + +/** + * @brief Atomically swaps the value of src into the memory location identified by dst and offset and sets src to the + * original value at that location. + * + * @param[in,out] state The JIT state + * @param[in] is_64bit Whether the operation is 64-bit or 32-bit + * @param[in] src The value to write. + * @param[in] dst The base address of the destination memory location. + * @param[in] offset The offset from the destination memory location. + */ +static inline void +emit_atomic_exchange(struct jit_state* state, int is_64bit, int src, int dst, int offset) +{ + emit1(state, 0xf0); // lock prefix + emit_basic_rex(state, is_64bit, src, dst); + emit1(state, 0x87); + emit_modrm_and_displacement(state, src, dst, offset); +} + +/** + * @brief Perform an ALU operation atomically and fetch the result. + * + * @param[in,out] state The JIT state + * @param[in] is_64bit Whether the operation is 64-bit or 32-bit + * @param[in] opcode The x64 opcode for the ALU operation + * @param[in] src The source register, used as the operand for the ALU operation. + * @param[in] dst The base address of the destination memory location. + * @param[in] offset The offset from the destination memory location. + * @note This operation stores the original value in [destination + offset] into RAX. + * @note This operation is emulated on x64 for 64-bit and 32-bit operations as the x64 architecture does not have an + * atomic fetch-and, fetch-or, fetch-xor, or fetch-add instruction. + */ +static inline void +emit_atomic_fetch_alu(struct jit_state* state, int is_64bit, int opcode, int src, int dst, int offset) +{ + // x64 lacks a 64-bit atomic version of some alu-fetch instruction, so we emulate it with a compare-exchange. + // This is not a problem because the compare-exchange instruction is a full memory barrier. + + // Compare exchange overwrites RAX, so we need to save it. + emit_push(state, RAX); + + // Load the original value at the destination into RAX. + emit_load(state, is_64bit ? S64 : S32, dst, RAX, offset); + + // Loop until we successfully update the value. + uint32_t loop_start = state->offset; + + // Copy RAX to RCX + emit_mov(state, RAX, RCX); + + // Perform the ALU operation into RCX. + emit_alu64(state, opcode, src, RCX); + + // Attempt to compare-exchange the value. + emit_atomic_cmp_exch_with_rax(state, is_64bit, RCX, dst, offset); + + // If the compare-exchange failed, loop. + emit1(state, 0x75); + emit1(state, loop_start - state->offset - 1); + + // Move RAX into the src register + emit_mov(state, RAX, src); + + // Restore RAX + emit_pop(state, RAX); +} + +static inline void +emit_atomic_add64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_ADD, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_add32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_ADD, IS_32BIT, src, dst, offset); +} + +static inline void +emit_atomic_and64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_AND, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_and32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_AND, IS_32BIT, src, dst, offset); +} + +static inline void +emit_atomic_or64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_OR, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_or32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_OR, IS_32BIT, src, dst, offset); +} + +static inline void +emit_atomic_xor64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_XOR, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_xor32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_alu(state, X64_ALU_XOR, IS_32BIT, src, dst, offset); +} + + +static inline void +emit_atomic_compare_exchange64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_cmp_exch_with_rax(state, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_compare_exchange32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_cmp_exch_with_rax(state, IS_32BIT, src, dst, offset); +} + + +static inline void +emit_atomic_exchange64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_exchange(state, IS_64BIT, src, dst, offset); +} + +static inline void +emit_atomic_exchange32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_exchange(state, IS_32BIT, src, dst, offset); +} + +static inline void +emit_atomic_fetch_add64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_ADD, src, dst, offset); +} + +static inline void +emit_atomic_fetch_add32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_ADD, src, dst, offset); +} + +static inline void +emit_atomic_fetch_and64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_AND, src, dst, offset); +} + +static inline void +emit_atomic_fetch_and32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_AND, src, dst, offset); +} + +static inline void +emit_atomic_fetch_or64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_OR, src, dst, offset); +} + +static inline void +emit_atomic_fetch_or32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_OR, src, dst, offset); +} + +static inline void +emit_atomic_fetch_xor64(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_XOR, src, dst, offset); +} + +static inline void +emit_atomic_fetch_xor32(struct jit_state* state, int src, int dst, int offset) +{ + emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_XOR, src, dst, offset); +} + static void -muldivmod(struct jit_state* state, uint8_t opcode, int src, int dst, int32_t imm); +emit_muldivmod(struct jit_state* state, uint8_t opcode, int src, int dst, int32_t imm) +{ + bool mul = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MUL_IMM & EBPF_ALU_OP_MASK); + bool div = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_DIV_IMM & EBPF_ALU_OP_MASK); + bool mod = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MOD_IMM & EBPF_ALU_OP_MASK); + bool is64 = (opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU64; + bool reg = (opcode & EBPF_SRC_REG) == EBPF_SRC_REG; + + // Short circuit for imm == 0. + if (!reg && imm == 0) { + if (div || mul) { + // For division and multiplication, set result to zero. + emit_alu32(state, 0x31, dst, dst); + } else { + // For modulo, set result to dividend. + emit_mov(state, dst, dst); + } + return; + } + + if (dst != RAX) { + emit_push(state, RAX); + } + + if (dst != RDX) { + emit_push(state, RDX); + } + + // Load the divisor into RCX. + if (!reg) { + emit_load_imm(state, RCX, imm); + } else { + emit_mov(state, src, RCX); + } + + // Load the dividend into RAX. + emit_mov(state, dst, RAX); + + // BPF has two different semantics for division and modulus. For division + // if the divisor is zero, the result is zero. For modulus, if the divisor + // is zero, the result is the dividend. To handle this we set the divisor + // to 1 if it is zero and then set the result to zero if the divisor was + // zero (for division) or set the result to the dividend if the divisor was + // zero (for modulo). + + if (div || mod) { + // Check if divisor is zero. + if (is64) { + emit_alu64(state, 0x85, RCX, RCX); + } else { + emit_alu32(state, 0x85, RCX, RCX); + } -#define REGISTER_MAP_SIZE 11 + // Save the dividend for the modulo case. + if (mod) { + emit_push(state, RAX); // Save dividend. + } -/* - * There are two common x86-64 calling conventions, as discussed at - * https://en.wikipedia.org/wiki/X86_calling_conventions#x86-64_calling_conventions - * - * Please Note: R12 is special and we are *not* using it. As a result, it is omitted - * from the list of non-volatile registers for both platforms (even though it is, in - * fact, non-volatile). - * - * BPF R0-R4 are "volatile" - * BPF R5-R10 are "non-volatile" - * In general, we attempt to map BPF volatile registers to x64 volatile and BPF non- - * volatile to x64 non-volatile. - */ + // Save the result of the test. + emit1(state, 0x9c); /* pushfq */ -// Because of this designation and the way that the registers are mapped -// between native and BPF, the value in native R10 is always something -// the BPF program has to consider trashed across external function calls. -// Therefore, during invocation of external function calls, we can use -// native R10 for free. -#define RCX_ALT R10 + // Set the divisor to 1 if it is zero. + emit_load_imm(state, RDX, 1); + emit1(state, 0x48); + emit1(state, 0x0f); + emit1(state, 0x44); + emit1(state, 0xca); /* cmove rcx,rdx */ -#if defined(_WIN32) -static int platform_nonvolatile_registers[] = {RBP, RBX, RDI, RSI, R12, R13, R14, R15}; // Callee-saved registers. -static int platform_volatile_registers[] = {RAX, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). -static int platform_parameter_registers[] = {RCX, RDX, R8, R9}; -static int register_map[REGISTER_MAP_SIZE] = { - // Scratch registers - RAX, - R10, - RDX, - R8, - R9, - R12, - // Non-volatile registers - RBX, - RDI, - RSI, - R14, - R15, // Until further notice, r15 must be mapped to eBPF register r10 -}; -#else -static int platform_nonvolatile_registers[] = {RBP, RBX, R12, R13, R14, R15}; // Callee-saved registers. -static int platform_volatile_registers[] = { - RAX, RDI, RSI, RDX, RCX, R8, R9, R10, R11}; // Caller-saved registers (if needed). -static int platform_parameter_registers[] = {RDI, RSI, RDX, RCX, R8, R9}; -static int register_map[REGISTER_MAP_SIZE] = { - // Scratch registers - RAX, - RDI, - RSI, - RDX, - R10, - R8, - // Non-volatile registers - RBX, - R12, - R13, - R14, - R15, // Until further notice, r15 must be mapped to eBPF register r10 -}; -#endif + /* xor %edx,%edx */ + emit_alu32(state, 0x31, RDX, RDX); + } -/* Return the x86 register for the given eBPF register */ -static int -map_register(int r) -{ - assert(r < _BPF_REG_MAX); - return register_map[r % _BPF_REG_MAX]; -} + if (is64) { + emit_rex(state, 1, 0, 0, 0); + } + + // Multiply or divide. + emit_alu32(state, 0xf7, mul ? 4 : 6, RCX); + + // Division operation stores the remainder in RDX and the quotient in RAX. + if (div || mod) { + // Restore the result of the test. + emit1(state, 0x9d); /* popfq */ + + // If zero flag is set, then the divisor was zero. + + if (div) { + // Set the dividend to zero if the divisor was zero. + emit_load_imm(state, RCX, 0); + + // Store 0 in RAX if the divisor was zero. + // Use conditional move to avoid a branch. + emit1(state, 0x48); + emit1(state, 0x0f); + emit1(state, 0x44); + emit1(state, 0xc1); /* cmove rax,rcx */ + } else { + // Restore dividend to RCX. + emit_pop(state, RCX); + + // Store the dividend in RAX if the divisor was zero. + // Use conditional move to avoid a branch. + emit1(state, 0x48); + emit1(state, 0x0f); + emit1(state, 0x44); + emit1(state, 0xd1); /* cmove rdx,rcx */ + } + } + if (dst != RDX) { + if (mod) { + emit_mov(state, RDX, dst); + } + emit_pop(state, RDX); + } + if (dst != RAX) { + if (div || mul) { + emit_mov(state, RAX, dst); + } + emit_pop(state, RAX); + } +} static inline void emit_local_call(struct ubpf_vm* vm, struct jit_state* state, uint32_t target_pc) { @@ -425,7 +1422,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) case EBPF_OP_DIV_REG: case EBPF_OP_MOD_IMM: case EBPF_OP_MOD_REG: - muldivmod(state, inst.opcode, src, dst, inst.imm); + emit_muldivmod(state, inst.opcode, src, dst, inst.imm); break; case EBPF_OP_OR_IMM: emit_alu32_imm32(state, 0x81, 1, dst, inst.imm); @@ -519,7 +1516,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) case EBPF_OP_DIV64_REG: case EBPF_OP_MOD64_IMM: case EBPF_OP_MOD64_REG: - muldivmod(state, inst.opcode, src, dst, inst.imm); + emit_muldivmod(state, inst.opcode, src, dst, inst.imm); break; case EBPF_OP_OR64_IMM: emit_alu64_imm32(state, 0x81, 1, dst, inst.imm); @@ -987,129 +1984,7 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) return 0; } -static void -muldivmod(struct jit_state* state, uint8_t opcode, int src, int dst, int32_t imm) -{ - bool mul = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MUL_IMM & EBPF_ALU_OP_MASK); - bool div = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_DIV_IMM & EBPF_ALU_OP_MASK); - bool mod = (opcode & EBPF_ALU_OP_MASK) == (EBPF_OP_MOD_IMM & EBPF_ALU_OP_MASK); - bool is64 = (opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU64; - bool reg = (opcode & EBPF_SRC_REG) == EBPF_SRC_REG; - - // Short circuit for imm == 0. - if (!reg && imm == 0) { - if (div || mul) { - // For division and multiplication, set result to zero. - emit_alu32(state, 0x31, dst, dst); - } else { - // For modulo, set result to dividend. - emit_mov(state, dst, dst); - } - return; - } - - if (dst != RAX) { - emit_push(state, RAX); - } - - if (dst != RDX) { - emit_push(state, RDX); - } - - // Load the divisor into RCX. - if (!reg) { - emit_load_imm(state, RCX, imm); - } else { - emit_mov(state, src, RCX); - } - - // Load the dividend into RAX. - emit_mov(state, dst, RAX); - - // BPF has two different semantics for division and modulus. For division - // if the divisor is zero, the result is zero. For modulus, if the divisor - // is zero, the result is the dividend. To handle this we set the divisor - // to 1 if it is zero and then set the result to zero if the divisor was - // zero (for division) or set the result to the dividend if the divisor was - // zero (for modulo). - - if (div || mod) { - // Check if divisor is zero. - if (is64) { - emit_alu64(state, 0x85, RCX, RCX); - } else { - emit_alu32(state, 0x85, RCX, RCX); - } - - // Save the dividend for the modulo case. - if (mod) { - emit_push(state, RAX); // Save dividend. - } - - // Save the result of the test. - emit1(state, 0x9c); /* pushfq */ - - // Set the divisor to 1 if it is zero. - emit_load_imm(state, RDX, 1); - emit1(state, 0x48); - emit1(state, 0x0f); - emit1(state, 0x44); - emit1(state, 0xca); /* cmove rcx,rdx */ - - /* xor %edx,%edx */ - emit_alu32(state, 0x31, RDX, RDX); - } - - if (is64) { - emit_rex(state, 1, 0, 0, 0); - } - - // Multiply or divide. - emit_alu32(state, 0xf7, mul ? 4 : 6, RCX); - - // Division operation stores the remainder in RDX and the quotient in RAX. - if (div || mod) { - // Restore the result of the test. - emit1(state, 0x9d); /* popfq */ - - // If zero flag is set, then the divisor was zero. - - if (div) { - // Set the dividend to zero if the divisor was zero. - emit_load_imm(state, RCX, 0); - - // Store 0 in RAX if the divisor was zero. - // Use conditional move to avoid a branch. - emit1(state, 0x48); - emit1(state, 0x0f); - emit1(state, 0x44); - emit1(state, 0xc1); /* cmove rax,rcx */ - } else { - // Restore dividend to RCX. - emit_pop(state, RCX); - - // Store the dividend in RAX if the divisor was zero. - // Use conditional move to avoid a branch. - emit1(state, 0x48); - emit1(state, 0x0f); - emit1(state, 0x44); - emit1(state, 0xd1); /* cmove rdx,rcx */ - } - } - if (dst != RDX) { - if (mod) { - emit_mov(state, RDX, dst); - } - emit_pop(state, RDX); - } - if (dst != RAX) { - if (div || mul) { - emit_mov(state, RAX, dst); - } - emit_pop(state, RAX); - } -} static bool resolve_patchable_relatives(struct jit_state* state) diff --git a/vm/ubpf_jit_x86_64.h b/vm/ubpf_jit_x86_64.h deleted file mode 100644 index 0f0ab5062..000000000 --- a/vm/ubpf_jit_x86_64.h +++ /dev/null @@ -1,911 +0,0 @@ -// Copyright (c) 2015 Big Switch Networks, Inc -// SPDX-License-Identifier: Apache-2.0 - -/* - * Copyright 2015 Big Switch Networks, Inc - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Generic x86-64 code generation functions - */ - -#ifndef UBPF_JIT_X86_64_H -#define UBPF_JIT_X86_64_H - -#include -#include -#include - -#include "ubpf.h" -#include "ubpf_jit_support.h" - -#define RAX 0 -#define RCX 1 -#define RDX 2 -#define RBX 3 -#define RSP 4 -#define RBP 5 -#define RIP 5 -#define RSI 6 -#define RDI 7 -#define R8 8 -#define R9 9 -#define R10 10 -#define R11 11 -#define R12 12 -#define R13 13 -#define R14 14 -#define R15 15 - -#define VOLATILE_CTXT 11 - -enum operand_size -{ - S8, - S16, - S32, - S64, -}; - -static inline void -emit_bytes(struct jit_state* state, void* data, uint32_t len) -{ - // Never emit any bytes if there is an error! - if (state->jit_status != NoError) { - return; - } - - // If we are trying to emit bytes to a spot outside the buffer, - // then there is not enough space! - if ((state->offset + len) > state->size) { - state->jit_status = NotEnoughSpace; - return; - } - - memcpy(state->buf + state->offset, data, len); - state->offset += len; -} - -static inline void -emit1(struct jit_state* state, uint8_t x) -{ - emit_bytes(state, &x, sizeof(x)); -} - -static inline void -emit2(struct jit_state* state, uint16_t x) -{ - emit_bytes(state, &x, sizeof(x)); -} - -static inline void -emit4(struct jit_state* state, uint32_t x) -{ - emit_bytes(state, &x, sizeof(x)); -} - -static inline void -emit8(struct jit_state* state, uint64_t x) -{ - emit_bytes(state, &x, sizeof(x)); -} - -static void -emit_4byte_offset_placeholder(struct jit_state* state) -{ - emit4(state, 0); -} - -static uint32_t -emit_jump_address_reloc(struct jit_state* state, int32_t target_pc) -{ - if (state->num_jumps == UBPF_MAX_INSTS) { - state->jit_status = TooManyJumps; - return 0; - } - uint32_t target_address_offset = state->offset; - emit_patchable_relative(state->offset, target_pc, 0, state->jumps, state->num_jumps++); - emit_4byte_offset_placeholder(state); - return target_address_offset; -} - -static uint32_t -emit_near_jump_address_reloc(struct jit_state* state, int32_t target_pc) -{ - if (state->num_jumps == UBPF_MAX_INSTS) { - state->jit_status = TooManyJumps; - return 0; - } - uint32_t target_address_offset = state->offset; - emit_patchable_relative_ex(state->offset, target_pc, 0, state->jumps, state->num_jumps++, true /* near */); - emit1(state, 0x0); - return target_address_offset; -} - -static uint32_t -emit_local_call_address_reloc(struct jit_state* state, int32_t target_pc) -{ - if (state->num_local_calls == UBPF_MAX_INSTS) { - state->jit_status = TooManyLocalCalls; - return 0; - } - uint32_t target_address_offset = state->offset; - emit_patchable_relative(state->offset, target_pc, 0, state->local_calls, state->num_local_calls++); - emit_4byte_offset_placeholder(state); - return target_address_offset; -} - -static inline void -emit_modrm(struct jit_state* state, int mod, int r, int m) -{ - // Only the top 2 bits of the mod should be used. - assert(!(mod & ~0xc0)); - emit1(state, (mod & 0xc0) | ((r & 7) << 3) | (m & 7)); -} - -static inline void -emit_modrm_reg2reg(struct jit_state* state, int r, int m) -{ - emit_modrm(state, 0xc0, r, m); -} - -/** - * @brief Emit an ModRM byte and accompanying displacement. - * Special case for the situation where the displacement is 0. - * - * @param[in] state The JIT state in which to emit this instruction. - * @param[in] reg The value for the reg of the ModRM byte. - * @param[in] rm The value for the rm of the ModRM byte. - * @param[in] d The displacement value - */ -static inline void -emit_modrm_and_displacement(struct jit_state* state, int reg, int rm, int32_t d) -{ - rm &= 0xf; - reg &= 0xf; - - // Handle 0 displacement special (where we can!). - if (d == 0 && rm != RSP && rm != RBP && rm != R12 && rm != R13) { - emit_modrm(state, 0x00, reg, rm); - return; - } - - uint32_t near_disp = (d >= -128 && d <= 127); - uint8_t mod = near_disp ? 0x40 : 0x80; - - emit_modrm(state, mod, reg, rm); - if (rm == R12) { - // When using R12 as the rm in (rm + disp), the actual - // rm has to be put in an SIB. SIB value of 0x24 means: - // scale (of index): N/A (see below) - // index: no index - // base: R12 - // A SIB byte with this value means that the resulting - // encoded instruction will mimic the semantics when - // using any other register. - emit1(state, 0x24); - } - - if (near_disp) - emit1(state, d); - else - emit4(state, d); -} - -static inline void -emit_rex(struct jit_state* state, int w, int r, int x, int b) -{ - assert(!(w & ~1)); - assert(!(r & ~1)); - assert(!(x & ~1)); - assert(!(b & ~1)); - emit1(state, 0x40 | (w << 3) | (r << 2) | (x << 1) | b); -} - -/* - * Emits a REX prefix with the top bit of src and dst. - * Skipped if no bits would be set. - */ -static inline void -emit_basic_rex(struct jit_state* state, int w, int src, int dst) -{ - if (w || (src & 8) || (dst & 8)) { - emit_rex(state, w, !!(src & 8), 0, !!(dst & 8)); - } -} - -static inline void -emit_push(struct jit_state* state, int r) -{ - emit_basic_rex(state, 0, 0, r); - emit1(state, 0x50 | (r & 7)); -} - -static inline void -emit_pop(struct jit_state* state, int r) -{ - emit_basic_rex(state, 0, 0, r); - emit1(state, 0x58 | (r & 7)); -} - -/* REX prefix and ModRM byte */ -/* We use the MR encoding when there is a choice */ -/* 'src' is often used as an opcode extension */ -static inline void -emit_alu32(struct jit_state* state, int op, int src, int dst) -{ - emit_basic_rex(state, 0, src, dst); - emit1(state, op); - emit_modrm_reg2reg(state, src, dst); -} - -/* REX prefix, ModRM byte, and 32-bit immediate */ -static inline void -emit_alu32_imm32(struct jit_state* state, int op, int src, int dst, int32_t imm) -{ - emit_alu32(state, op, src, dst); - emit4(state, imm); -} - -/* REX prefix, ModRM byte, and 8-bit immediate */ -static inline void -emit_alu32_imm8(struct jit_state* state, int op, int src, int dst, int8_t imm) -{ - emit_alu32(state, op, src, dst); - emit1(state, imm); -} - -static inline void -emit_truncate_u32(struct jit_state* state, int destination) -{ - emit_alu32_imm32(state, 0x81, 4, destination, UINT32_MAX); -} - -/* REX.W prefix and ModRM byte */ -/* We use the MR encoding when there is a choice */ -/* 'src' is often used as an opcode extension */ -static inline void -emit_alu64(struct jit_state* state, int op, int src, int dst) -{ - emit_basic_rex(state, 1, src, dst); - emit1(state, op); - emit_modrm_reg2reg(state, src, dst); -} - -/* REX.W prefix, ModRM byte, and 32-bit immediate */ -static inline void -emit_alu64_imm32(struct jit_state* state, int op, int src, int dst, int32_t imm) -{ - emit_alu64(state, op, src, dst); - emit4(state, imm); -} - -/* REX.W prefix, ModRM byte, and 8-bit immediate */ -static inline void -emit_alu64_imm8(struct jit_state* state, int op, int src, int dst, int8_t imm) -{ - emit_alu64(state, op, src, dst); - emit1(state, imm); -} - -/* Register to register mov */ -static inline void -emit_mov(struct jit_state* state, int src, int dst) -{ - emit_alu64(state, 0x89, src, dst); -} - -static inline void -emit_cmp_imm32(struct jit_state* state, int dst, int32_t imm) -{ - emit_alu64_imm32(state, 0x81, 7, dst, imm); -} - -static inline void -emit_cmp32_imm32(struct jit_state* state, int dst, int32_t imm) -{ - emit_alu32_imm32(state, 0x81, 7, dst, imm); -} - -static inline void -emit_cmp(struct jit_state* state, int src, int dst) -{ - emit_alu64(state, 0x39, src, dst); -} - -static inline void -emit_cmp32(struct jit_state* state, int src, int dst) -{ - emit_alu32(state, 0x39, src, dst); -} - -static inline uint32_t -emit_jcc(struct jit_state* state, int code, int32_t target_pc) -{ - emit1(state, 0x0f); - emit1(state, code); - return emit_jump_address_reloc(state, target_pc); -} - -/* Load [src + offset] into dst */ -static inline void -emit_load(struct jit_state* state, enum operand_size size, int src, int dst, int32_t offset) -{ - emit_basic_rex(state, size == S64, dst, src); - - if (size == S8 || size == S16) { - /* movzx */ - emit1(state, 0x0f); - emit1(state, size == S8 ? 0xb6 : 0xb7); - } else if (size == S32 || size == S64) { - /* mov */ - emit1(state, 0x8b); - } - - emit_modrm_and_displacement(state, dst, src, offset); -} - -/* Load sign-extended immediate into register */ -static inline void -emit_load_imm(struct jit_state* state, int dst, int64_t imm) -{ - if (imm >= INT32_MIN && imm <= INT32_MAX) { - emit_alu64_imm32(state, 0xc7, 0, dst, imm); - } else { - /* movabs $imm,dst */ - emit_basic_rex(state, 1, 0, dst); - emit1(state, 0xb8 | (dst & 7)); - emit8(state, imm); - } -} - -static uint32_t -emit_rip_relative_load(struct jit_state* state, int dst, int relative_load_tgt) -{ - if (state->num_loads == UBPF_MAX_INSTS) { - state->jit_status = TooManyLoads; - return 0; - } - - emit_rex(state, 1, 0, 0, 0); - emit1(state, 0x8b); - emit_modrm(state, 0, dst, 0x05); - uint32_t load_target_offset = state->offset; - note_load(state, relative_load_tgt); - emit_4byte_offset_placeholder(state); - return load_target_offset; -} - -static void -emit_rip_relative_lea(struct jit_state* state, int dst, int lea_tgt) -{ - if (state->num_leas == UBPF_MAX_INSTS) { - state->jit_status = TooManyLeas; - return; - } - - // lea dst, [rip + HELPER TABLE ADDRESS] - emit_rex(state, 1, 1, 0, 0); - emit1(state, 0x8d); - emit_modrm(state, 0, dst, 0x05); - note_lea(state, lea_tgt); - emit_4byte_offset_placeholder(state); -} - -/* Store register src to [dst + offset] */ -static inline void -emit_store(struct jit_state* state, enum operand_size size, int src, int dst, int32_t offset) -{ - if (size == S16) { - emit1(state, 0x66); /* 16-bit override */ - } - int rexw = size == S64; - if (rexw || src & 8 || dst & 8 || size == S8) { - emit_rex(state, rexw, !!(src & 8), 0, !!(dst & 8)); - } - emit1(state, size == S8 ? 0x88 : 0x89); - emit_modrm_and_displacement(state, src, dst, offset); -} - -/* Store immediate to [dst + offset] */ -static inline void -emit_store_imm32(struct jit_state* state, enum operand_size size, int dst, int32_t offset, int32_t imm) -{ - if (size == S16) { - emit1(state, 0x66); /* 16-bit override */ - } - emit_basic_rex(state, size == S64, 0, dst); - emit1(state, size == S8 ? 0xc6 : 0xc7); - emit_modrm_and_displacement(state, 0, dst, offset); - if (size == S32 || size == S64) { - emit4(state, imm); - } else if (size == S16) { - emit2(state, imm); - } else if (size == S8) { - emit1(state, imm); - } -} - -static inline void -emit_ret(struct jit_state* state) -{ - emit1(state, 0xc3); -} - -/** @brief Emit a (32-bit) jump. - * - * @param[in] state The JIT state. - * @param[in] target_pc The PC to which to jump when this near - * jump is executed. - * @return The offset in the JIT'd code where the jump offset starts. - */ -static inline uint32_t -emit_jmp(struct jit_state* state, uint32_t target_pc) -{ - emit1(state, 0xe9); - return emit_jump_address_reloc(state, target_pc); -} - -/** @brief Emit a near jump. - * - * @param[in] state The JIT state. - * @param[in] target_pc The PC to which to jump when this near - * jump is executed. - * @return The offset in the JIT'd code where the jump offset starts. - */ -static inline uint32_t -emit_near_jmp(struct jit_state* state, uint32_t target_pc) -{ - emit1(state, 0xeb); - return emit_near_jump_address_reloc(state, target_pc); -} - -static inline uint32_t -emit_call(struct jit_state* state, uint32_t target_pc) -{ - emit1(state, 0xe8); - uint32_t call_src = state->offset; - emit_jump_address_reloc(state, target_pc); - return call_src; -} - -static inline void -emit_pause(struct jit_state* state) -{ - emit1(state, 0xf3); - emit1(state, 0x90); -} - -static inline void -emit_dispatched_external_helper_call(struct jit_state* state, unsigned int idx) -{ - /* - * Note: We do *not* have to preserve any x86-64 registers here ... - * ... according to the SystemV ABI: rbx (eBPF6), - * r13 (eBPF7), - * r14 (eBPF8), - * r15 (eBPF9), and - * rbp (eBPF10) are all preserved. - * ... according to the Windows ABI: r15 (eBPF6) - * rdi (eBPF7), - * rsi (eBPF8), - * rbx (eBPF9), and - * rbp (eBPF10) are all preserved. - * - * When we enter here, our stack is 16-byte aligned. Keep - * it that way! - */ - - /* - * There are two things that could happen: - * 1. The user has registered an external dispatcher and we need to - * send control there to invoke an external helper. - * 2. The user is relying on the default dispatcher to pass control - * to the registered external helper. - * To determine which action to take, we will first consider the 8 - * bytes at TARGET_PC_EXTERNAL_DISPATCHER. If those 8 bytes have an - * address, that represents the address of the user-registered external - * dispatcher and we pass control there. That function signature looks like - * uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, unsigned int index, void* cookie - * so we make sure that the arguments are done properly depending on the abi. - * - * If there is no external dispatcher registered, the user is expected - * to have registered a handler with us for the helper with index idx. - * There is a table of MAX_ function pointers starting at TARGET_LOAD_HELPER_TABLE. - * Each of those functions has a signature that looks like - * uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, void* cookie - * We load the appropriate function pointer by using idx to index it and then - * make sure that the arguments are set properly depending on the abi. - */ - - // Save register where volatile context is stored. - emit_push(state, VOLATILE_CTXT); - emit_push(state, VOLATILE_CTXT); - // ^^ Stack is aligned here. - -#if defined(_WIN32) - /* Because we may need 24 bytes on the stack but at least 16, we have to take 32 - * to keep alignment happy. We may ultimately need it all, but we certainly - * need 16! Later, though, there is a push that always happens (MARKER2), so - * we only allocate 24 here. - */ - emit_alu64_imm32(state, 0x81, 5, RSP, 3 * sizeof(uint64_t)); -#endif - - emit_rip_relative_load(state, RAX, TARGET_PC_EXTERNAL_DISPATCHER); - // cmp rax, 0 - emit_cmp_imm32(state, RAX, 0); - // jne skip_default_dispatcher_label - uint32_t skip_default_dispatcher_source = emit_jcc(state, 0x85, 0); - - // Default dispatcher: - - // Load the address of the helper function from the table. - // mov rax, idx - emit_alu32(state, 0xc7, 0, RAX); - emit4(state, idx); - // shl rax, 3 (i.e., multiply the index by 8 because addresses are that size on x86-64) - emit_alu64_imm8(state, 0xc1, 4, RAX, 3); - - // lea r10, [rip + HELPER TABLE ADDRESS] - emit_rip_relative_lea(state, R10, TARGET_LOAD_HELPER_TABLE); - - // add rax, r10 - emit_alu64(state, 0x01, R10, RAX); - // load rax, [rax] - emit_load(state, S64, RAX, RAX, 0); - - // There is no index for the registered helper function. They just get - // 5 arguments and a context, which becomes the 6th argument to the function ... -#if defined(_WIN32) - // and spills to the stack on Windows. - // mov qword [rsp], VOLATILE_CTXT - emit1(state, 0x4c); - emit1(state, 0x89); - emit1(state, 0x5c); - emit1(state, 0x24); - emit1(state, 0x00); -#else - // and goes in R9 on SystemV. - emit_mov(state, VOLATILE_CTXT, R9); -#endif - - // jmp call_label - uint32_t skip_external_dispatcher_source = emit_jmp(state, 0); - - // External dispatcher: - - // skip_default_dispatcher_label: - emit_jump_target(state, skip_default_dispatcher_source); - - // Using an external dispatcher. They get a total of 7 arguments. The - // 6th argument is the index of the function to call which ... - -#if defined(_WIN32) - // and spills to the stack on Windows. - - // mov qword [rsp + 8], VOLATILE_CTXT - emit1(state, 0x4c); - emit1(state, 0x89); - emit1(state, 0x5c); - emit1(state, 0x24); - emit1(state, 0x08); - - // To make it easier on ourselves, let's just use - // VOLATILE_CTXT register to load the immediate - // and push to the stack. - emit_load_imm(state, VOLATILE_CTXT, (uint64_t)idx); - - // mov qword [rsp + 0], VOLATILE_CTXT - emit1(state, 0x4c); - emit1(state, 0x89); - emit1(state, 0x5c); - emit1(state, 0x24); - emit1(state, 0x00); -#else - // and goes in R9 on SystemV. - emit_load_imm(state, R9, (uint64_t)idx); - // And the 7th is already spilled to the stack in the right spot because - // we wanted to save it -- cool (see MARKER1, above). - - // Intentional no-op for 7th argument. -#endif - - // Control flow converges for call: - - // call_label: - emit_jump_target(state, skip_external_dispatcher_source); - -#if defined(_WIN32) - /* Windows x64 ABI spills 5th parameter to stack (MARKER2) */ - emit_push(state, map_register(5)); - - /* Windows x64 ABI requires home register space. - * Allocate home register space - 4 registers. - */ - emit_alu64_imm32(state, 0x81, 5, RSP, 4 * sizeof(uint64_t)); -#endif - -#ifndef UBPF_DISABLE_RETPOLINES - emit_call(state, TARGET_PC_RETPOLINE); -#else - /* TODO use direct call when possible */ - /* callq *%rax */ - emit1(state, 0xff); - // ModR/M byte: b11010000b = xd - // ^ - // register-direct addressing. - // ^ - // opcode extension (2) - // ^ - // rax is register 0 - emit1(state, 0xd0); -#endif - - // The result is in RAX. Nothing to do there. - // Just rationalize the stack! - -#if defined(_WIN32) - /* Deallocate home register space + (up to ) 3 spilled parameters + alignment space */ - emit_alu64_imm32(state, 0x81, 0, RSP, (4 + 3 + 1) * sizeof(uint64_t)); -#endif - - emit_pop(state, VOLATILE_CTXT); // Restore register where volatile context is stored. - emit_pop(state, VOLATILE_CTXT); // Restore register where volatile context is stored. -} - -#define X64_ALU_ADD 0x01 -#define X64_ALU_OR 0x09 -#define X64_ALU_AND 0x21 -#define X64_ALU_XOR 0x31 - -#define IS_64BIT 1 -#define IS_32BIT 0 - -/** - * @brief Emit an atomic ALU operation without fetch. - * - * This applies the operation to the memory location identified by the destination register and offset, using the source - * register as the operand. - * - * @param[in,out] state The JIT state - * @param[in] opcode The x64 opcode for the ALU operation - * @param[in] is_64bit Whether the operation is 64-bit or 32-bit - * @param[in] src Source register - * @param[in] dst Destination register - * @param[in] offset Offset from the destination register - */ -static inline void -emit_atomic_alu(struct jit_state* state, int opcode, int is_64bit, int src, int dst, int offset) -{ - emit1(state, 0xf0); // lock prefix - emit_basic_rex(state, is_64bit, src, dst); - emit1(state, opcode); - emit_modrm_and_displacement(state, src, dst, offset); -} - -/** - * @brief Atomically compare and exchange. - * This writes the value stored in src into the memory location identified by dst and offset, if the value at that - * location is equal to the value in RAX. - * - * @param[in,out] state The JIT state - * @param[in] is_64bit Whether the operation is 64-bit or 32-bit - * @param[in] src The value to write. - * @param[in] dst The base address of the destination memory location. - * @param[in] offset The offset from dst. - * @note This implicitly uses RAX as the original value to compare against and stores the original value in [destination - * + offset] into RAX. - */ -static inline void -emit_atomic_cmp_exch_with_rax(struct jit_state* state, int is_64bit, int src, int dst, int offset) -{ - emit1(state, 0xf0); // lock prefix - emit_basic_rex(state, is_64bit, src, dst); - emit1(state, 0x0f); - emit1(state, 0xb1); - emit_modrm_and_displacement(state, src, dst, offset); -} - -/** - * @brief Atomically swaps the value of src into the memory location identified by dst and offset and sets src to the - * original value at that location. - * - * @param[in,out] state The JIT state - * @param[in] is_64bit Whether the operation is 64-bit or 32-bit - * @param[in] src The value to write. - * @param[in] dst The base address of the destination memory location. - * @param[in] offset The offset from the destination memory location. - */ -static inline void -emit_atomic_exchange(struct jit_state* state, int is_64bit, int src, int dst, int offset) -{ - emit1(state, 0xf0); // lock prefix - emit_basic_rex(state, is_64bit, src, dst); - emit1(state, 0x87); - emit_modrm_and_displacement(state, src, dst, offset); -} - -/** - * @brief Perform an ALU operation atomically and fetch the result. - * - * @param[in,out] state The JIT state - * @param[in] is_64bit Whether the operation is 64-bit or 32-bit - * @param[in] opcode The x64 opcode for the ALU operation - * @param[in] src The source register, used as the operand for the ALU operation. - * @param[in] dst The base address of the destination memory location. - * @param[in] offset The offset from the destination memory location. - * @note This operation stores the original value in [destination + offset] into RAX. - * @note This operation is emulated on x64 for 64-bit and 32-bit operations as the x64 architecture does not have an - * atomic fetch-and, fetch-or, fetch-xor, or fetch-add instruction. - */ -static inline void -emit_atomic_fetch_alu(struct jit_state* state, int is_64bit, int opcode, int src, int dst, int offset) -{ - // x64 lacks a 64-bit atomic version of some alu-fetch instruction, so we emulate it with a compare-exchange. - // This is not a problem because the compare-exchange instruction is a full memory barrier. - - // Compare exchange overwrites RAX, so we need to save it. - emit_push(state, RAX); - - // Load the original value at the destination into RAX. - emit_load(state, is_64bit ? S64 : S32, dst, RAX, offset); - - // Loop until we successfully update the value. - uint32_t loop_start = state->offset; - - // Copy RAX to RCX - emit_mov(state, RAX, RCX); - - // Perform the ALU operation into RCX. - emit_alu64(state, opcode, src, RCX); - - // Attempt to compare-exchange the value. - emit_atomic_cmp_exch_with_rax(state, is_64bit, RCX, dst, offset); - - // If the compare-exchange failed, loop. - emit1(state, 0x75); - emit1(state, loop_start - state->offset - 1); - - // Move RAX into the src register - emit_mov(state, RAX, src); - - // Restore RAX - emit_pop(state, RAX); -} - -static inline void -emit_atomic_add64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_ADD, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_add32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_ADD, IS_32BIT, src, dst, offset); -} - -static inline void -emit_atomic_and64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_AND, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_and32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_AND, IS_32BIT, src, dst, offset); -} - -static inline void -emit_atomic_or64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_OR, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_or32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_OR, IS_32BIT, src, dst, offset); -} - -static inline void -emit_atomic_xor64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_XOR, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_xor32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_alu(state, X64_ALU_XOR, IS_32BIT, src, dst, offset); -} - - -static inline void -emit_atomic_compare_exchange64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_cmp_exch_with_rax(state, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_compare_exchange32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_cmp_exch_with_rax(state, IS_32BIT, src, dst, offset); -} - - -static inline void -emit_atomic_exchange64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_exchange(state, IS_64BIT, src, dst, offset); -} - -static inline void -emit_atomic_exchange32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_exchange(state, IS_32BIT, src, dst, offset); -} - -static inline void -emit_atomic_fetch_add64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_ADD, src, dst, offset); -} - -static inline void -emit_atomic_fetch_add32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_ADD, src, dst, offset); -} - -static inline void -emit_atomic_fetch_and64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_AND, src, dst, offset); -} - -static inline void -emit_atomic_fetch_and32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_AND, src, dst, offset); -} - -static inline void -emit_atomic_fetch_or64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_OR, src, dst, offset); -} - -static inline void -emit_atomic_fetch_or32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_OR, src, dst, offset); -} - -static inline void -emit_atomic_fetch_xor64(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_64BIT, X64_ALU_XOR, src, dst, offset); -} - -static inline void -emit_atomic_fetch_xor32(struct jit_state* state, int src, int dst, int offset) -{ - emit_atomic_fetch_alu(state, IS_32BIT, X64_ALU_XOR, src, dst, offset); -} - -#endif