diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index fd7a74902..764077299 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -94,7 +94,8 @@ struct MarshalVisitor { } public: - std::functionint16_t> label_to_offset; + std::functionint16_t> label_to_offset16; + std::functionint32_t> label_to_offset32; vector operator()(Undefined const& a) { assert(false); @@ -179,7 +180,7 @@ struct MarshalVisitor { .opcode = static_cast(INST_CLS_JMP | (op(b.cond->op) << 4)), .dst = b.cond->left.v, .src = 0, - .offset = label_to_offset(b.target), + .offset = label_to_offset16(b.target), }; visit(overloaded{[&](Reg right) { res.opcode |= INST_SRC_REG; @@ -189,7 +190,11 @@ struct MarshalVisitor { b.cond->right); return {res}; } else { - return {ebpf_inst{.opcode = INST_OP_JA, .dst = 0, .src = 0, .offset = label_to_offset(b.target), .imm = 0}}; + int32_t imm = label_to_offset32(b.target); + if (imm != 0) + return {ebpf_inst{.opcode = INST_OP_JA32, .imm = imm}}; + else + return {ebpf_inst{.opcode = INST_OP_JA16, .offset = label_to_offset16(b.target)}}; } } @@ -252,7 +257,9 @@ struct MarshalVisitor { } }; -vector marshal(const Instruction& ins, pc_t pc) { return std::visit(MarshalVisitor{label_to_offset(pc)}, ins); } +vector marshal(const Instruction& ins, pc_t pc) { + return std::visit(MarshalVisitor{label_to_offset16(pc), label_to_offset32(pc)}, ins); +} vector marshal(const vector& insts) { vector res; diff --git a/src/asm_ostream.hpp b/src/asm_ostream.hpp index 7ab2c059e..e36f25fa6 100644 --- a/src/asm_ostream.hpp +++ b/src/asm_ostream.hpp @@ -11,8 +11,21 @@ #include "asm_syntax.hpp" -inline std::function label_to_offset(pc_t pc) { - return [=](const label_t& label) { return label.from - pc - 1; }; +// We use a 16-bit offset whenever it fits in 16 bits. +inline std::function label_to_offset16(pc_t pc) { + return [=](const label_t& label) { + int64_t offset = label.from - (int64_t)pc - 1; + return (offset >= INT16_MIN && offset <= INT16_MAX) ? 
(int16_t)offset : 0; + }; +} + +// We use the JA32 opcode with the offset in 'imm' when the offset +// of an unconditional jump doesn't fit in an int16_t. +inline std::function label_to_offset32(pc_t pc) { + return [=](const label_t& label) { + int64_t offset = label.from - (int64_t)pc - 1; + return (offset >= INT16_MIN && offset <= INT16_MAX) ? 0 : (int32_t)offset; + }; } std::ostream& operator<<(std::ostream& os, const btf_line_info_t& line_info); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 5490bb933..55c0b082b 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -336,7 +336,8 @@ using InstructionSeq = std::vector; #define DECLARE_EQ1(T, f1) \ inline bool operator==(T const& a, T const& b) { return a.f1 == b.f1; } -using pc_t = uint16_t; +// cpu=v4 supports 32-bit PC offsets so we need a large enough type. +using pc_t = size_t; // Helpers: diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index d2e100e14..8b63f5a97 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -381,16 +381,19 @@ struct Unmarshaller { throw InvalidInstruction(pc, "Bad instruction"); return Exit{}; case 0x0: - if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP) + if ((inst.opcode & INST_CLS_MASK) != INST_CLS_JMP && + (inst.opcode & INST_CLS_MASK) != INST_CLS_JMP32) throw InvalidInstruction(pc, "Bad instruction"); default: { - pc_t new_pc = pc + 1 + inst.offset; + int32_t offset = (inst.opcode == INST_OP_JA32) ? inst.imm : inst.offset; + pc_t new_pc = pc + 1 + offset; if (new_pc >= insts.size()) throw InvalidInstruction(pc, "jump out of bounds"); else if (insts[new_pc].opcode == 0) throw InvalidInstruction(pc, "jump to middle of lddw"); - auto cond = inst.opcode == INST_OP_JA ? std::optional{} + auto cond = (inst.opcode == INST_OP_JA16 || inst.opcode == INST_OP_JA32) + ? 
std::optional{} : Condition{ .op = getJmpOp(pc, inst.opcode), .left = Reg{inst.dst}, @@ -400,7 +403,7 @@ struct Unmarshaller { }; return Jmp{ .cond = cond, - .target = label_t{new_pc}, + .target = label_t{(int)new_pc}, }; } } diff --git a/src/ebpf_vm_isa.hpp b/src/ebpf_vm_isa.hpp index 0f1f0974c..654e34525 100644 --- a/src/ebpf_vm_isa.hpp +++ b/src/ebpf_vm_isa.hpp @@ -54,7 +54,8 @@ enum { INST_OP_LDDW_IMM = (INST_CLS_LD | INST_SRC_IMM | INST_SIZE_DW), // Special - INST_OP_JA = (INST_CLS_JMP | 0x00), + INST_OP_JA32 = (INST_CLS_JMP32 | 0x00), + INST_OP_JA16 = (INST_CLS_JMP | 0x00), INST_OP_CALL = (INST_CLS_JMP | 0x80), INST_OP_EXIT = (INST_CLS_JMP | 0x90), diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 8be56961e..083d5e3fa 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -6,6 +6,25 @@ #include "asm_marshal.hpp" #include "asm_unmarshal.hpp" +// Verify that if we unmarshal an instruction and then re-marshal it, +// we get what we expect. +static void compare_unmarshal_marshal(const ebpf_inst& ins, const ebpf_inst& expected_result) { + program_info info{.platform = &g_ebpf_platform_linux, + .type = g_ebpf_platform_linux.get_program_type("unspec", "unspec")}; + const ebpf_inst exit{.opcode = INST_OP_EXIT}; + InstructionSeq parsed = std::get(unmarshal(raw_program{"", "", {ins, exit, exit}, info})); + REQUIRE(parsed.size() == 3); + auto [_, single, _2] = parsed.front(); + (void)_; // unused + (void)_2; // unused + std::vector marshaled = marshal(single, 0); + REQUIRE(marshaled.size() == 1); + ebpf_inst result = marshaled.back(); + REQUIRE(memcmp(&expected_result, &result, sizeof(result)) == 0); +} + +// Verify that if we marshal an instruction and then unmarshal it, +// we get the original. 
static void compare_marshal_unmarshal(const Instruction& ins, bool double_cmd = false) { program_info info{.platform = &g_ebpf_platform_linux, .type = g_ebpf_platform_linux.get_program_type("unspec", "unspec")}; @@ -81,6 +100,13 @@ TEST_CASE("disasm_marshal", "[disasm][marshal]") { // Condition::Op::NSET, does not exist in ebpf Condition::Op::NE, Condition::Op::SGT, Condition::Op::SGE, Condition::Op::LT, Condition::Op::LE, Condition::Op::SLT, Condition::Op::SLE}; + SECTION("goto offset") { + ebpf_inst jmp_offset{.opcode = INST_OP_JA16, .offset = 1}; + compare_unmarshal_marshal(jmp_offset, jmp_offset); + + // JA32 +1 is equivalent to JA16 +1 since the offset fits in 16 bits. + compare_unmarshal_marshal(ebpf_inst{.opcode = INST_OP_JA32, .imm = 1}, jmp_offset); + } SECTION("Reg right") { for (auto op : ops) { Condition cond{.op = op, .left = Reg{1}, .right = Reg{2}}; @@ -241,7 +267,7 @@ TEST_CASE("fail unmarshal", "[disasm][marshal]") { "0: Bad instruction\n"); check_unmarshal_fail(ebpf_inst{.opcode = (INST_MEM_UNUSED << 5) | INST_SIZE_W | INST_CLS_LDX, .imm = 8}, "0: Bad instruction\n"); - check_unmarshal_fail(ebpf_inst{.opcode = INST_CLS_JMP32}, "0: Bad instruction\n"); + check_unmarshal_fail(ebpf_inst{.opcode = INST_CLS_JMP32}, "0: jump out of bounds\n"); check_unmarshal_fail(ebpf_inst{.opcode = 0x90 | INST_CLS_JMP32}, "0: Bad instruction\n"); check_unmarshal_fail(ebpf_inst{.opcode = 0x10 | INST_CLS_JMP32}, "0: jump out of bounds\n"); }