diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 764077299..1a308bc93 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -72,11 +72,14 @@ static uint8_t imm(Un::Op op) { switch (op) { case Op::NEG: return 0; case Op::BE16: - case Op::LE16: return 16; + case Op::LE16: + case Op::SWAP16: return 16; case Op::BE32: - case Op::LE32: return 32; + case Op::LE32: + case Op::SWAP32: return 32; case Op::BE64: - case Op::LE64: return 64; + case Op::LE64: + case Op::SWAP64: return 64; } assert(false); return {}; @@ -156,6 +159,16 @@ struct MarshalVisitor { .offset = 0, .imm = imm(b.op), }}; + case Un::Op::SWAP16: + case Un::Op::SWAP32: + case Un::Op::SWAP64: + return {ebpf_inst{ + .opcode = static_cast(INST_CLS_ALU64 | (0xd << 4)), + .dst = b.dst.v, + .src = 0, + .offset = 0, + .imm = imm(b.op), + }}; } assert(false); return {}; diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 9f8c12254..3c8693954 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -224,6 +224,9 @@ struct InstructionPrinterVisitor { case Un::Op::LE16: os_ << "le16 "; break; case Un::Op::LE32: os_ << "le32 "; break; case Un::Op::LE64: os_ << "le64 "; break; + case Un::Op::SWAP16: os_ << "swap16 "; break; + case Un::Op::SWAP32: os_ << "swap32 "; break; + case Un::Op::SWAP64: os_ << "swap64 "; break; case Un::Op::NEG: os_ << "-"; break; } os_ << b.dst; diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 9f4e42c1c..a60fe6395 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -33,7 +33,7 @@ using crab::linear_expression_t; #define OPASSIGN R"_(\s*(\S*)=\s*)_" #define ASSIGN R"_(\s*=\s*)_" #define LONGLONG R"_(\s*(ll|)\s*)_" -#define UNOP R"_((-|be16|be32|be64))_" +#define UNOP R"_((-|be16|be32|be64|le16|le32|le64|swap16|swap32|swap64))_" #define PLUSMINUS R"_((\s*[+-])\s*)_" #define LPAREN R"_(\s*\(\s*)_" @@ -69,6 +69,9 @@ static const std::map str_to_unop = { {"le16", Un::Op::LE16}, {"le32", Un::Op::LE32}, {"le64", Un::Op::LE64}, + {"swap16", 
Un::Op::SWAP16}, + {"swap32", Un::Op::SWAP32}, + {"swap64", Un::Op::SWAP64}, {"-", Un::Op::NEG}, }; diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 55c0b082b..c8e621161 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -108,6 +108,9 @@ struct Un { LE16, // dst = htole16(dst) LE32, // dst = htole32(dst) LE64, // dst = htole64(dst) + SWAP16, // dst = bswap16(dst) + SWAP32, // dst = bswap32(dst) + SWAP64, // dst = bswap64(dst) NEG, // dst = -dst }; diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index 8b63f5a97..102f1fa80 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -122,14 +122,20 @@ struct Unmarshaller { note("arsh32 is not allowed"); return Bin::Op::ARSH; case INST_ALU_OP_END: + if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64) { + if (inst.opcode & INST_END_BE) + throw InvalidInstruction(pc, "invalid endian immediate"); + switch (inst.imm) { + case 16: return Un::Op::SWAP16; + case 32: return Un::Op::SWAP32; + case 64: return Un::Op::SWAP64; + default: throw InvalidInstruction(pc, "invalid endian immediate"); + } + } switch (inst.imm) { case 16: return (inst.opcode & INST_END_BE) ? Un::Op::BE16 : Un::Op::LE16; - case 32: - if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64) - throw InvalidInstruction(pc, "invalid endian immediate 32 for 64 bit instruction"); - return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32; - case 64: - return (inst.opcode & INST_END_BE) ? Un::Op::BE64 : Un::Op::LE64; + case 32: return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32; + case 64: return (inst.opcode & INST_END_BE) ? 
Un::Op::BE64 : Un::Op::LE64; default: throw InvalidInstruction(pc, "invalid endian immediate"); } diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp index b511de0a7..bce79438e 100644 --- a/src/crab/ebpf_domain.cpp +++ b/src/crab/ebpf_domain.cpp @@ -1484,6 +1484,18 @@ void ebpf_domain_t::operator()(const Un& stmt) { swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_little); swap_endianness(dst.svalue, uint64_t(0), boost::endian::native_to_little); break; + case Un::Op::SWAP16: + swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse); + swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse); + break; + case Un::Op::SWAP32: + swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse); + swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse); + break; + case Un::Op::SWAP64: + swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse); + swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse); + break; case Un::Op::NEG: neg(dst.svalue, dst.uvalue, stmt.is64 ? 
64 : 32); havoc_offsets(stmt.dst); diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 083d5e3fa..33773791f 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -47,7 +47,9 @@ static void check_unmarshal_fail(ebpf_inst inst, std::string expected_error_mess program_info info{.platform = &g_ebpf_platform_linux, .type = g_ebpf_platform_linux.get_program_type("unspec", "unspec")}; std::vector insns = {inst}; - std::string error_message = std::get(unmarshal(raw_program{"", "", insns, info})); + auto result = unmarshal(raw_program{"", "", insns, info}); + REQUIRE(std::holds_alternative(result)); + std::string error_message = std::get(result); REQUIRE(error_message == expected_error_message); } @@ -270,4 +272,6 @@ TEST_CASE("fail unmarshal", "[disasm][marshal]") { check_unmarshal_fail(ebpf_inst{.opcode = INST_CLS_JMP32}, "0: jump out of bounds\n"); check_unmarshal_fail(ebpf_inst{.opcode = 0x90 | INST_CLS_JMP32}, "0: Bad instruction\n"); check_unmarshal_fail(ebpf_inst{.opcode = 0x10 | INST_CLS_JMP32}, "0: jump out of bounds\n"); + check_unmarshal_fail(ebpf_inst{.opcode = INST_ALU_OP_END | INST_CLS_ALU, .imm = 0}, "0: invalid endian immediate\n"); + check_unmarshal_fail(ebpf_inst{.opcode = INST_ALU_OP_END | INST_CLS_ALU64, .imm = 0}, "0: invalid endian immediate\n"); } diff --git a/test-data/unop.yaml b/test-data/unop.yaml index 2c23f914f..180ff41ca 100644 --- a/test-data/unop.yaml +++ b/test-data/unop.yaml @@ -40,3 +40,123 @@ code: r1 = -r1 post: ["r1.type=number", "r1.svalue=[-5, 5]"] +--- +test-case: be16 singleton + +pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"] + +code: + : | + r1 = be16 r1 ; 0x654321 -> 0x2143 + +post: ["r1.type=number", "r1.svalue=8515", "r1.uvalue=8515"] +--- +test-case: be32 singleton + +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = be32 r1 ; 0x0987654321 -> 0x21436587 + +post: ["r1.type=number", "r1.svalue=558065031", 
"r1.uvalue=558065031"] +--- +test-case: be64 singleton + +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = be64 r1 ; 0x0987654321 -> 0x2143658709000000 + +post: ["r1.type=number", "r1.svalue=2396871057337221120", "r1.uvalue=2396871057337221120"] +--- +test-case: be16 range + +pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"] + +code: + : | + r1 = be16 r1 ; [0x0000, 0x0002] -> [0x0000, 0x0200] but currently we just lose the range + +post: ["r1.type=number"] +--- +test-case: le16 singleton + +pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"] + +code: + : | + r1 = le16 r1 ; 0x654321 -> 0x4321 + +post: ["r1.type=number", "r1.svalue=17185", "r1.uvalue=17185"] +--- +test-case: le32 singleton + +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = le32 r1 ; 0x0987654321 -> 0x87654321 + +post: ["r1.type=number", "r1.svalue=2271560481", "r1.uvalue=2271560481"] +--- +test-case: le64 singleton + +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = le64 r1 ; 0x0987654321 -> 0x0987654321 + +post: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] +--- +test-case: le16 range + +pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"] + +code: + : | + r1 = le16 r1 ; this could preserve the range but we don't support that yet + +post: ["r1.type=number"] +--- +test-case: swap16 singleton + +pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"] + +code: + : | + r1 = swap16 r1 ; 0x654321 -> 0x2143 + +post: ["r1.type=number", "r1.svalue=8515", "r1.uvalue=8515"] +--- +test-case: swap32 singleton + +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = swap32 r1 ; 0x0987654321 -> 0x21436587 + +post: ["r1.type=number", "r1.svalue=558065031", "r1.uvalue=558065031"] +--- +test-case: swap64 singleton 
+ +pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"] + +code: + : | + r1 = swap64 r1 ; 0x0987654321 -> 0x2143658709000000 + +post: ["r1.type=number", "r1.svalue=2396871057337221120", "r1.uvalue=2396871057337221120"] +--- +test-case: swap16 range + +pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"] + +code: + : | + r1 = swap16 r1 ; [0x0000, 0x0002] -> [0x0000, 0x0200] but currently we just lose the range + +post: ["r1.type=number"]