vbpf · elazarg · Dec 29, 2023 · Dec 28, 2023 · Dec 28, 2023
diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp
@@ -72,11 +72,14 @@ static uint8_t imm(Un::Op op) {
     switch (op) {
     case Op::NEG: return 0;
     case Op::BE16:
-    case Op::LE16: return 16;
+    case Op::LE16:
+    case Op::SWAP16: return 16;
     case Op::BE32:
-    case Op::LE32: return 32;
+    case Op::LE32:
+    case Op::SWAP32: return 32;
     case Op::BE64:
-    case Op::LE64: return 64;
+    case Op::LE64:
+    case Op::SWAP64: return 64;
     }
     assert(false);
     return {};
@@ -156,6 +159,16 @@ struct MarshalVisitor {
                 .offset = 0,
                 .imm = imm(b.op),
             }};
+        case Un::Op::SWAP16:
+        case Un::Op::SWAP32:
+        case Un::Op::SWAP64:
+            return {ebpf_inst{
+                .opcode = static_cast<uint8_t>(INST_CLS_ALU64 | (0xd << 4)),
+                .dst = b.dst.v,
+                .src = 0,
+                .offset = 0,
+                .imm = imm(b.op),
+            }};
         }
         assert(false);
         return {};

diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp
@@ -224,6 +224,9 @@ struct InstructionPrinterVisitor {
         case Un::Op::LE16: os_ << "le16 "; break;
         case Un::Op::LE32: os_ << "le32 "; break;
         case Un::Op::LE64: os_ << "le64 "; break;
+        case Un::Op::SWAP16: os_ << "swap16 "; break;
+        case Un::Op::SWAP32: os_ << "swap32 "; break;
+        case Un::Op::SWAP64: os_ << "swap64 "; break;
         case Un::Op::NEG: os_ << "-"; break;
         }
         os_ << b.dst;

diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp
@@ -33,7 +33,7 @@ using crab::linear_expression_t;
 #define OPASSIGN R"_(\s*(\S*)=\s*)_"
 #define ASSIGN R"_(\s*=\s*)_"
 #define LONGLONG R"_(\s*(ll|)\s*)_"
-#define UNOP R"_((-|be16|be32|be64))_"
+#define UNOP R"_((-|be16|be32|be64|le16|le32|le64|swap16|swap32|swap64))_"
 
 #define PLUSMINUS R"_((\s*[+-])\s*)_"
 #define LPAREN R"_(\s*\(\s*)_"
@@ -69,6 +69,9 @@ static const std::map<std::string, Un::Op> str_to_unop = {
     {"le16", Un::Op::LE16},
     {"le32", Un::Op::LE32},
     {"le64", Un::Op::LE64},
+    {"swap16", Un::Op::SWAP16},
+    {"swap32", Un::Op::SWAP32},
+    {"swap64", Un::Op::SWAP64},
     {"-", Un::Op::NEG},
 };
 

diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp
@@ -108,6 +108,9 @@ struct Un {
         LE16, // dst = htole16(dst)
         LE32, // dst = htole32(dst)
         LE64, // dst = htole64(dst)
+        SWAP16, // dst = bswap16(dst)
+        SWAP32, // dst = bswap32(dst)
+        SWAP64, // dst = bswap64(dst)
         NEG,  // dst = -dst
     };
 

diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp
@@ -122,14 +122,20 @@ struct Unmarshaller {
                 note("arsh32 is not allowed");
             return Bin::Op::ARSH;
         case INST_ALU_OP_END:
+            if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64) {
+                if (inst.opcode & INST_END_BE)
+                    throw InvalidInstruction(pc, "invalid endian immediate");
+                switch (inst.imm) {
+                case 16: return Un::Op::SWAP16;
+                case 32: return Un::Op::SWAP32;
+                case 64: return Un::Op::SWAP64;
+                default: throw InvalidInstruction(pc, "invalid endian immediate");
+                }
+            }
             switch (inst.imm) {
             case 16: return (inst.opcode & INST_END_BE) ? Un::Op::BE16 : Un::Op::LE16;
-            case 32:
-                if ((inst.opcode & INST_CLS_MASK) == INST_CLS_ALU64)
-                    throw InvalidInstruction(pc, "invalid endian immediate 32 for 64 bit instruction");
-                return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32;
-            case 64:
-                return (inst.opcode & INST_END_BE) ? Un::Op::BE64 : Un::Op::LE64;
+            case 32: return (inst.opcode & INST_END_BE) ? Un::Op::BE32 : Un::Op::LE32;
+            case 64: return (inst.opcode & INST_END_BE) ? Un::Op::BE64 : Un::Op::LE64;
             default:
                 throw InvalidInstruction(pc, "invalid endian immediate");
             }

diff --git a/src/crab/ebpf_domain.cpp b/src/crab/ebpf_domain.cpp
@@ -1484,6 +1484,18 @@ void ebpf_domain_t::operator()(const Un& stmt) {
         swap_endianness(dst.svalue, int64_t(0), boost::endian::native_to_little<int64_t>);
         swap_endianness(dst.svalue, uint64_t(0), boost::endian::native_to_little<uint64_t>);
         break;
+    case Un::Op::SWAP16:
+        swap_endianness(dst.svalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+        swap_endianness(dst.uvalue, uint16_t(0), boost::endian::endian_reverse<uint16_t>);
+        break;
+    case Un::Op::SWAP32:
+        swap_endianness(dst.svalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+        swap_endianness(dst.uvalue, uint32_t(0), boost::endian::endian_reverse<uint32_t>);
+        break;
+    case Un::Op::SWAP64:
+        swap_endianness(dst.svalue, int64_t(0), boost::endian::endian_reverse<int64_t>);
+        swap_endianness(dst.uvalue, uint64_t(0), boost::endian::endian_reverse<uint64_t>);
+        break;
     case Un::Op::NEG:
         neg(dst.svalue, dst.uvalue, stmt.is64 ? 64 : 32);
         havoc_offsets(stmt.dst);

diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp
@@ -47,7 +47,9 @@ static void check_unmarshal_fail(ebpf_inst inst, std::string expected_error_mess
     program_info info{.platform = &g_ebpf_platform_linux,
                       .type = g_ebpf_platform_linux.get_program_type("unspec", "unspec")};
     std::vector<ebpf_inst> insns = {inst};
-    std::string error_message = std::get<std::string>(unmarshal(raw_program{"", "", insns, info}));
+    auto result = unmarshal(raw_program{"", "", insns, info});
+    REQUIRE(std::holds_alternative<std::string>(result));
+    std::string error_message = std::get<std::string>(result);
     REQUIRE(error_message == expected_error_message);
 }
 
@@ -270,4 +272,6 @@ TEST_CASE("fail unmarshal", "[disasm][marshal]") {
     check_unmarshal_fail(ebpf_inst{.opcode = INST_CLS_JMP32}, "0: jump out of bounds\n");
     check_unmarshal_fail(ebpf_inst{.opcode = 0x90 | INST_CLS_JMP32}, "0: Bad instruction\n");
     check_unmarshal_fail(ebpf_inst{.opcode = 0x10 | INST_CLS_JMP32}, "0: jump out of bounds\n");
+    check_unmarshal_fail(ebpf_inst{.opcode = INST_ALU_OP_END | INST_CLS_ALU, .imm = 0}, "0: invalid endian immediate\n");
+    check_unmarshal_fail(ebpf_inst{.opcode = INST_ALU_OP_END | INST_CLS_ALU64, .imm = 0}, "0: invalid endian immediate\n");
 }
diff --git a/test-data/unop.yaml b/test-data/unop.yaml
@@ -40,3 +40,123 @@ code:
     r1 = -r1
 
 post: ["r1.type=number", "r1.svalue=[-5, 5]"]
+---
+test-case: be16 singleton
+
+pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"]
+
+code:
+  <start>: |
+    r1 = be16 r1 ; 0x654321 -> 0x2143
+
+post: ["r1.type=number", "r1.svalue=8515", "r1.uvalue=8515"]
+---
+test-case: be32 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = be32 r1 ; 0x0987654321 -> 0x21436587
+
+post: ["r1.type=number", "r1.svalue=558065031", "r1.uvalue=558065031"]
+---
+test-case: be64 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = be64 r1 ; 0x0987654321 -> 0x2143658709000000
+
+post: ["r1.type=number", "r1.svalue=2396871057337221120", "r1.uvalue=2396871057337221120"]
+---
+test-case: be16 range
+
+pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"]
+
+code:
+  <start>: |
+    r1 = be16 r1 ; [0x0000, 0x0002] -> [0x0000, 0x0200] but currently we just lose the range
+
+post: ["r1.type=number"]
+---
+test-case: le16 singleton
+
+pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"]
+
+code:
+  <start>: |
+    r1 = le16 r1 ; 0x654321 -> 0x4321
+
+post: ["r1.type=number", "r1.svalue=17185", "r1.uvalue=17185"]
+---
+test-case: le32 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = le32 r1 ; 0x0987654321 -> 0x87654321
+
+post: ["r1.type=number", "r1.svalue=2271560481", "r1.uvalue=2271560481"]
+---
+test-case: le64 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = le64 r1 ; 0x0987654321 -> 0x0987654321
+
+post: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+---
+test-case: le16 range
+
+pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"]
+
+code:
+  <start>: |
+    r1 = le16 r1 ; this could preserve the range but we don't support that yet
+
+post: ["r1.type=number"]
+---
+test-case: swap16 singleton
+
+pre: ["r1.type=number", "r1.svalue=6636321", "r1.uvalue=6636321"]
+
+code:
+  <start>: |
+    r1 = swap16 r1 ; 0x654321 -> 0x2143
+
+post: ["r1.type=number", "r1.svalue=8515", "r1.uvalue=8515"]
+---
+test-case: swap32 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = swap32 r1 ; 0x0987654321 -> 0x21436587
+
+post: ["r1.type=number", "r1.svalue=558065031", "r1.uvalue=558065031"]
+---
+test-case: swap64 singleton
+
+pre: ["r1.type=number", "r1.svalue=40926266145", "r1.uvalue=40926266145"]
+
+code:
+  <start>: |
+    r1 = swap64 r1 ; 0x0987654321 -> 0x2143658709000000
+
+post: ["r1.type=number", "r1.svalue=2396871057337221120", "r1.uvalue=2396871057337221120"]
+---
+test-case: swap16 range
+
+pre: ["r1.type=number", "r1.svalue=[0, 2]", "r1.uvalue=[0, 2]", "r1.svalue=r1.uvalue"]
+
+code:
+  <start>: |
+    r1 = swap16 r1 ; [0x0000, 0x0002] -> [0x0000, 0x0200] but currently we just lose the range
+
+post: ["r1.type=number"]