From fe8a79f9a3b1d39d0965f5a55b46a065644b1db3 Mon Sep 17 00:00:00 2001 From: Will Hawkins Date: Mon, 23 Sep 2024 12:37:43 -0400 Subject: [PATCH] Spruce Up Disassembler (#550) Bring disassembler code into modern Python era by using f-strings for formatting output. Also, add an option for giving verbose information about the fields of the disassembled instructions. Finally, add the ability to print warnings when unused fields in the instruction contain non-zero values. Signed-off-by: Will Hawkins Co-authored-by: Alan Jowett --- bin/ubpf-disassembler | 4 +- ubpf/disassembler.py | 170 +++++++++++++++++++++++++++++------------- 2 files changed, 120 insertions(+), 54 deletions(-) diff --git a/bin/ubpf-disassembler b/bin/ubpf-disassembler index 10c49b9b1..7b5f92547 100755 --- a/bin/ubpf-disassembler +++ b/bin/ubpf-disassembler @@ -21,6 +21,7 @@ def main(): parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument('input', type=argparse.FileType('rb'), default='-', nargs='?') parser.add_argument('output', type=argparse.FileType('w'), default='-', nargs='?') + parser.add_argument('--verbose', dest='verbose', action='store_true', default=False) args = parser.parse_args() if args.input.name == "" and hasattr(args.input, "buffer"): @@ -29,7 +30,8 @@ def main(): else: input_ = args.input.read() - args.output.write(ubpf.disassembler.disassemble(input_)) + print(f"{args.verbose=}") + args.output.write(ubpf.disassembler.disassemble(input_, args.verbose)) if __name__ == "__main__": main() diff --git a/ubpf/disassembler.py b/ubpf/disassembler.py index bd78b2555..903dc4323 100644 --- a/ubpf/disassembler.py +++ b/ubpf/disassembler.py @@ -70,10 +70,10 @@ BPF_CLASS_LDX = 1 BPF_CLASS_ST = 2 BPF_CLASS_STX = 3 -BPF_CLASS_ALU = 4 +BPF_CLASS_ALU32 = 4 BPF_CLASS_JMP = 5 BPF_CLASS_JMP32 = 6 -BPF_CLASS_ALU64 = 7 +BPF_CLASS_ALU = 7 BPF_ALU_NEG = 8 BPF_ALU_END = 13 @@ -96,94 +96,158 @@ def O(off): else: return "-" + str(65536-off) -def disassemble_one(data, offset): +def disassemble_one(data, offset, verbose = False): code, regs, off, imm = Inst.unpack_from(data, offset) dst_reg = regs & 0xf src_reg = (regs >> 4) & 0xf - cls = code & 7 + clz = code & 7 - class_name = CLASSES.get(cls) + increment = 8 - if cls == BPF_CLASS_ALU or cls == BPF_CLASS_ALU64: + class Field(object): + def __init__(self, name, value): + self.name = name + self.used = False + self.value = value + + def set_used(self): + self.used = True + + def set_unused(self): + self.used = False + + fields = {} + fields['off'] = Field("offset", off) + fields['dst_reg'] = Field("destination register", dst_reg) + fields['src_reg'] = Field("source register", src_reg) + fields['imm'] = Field("immediate", imm) + + disassembled = "" + + class_name = CLASSES.get(clz) + + if clz == BPF_CLASS_ALU or clz == BPF_CLASS_ALU32: source = (code >> 3) & 1 opcode = (code >> 4) & 0xf opcode_name = ALU_OPCODES.get(opcode) - if cls == BPF_CLASS_ALU: + if clz == BPF_CLASS_ALU32: opcode_name += "32" if opcode == BPF_ALU_END: opcode_name = source == 1 and "be" or "le" - return "%s%d %s" % (opcode_name, imm, R(dst_reg)) + fields["imm"].used = True + fields["dst_reg"].used = True + disassembled = f'{opcode_name}{imm} {R(dst_reg)}' elif opcode == BPF_ALU_NEG: - return "%s %s" % (opcode_name, R(dst_reg)) + fields["dst_reg"].used = True + disassembled = f'{opcode_name} {R(dst_reg)}' elif source == 0: - return "%s %s, %s" % (opcode_name, R(dst_reg), I(imm)) + fields["dst_reg"].used = True + fields["imm"].used = True + disassembled = f'{opcode_name} {R(dst_reg)}, {I(imm)}' else: - return "%s %s, %s" % (opcode_name, R(dst_reg), R(src_reg)) - elif cls == BPF_CLASS_JMP: + fields["dst_reg"].used = True + fields["src_reg"].used = True + disassembled = f'{opcode_name} {R(dst_reg)}, {R(src_reg)}' + elif clz == BPF_CLASS_JMP or clz == BPF_CLASS_JMP32: source = (code >> 3) & 1 opcode = (code >> 4) & 0xf opcode_name = JMP_OPCODES.get(opcode) + if clz == BPF_CLASS_JMP32: + opcode_name += "32" if opcode_name == "exit": - return opcode_name - elif opcode_name == "call": - if src_reg == 1: - opcode_name += " local" - return "%s %s" % (opcode_name, I(imm)) - elif opcode_name == "ja": - return "%s %s" % (opcode_name, O(off)) - elif source == 0: - return "%s %s, %s, %s" % (opcode_name, R(dst_reg), I(imm), O(off)) - else: - return "%s %s, %s, %s" % (opcode_name, R(dst_reg), R(src_reg), O(off)) - elif cls == BPF_CLASS_JMP32: - source = (code >> 3) & 1 - opcode = (code >> 4) & 0xf - opcode_name = JMP_OPCODES.get(opcode) + "32" - - if opcode_name == "exit": - return opcode_name + disassembled = f'{opcode_name}' elif opcode_name == "call": if src_reg == 1: opcode_name += " local" - return "%s %s" % (opcode_name, I(imm)) + fields["imm"].used = True + disassembled = f'{opcode_name} {I(imm)}' elif opcode_name == "ja": - return "%s %s" % (opcode_name, O(off)) + fields["off"].used = True + disassembled = f'{opcode_name} {O(off)}' elif source == 0: - return "%s %s, %s, %s" % (opcode_name, R(dst_reg), I(imm), O(off)) + fields["dst_reg"].used = True + fields["imm"].used = True + fields["off"].used = True + disassembled = f'{opcode_name} {R(dst_reg)}, {I(imm)}, {O(off)}' else: - return "%s %s, %s, %s" % (opcode_name, R(dst_reg), R(src_reg), O(off)) - elif cls == BPF_CLASS_LD or cls == BPF_CLASS_LDX or cls == BPF_CLASS_ST or cls == BPF_CLASS_STX: + fields["dst_reg"].used = True + fields["src_reg"].used = True + fields["off"].used = True + disassembled = f'{opcode_name} {R(dst_reg)}, {R(src_reg)}, {O(off)}' + elif clz == BPF_CLASS_LD: size = (code >> 3) & 3 mode = (code >> 5) & 7 mode_name = MODES.get(mode, str(mode)) - # TODO use different syntax for non-MEM instructions size_name = SIZES.get(size, str(size)) - if code == 0x18: # lddw + if clz == BPF_CLASS_LD and size == 0x3 and src_reg == 0: + # Make sure that we skip the next instruction because we use it here! + increment += 8 _, _, _, imm2 = Inst.unpack_from(data, offset+8) imm = (imm2 << 32) | imm - return "%s %s, %s" % (class_name + size_name, R(dst_reg), I(imm)) - elif code == 0x00: - # Second instruction of lddw - return None - elif cls == BPF_CLASS_LDX: - return "%s %s, %s" % (class_name + size_name, R(dst_reg), M(R(src_reg), off)) - elif cls == BPF_CLASS_ST: - return "%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), I(imm)) - elif cls == BPF_CLASS_STX: - return "%s %s, %s" % (class_name + size_name, M(R(dst_reg), off), R(src_reg)) + fields["dst_reg"].used = True + fields["imm"].used = True + disassembled = f'{class_name}{size_name} {R(dst_reg)}, {I(imm)}' else: - return "unknown mem instruction %#x" % code - else: - return "unknown instruction %#x" % code + result = f"unknown/unsupported special LOAD instruction {code=:x}" -def disassemble(data): + elif clz == BPF_CLASS_LD or clz == BPF_CLASS_LDX or clz == BPF_CLASS_ST or clz == BPF_CLASS_STX: + size = (code >> 3) & 3 + mode = (code >> 5) & 7 + mode_name = MODES.get(mode, str(mode)) + size_name = SIZES.get(size, str(size)) + if clz == BPF_CLASS_LDX: + fields["dst_reg"].used = True + fields["src_reg"].used = True + fields["off"].used = True + disassembled = f'{class_name}{size_name} {R(dst_reg)}, {M(R(src_reg), off)}' + elif clz == BPF_CLASS_ST: + fields["dst_reg"].used = True + fields["off"].used = True + fields["imm"].used = True + disassembled = f'{class_name}{size_name} {M(R(dst_reg), off)}, {I(imm)}' + elif clz == BPF_CLASS_STX: + fields["dst_reg"].used = True + fields["src_reg"].used = True + fields["off"].used = True + disassembled = f'{class_name}{size_name} {M(R(dst_reg), off)}, {R(src_reg)}' + else: + disassembled = f'unknown/unsupported mem instruction {code=:x}' + else: + disassembled = f'unknown/unsupported instruction {code=:x}' + + warnings = "" + for k in fields.keys(): + if not fields[k].used and fields[k].value != 0: + if len(warnings) != 0: + warnings += "; " + warnings += f"The {fields[k].name} field of the instruction has a value but it is not used by the instruction" + + if len(warnings) != 0: + disassembled += f"\n\tWarnings: {warnings}." + disassembled += "\n" + + if verbose: + disassembled += "\nDetails:\n" + disassembled += f"\tClass: 0x{clz:x}" + disassembled += "\n" + disassembled += f"\tRegs: 0x{regs:x}" + disassembled += "\n" + disassembled += f"\tOffset: 0x{off:x}" + disassembled += "\n" + disassembled += f"\tImmediate: 0x{imm:x}" + disassembled += "\n" + disassembled += "-----------------" + + return disassembled, increment + +def disassemble(data, verbose = False): output = io() offset = 0 while offset < len(data): - s = disassemble_one(data, offset) + (s, increment) = disassemble_one(data, offset, verbose) if s: output.write(s + "\n") - offset += 8 + offset += increment return output.getvalue()