diff --git a/decompiler/frontend/binaryninja/handlers/assignments.py b/decompiler/frontend/binaryninja/handlers/assignments.py index d42633918..306336a5e 100644 --- a/decompiler/frontend/binaryninja/handlers/assignments.py +++ b/decompiler/frontend/binaryninja/handlers/assignments.py @@ -73,9 +73,9 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_ struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment) destination = MemberAccess( offset=assignment.offset, - member_name=struct_variable.type.get_member_by_offset(assignment.offset).name, + member_name=struct_variable.type.get_member_name_by_offset(assignment.offset), operands=[struct_variable], - writes_memory=assignment.ssa_memory_version, + writes_memory=assignment.dest.version, ) value = self._lifter.lift(assignment.src) # case 2 (contraction): @@ -99,7 +99,7 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al (x = ) <- for the sake of example, only rhs expression is lifted here. """ source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction) - if isinstance(source.type, Struct) or isinstance(source.type, Union): + if isinstance(source.type, Struct) or isinstance(source.type, Class) or isinstance(source.type, Union): return self._get_field_as_member_access(instruction, source, **kwargs) cast_type = source.type.resize(instruction.size * self.BYTE_SIZE) if instruction.offset: @@ -112,11 +112,11 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess: """Lift MLIL var_field as struct or union member read access.""" - if isinstance(source.type, Struct): - member_name = source.type.get_member_by_offset(instruction.offset).name + if isinstance(source.type, Struct) or isinstance(source.type, Class): + member_name = source.type.get_member_name_by_offset(instruction.offset) elif parent := kwargs.get("parent", None): parent_type = self._lifter.lift(parent.dest.type) - member_name = source.type.get_member_by_type(parent_type).name + member_name = source.type.get_member_name_by_type(parent_type) else: logging.warning(f"Cannot get member name for instruction {instruction}") member_name = f"field_{hex(instruction.offset)}" @@ -213,14 +213,8 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, """Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x).""" vartype = self._lifter.lift(instruction.dest.expr_type) struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction) - member = vartype.type.get_member_by_offset(instruction.offset) - if member is not None: - name = member.name - else: - name = f"__offset_{instruction.offset}" - name.replace("-", "minus_") struct_member_access = MemberAccess( - member_name=name, + member_name=vartype.type.get_member_name_by_offset(instruction.offset), offset=instruction.offset, operands=[struct_variable], vartype=vartype, diff --git a/decompiler/frontend/binaryninja/handlers/unary.py b/decompiler/frontend/binaryninja/handlers/unary.py index 824f23f7c..4bc0a089c 100644 --- a/decompiler/frontend/binaryninja/handlers/unary.py +++ b/decompiler/frontend/binaryninja/handlers/unary.py @@ -15,7 +15,7 @@ Pointer, UnaryOperation, ) -from decompiler.structures.pseudo.complextypes import Struct +from decompiler.structures.pseudo.complextypes import Class, Struct from decompiler.structures.pseudo.operations import MemberAccess @@ -98,12 +98,9 @@ def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct, """Lift a MLIL_LOAD_STRUCT_SSA (struct member access e.g. var#n->x) instruction.""" struct_variable = self._lifter.lift(instruction.src) struct_ptr: Pointer = self._lifter.lift(instruction.src.expr_type) - struct_member = struct_ptr.type.get_member_by_offset(instruction.offset) - if struct_member is not None: - name = struct_member.name - else: - name = f"__offset_{instruction.offset}" - name.replace("-", "minus_") + name = f"field_{hex(instruction.offset)}".replace("-", "minus_") + if isinstance(struct_ptr.type, Class) or isinstance(struct_ptr.type, Struct): + name = struct_ptr.type.get_member_name_by_offset(instruction.offset) return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=instruction.offset, member_name=name) def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation: diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 0a885c785..7beb3b326 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -70,6 +70,15 @@ def add_member(self, member: ComplexTypeMember): def get_member_by_offset(self, offset: int) -> Optional[ComplexTypeMember]: return self.members.get(offset) + def get_member_name_by_offset(self, offset: int) -> str: + """Get the name of a member by its offset and gracefully handle unknown offsets.""" + member = self.get_member_by_offset(offset) + if member is not None: + return member.name + else: + logging.warning(f"Cannot get member name for type {self} at offset {offset}") + return f"field_{hex(offset)}".replace("-", "minus_") + def declaration(self) -> str: members = ";\n\t".join(self.members[k].declaration() for k in sorted(self.members.keys())) + ";" return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" @@ -103,6 +112,15 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember: if member.type == _type: return member + def get_member_name_by_type(self, _type: Type) -> str: + """Get the name of a member of a union by its type and gracefully handle unknown types.""" + member = self.get_member_by_type(_type) + if member is not None: + return member.name + else: + logging.warning(f"Cannot get member name for union {self}") + return "unknown_field" + @dataclass(frozen=True, order=True) class Enum(ComplexType): @@ -136,6 +154,7 @@ class UniqueNameProvider: """The purpose of this class is to provide unique names for types, as duplicate names can potentially be encountered in the lifting stage (especially anonymous structs, etc.) This class keeps track of all the names already used. If duplicates are found, they are renamed by appending suffixes with incrementing numbers. E.g. `classname`, `classname__2`, `classname__3`, ... + Assumes that incoming names do not end with __{number}. """ def __init__(self): @@ -145,6 +164,7 @@ def get_unique_name(self, name: str) -> str: """This method returns the input name if it was unique so far. Otherwise it returns the name with an added incrementing suffix. In any case, the name occurence of the name is counted. + Assumes that incoming names do not end with __{number}. """ if name not in self._name_to_count: self._name_to_count[name] = 1 diff --git a/tests/structures/pseudo/test_complextypes.py b/tests/structures/pseudo/test_complextypes.py index c5a7d5c13..7b09e82b3 100644 --- a/tests/structures/pseudo/test_complextypes.py +++ b/tests/structures/pseudo/test_complextypes.py @@ -1,6 +1,7 @@ import pytest from decompiler.structures.pseudo import Float, Integer, Pointer from decompiler.structures.pseudo.complextypes import ( + Class, ComplexTypeMap, ComplexTypeMember, ComplexTypeName, @@ -8,6 +9,7 @@ Enum, Struct, Union, + UniqueNameProvider, ) @@ -46,6 +48,65 @@ def test_get_member_by_offset(self, book, title, num_pages, author): assert book.get_member_by_offset(4) == num_pages assert book.get_member_by_offset(8) == author + def test_get_member_name_by_offset(self, book, title, num_pages, author): + assert book.get_member_name_by_offset(0) == title.name + assert book.get_member_name_by_offset(4) == num_pages.name + assert book.get_member_name_by_offset(8) == author.name + assert book.get_member_name_by_offset(0x100) == "field_0x100" + assert book.get_member_name_by_offset(-0x100) == "field_minus_0x100" + + def test_get_complex_type_name(self, book): + assert book.complex_type_name == (ComplexTypeName(0, "Book")) + + +class TestClass: + def test_declaration(self, class_book: Struct, record_id: Union): + assert class_book.declaration() == "class ClassBook {\n\tchar * title;\n\tint num_pages;\n\tchar * author;\n}" + # nest complex type + class_book.add_member( + m := ComplexTypeMember(size=64, name="id", offset=12, type=record_id), + ) + result = f"class ClassBook {{\n\tchar * title;\n\tint num_pages;\n\tchar * author;\n\t{m.declaration()};\n}}" + assert class_book.declaration() == result + + def test_str(self, class_book: Struct): + assert str(class_book) == "ClassBook" + + def test_copy(self, class_book: Struct): + new_class_book: Struct = class_book.copy() + assert id(new_class_book) != id(class_book) + assert new_class_book.size == class_book.size + assert new_class_book.type_specifier == class_book.type_specifier == ComplexTypeSpecifier.CLASS + assert id(new_class_book.members) != id(class_book.members) + assert new_class_book.get_member_by_offset(0) == class_book.get_member_by_offset(0) + assert id(new_class_book.get_member_by_offset(0)) != id(class_book.get_member_by_offset(0)) + assert len(new_class_book.members) == len(class_book.members) + + def test_add_members(self, class_book, title, num_pages, author): + empty_class_book = Class(name="ClassBook", members={}, size=96) + empty_class_book.add_member(title) + empty_class_book.add_member(author) + empty_class_book.add_member(num_pages) + assert empty_class_book == class_book + + def test_get_member_by_offset(self, class_book, title, num_pages, author): + assert class_book.get_member_by_offset(0) == title + assert class_book.get_member_by_offset(4) == num_pages + assert class_book.get_member_by_offset(8) == author + + def test_get_member_name_by_offset(self, class_book, title, num_pages, author): + assert class_book.get_member_name_by_offset(0) == title.name + assert class_book.get_member_name_by_offset(4) == num_pages.name + assert class_book.get_member_name_by_offset(8) == author.name + assert class_book.get_member_name_by_offset(0x100) == "field_0x100" + assert class_book.get_member_name_by_offset(-0x100) == "field_minus_0x100" + + def test_get_complex_type_name(self, class_book): + assert class_book.complex_type_name == (ComplexTypeName(0, "ClassBook")) + + def test_class_not_struct(self, class_book, book): + assert book != class_book + @pytest.fixture def book() -> Struct: @@ -60,6 +121,19 @@ def book() -> Struct: ) +@pytest.fixture +def class_book() -> Class: + return Class( + name="ClassBook", + members={ + 0: ComplexTypeMember(size=32, name="title", offset=0, type=Pointer(Integer.char())), + 4: ComplexTypeMember(size=32, name="num_pages", offset=4, type=Integer.int32_t()), + 8: ComplexTypeMember(size=32, name="author", offset=8, type=Pointer(Integer.char())), + }, + size=96, + ) + + @pytest.fixture def title() -> ComplexTypeMember: return ComplexTypeMember(size=32, name="title", offset=0, type=Pointer(Integer.char())) @@ -101,6 +175,15 @@ def test_get_member_by_type(self, record_id, float_id, int_id, double_id): assert record_id.get_member_by_type(Integer.int32_t()) == int_id assert record_id.get_member_by_type(Float.double()) == double_id + def test_get_member_name_by_type(self, record_id, float_id, int_id, double_id): + assert record_id.get_member_name_by_type(Float.float()) == float_id.name + assert record_id.get_member_name_by_type(Integer.int32_t()) == int_id.name + assert record_id.get_member_name_by_type(Float.double()) == double_id.name + assert record_id.get_member_name_by_type(record_id) == "unknown_field" + + def test_get_complex_type_name(self, record_id): + assert record_id.complex_type_name == (ComplexTypeName(0, "RecordID")) + @pytest.fixture def record_id() -> Union: @@ -153,6 +236,9 @@ def test_add_members(self, empty_color, color, red, green, blue): empty_color.add_member(blue) assert empty_color == color + def test_get_complex_type_name(self, color): + assert color.complex_type_name == (ComplexTypeName(0, "Color")) + @pytest.fixture def color(): @@ -188,20 +274,44 @@ def blue(): class TestComplexTypeMap: - def test_declarations(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union): - assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};" + def test_declarations(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union): + assert ( + complex_types.declarations() + == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};\n{class_book.declaration()};" + ) complex_types.add(book, 0) - assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};" + assert ( + complex_types.declarations() + == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};\n{class_book.declaration()};" + ) - def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union): + def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union): assert complex_types.retrieve_by_name(ComplexTypeName(0, "Book")) == book assert complex_types.retrieve_by_name(ComplexTypeName(0, "RecordID")) == record_id assert complex_types.retrieve_by_name(ComplexTypeName(0, "Color")) == color + assert complex_types.retrieve_by_name(ComplexTypeName(0, "ClassBook")) == class_book + + def test_retrieve_by_id(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union): + assert complex_types.retrieve_by_id(0) == book + assert complex_types.retrieve_by_id(1) == color + assert complex_types.retrieve_by_id(2) == record_id + assert complex_types.retrieve_by_id(3) == class_book @pytest.fixture - def complex_types(self, book: Struct, color: Enum, record_id: Union): + def complex_types(self, book: Struct, class_book: Class, color: Enum, record_id: Union): complex_types = ComplexTypeMap() complex_types.add(book, 0) complex_types.add(color, 1) complex_types.add(record_id, 2) + complex_types.add(class_book, 3) return complex_types + + +class TestUniqueNameProvider: + def test_unique_names(self): + unique_name_provider = UniqueNameProvider() + input_names = ["aa", "", "b", "", "c", "c", "d", "c"] + excepted_output = ["aa", "", "b", "__2", "c", "c__2", "d", "c__3"] + output_names = [unique_name_provider.get_unique_name(name) for name in input_names] + assert output_names == excepted_output + assert len(set(output_names)) == len(output_names) # uniqueness diff --git a/tests/structures/pseudo/test_expressions.py b/tests/structures/pseudo/test_expressions.py index b3cf2d9e7..c86401cd6 100644 --- a/tests/structures/pseudo/test_expressions.py +++ b/tests/structures/pseudo/test_expressions.py @@ -2,6 +2,7 @@ import pytest from decompiler.structures.pseudo import OperationType, UnaryOperation +from decompiler.structures.pseudo.complextypes import ComplexTypeMember, Enum from decompiler.structures.pseudo.expressions import ( Constant, ExternConstant, @@ -374,3 +375,24 @@ def test_copy(self): original = ImportedFunctionSymbol("foo", 0x42) copy = original.copy() assert id(original) != id(copy) and original == copy + + +@pytest.fixture +def color(): + return Enum( + 0, + "Color", + { + 0: ComplexTypeMember(0, "red", value=0, offset=0, type=Integer.int32_t()), + 1: ComplexTypeMember(0, "green", value=1, offset=0, type=Integer.int32_t()), + 2: ComplexTypeMember(0, "blue", value=2, offset=0, type=Integer.int32_t()), + }, + ) + + +class TestEnumConstant: + def test_available_value(self, color): + assert str(Constant(value=1, vartype=color)) == "green" + + def test_unavailable_value(self, color): + assert str(Constant(value=0xFFFF, vartype=color)) == str(Constant(0xFFFF, vartype=i32))