From 21557260555ebde102f7fdc0e7dcdefe7702747a Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 5 Oct 2023 11:14:14 +0200 Subject: [PATCH 01/16] prevent infinite loop when lifting complex types --- decompiler/frontend/binaryninja/handlers/types.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index 353a5922a..ed9cd0a11 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -88,6 +88,10 @@ def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexT def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, ComplexTypeName]: """Lift struct or union type.""" + complex_type_name = ComplexTypeName(0, name) + cached_type = self._lifter.complex_types.retrieve_by_name(complex_type_name) + if cached_type is not None: + return cached_type if struct.type == StructureVariant.StructStructureType: type_name = name if name else self._get_data_type_name(struct, keyword="struct") lifted_struct = Struct(struct.width * self.BYTE_SIZE, type_name, {}) @@ -96,9 +100,9 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio lifted_struct = Union_(struct.width * self.BYTE_SIZE, type_name, []) else: raise RuntimeError(f"Unknown struct type {struct.type.name}") + self._lifter.complex_types.add(lifted_struct) for member in struct.members: lifted_struct.add_member(self.lift_struct_member(member, type_name)) - self._lifter.complex_types.add(lifted_struct) return lifted_struct @abstractmethod From 41d1b285d1e78869352a1f1d0a5f6bf9a0f29143 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 12 Oct 2023 13:38:23 +0200 Subject: [PATCH 02/16] lift classes exactly like structs --- decompiler/frontend/binaryninja/handlers/types.py | 5 ++++- decompiler/structures/pseudo/complextypes.py | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index ed9cd0a11..f09b51d92 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -22,7 +22,7 @@ ) from decompiler.frontend.lifter import Handler from decompiler.structures.pseudo import CustomType, Float, FunctionTypeDef, Integer, Pointer, UnknownType, Variable -from decompiler.structures.pseudo.complextypes import ComplexTypeMember, ComplexTypeName, Enum, Struct +from decompiler.structures.pseudo.complextypes import Class, ComplexTypeMember, ComplexTypeName, Enum, Struct from decompiler.structures.pseudo.complextypes import Union as Union_ @@ -98,6 +98,9 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio elif struct.type == StructureVariant.UnionStructureType: type_name = name if name else self._get_data_type_name(struct, keyword="union") lifted_struct = Union_(struct.width * self.BYTE_SIZE, type_name, []) + elif struct.type == StructureVariant.ClassStructureType: + type_name = name if name else self._get_data_type_name(struct, keyword="class") + lifted_struct = Class(struct.width * self.BYTE_SIZE, type_name, {}) else: raise RuntimeError(f"Unknown struct type {struct.type.name}") self._lifter.complex_types.add(lifted_struct) diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index b32528b4a..678b0d21d 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -54,11 +54,11 @@ def declaration(self) -> str: @dataclass(frozen=True, order=True) -class Struct(ComplexType): +class _BaseStruct(ComplexType): """Class representing a struct type.""" members: Dict[int, ComplexTypeMember] = field(compare=False) - type_specifier: ComplexTypeSpecifier = ComplexTypeSpecifier.STRUCT + type_specifier: ComplexTypeSpecifier def add_member(self, member: ComplexTypeMember): self.members[member.offset] = member @@ -71,6 +71,16 @@ def declaration(self) -> str: return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" +@dataclass(frozen=True, order=True) +class Struct(_BaseStruct): + type_specifier: ComplexTypeSpecifier = ComplexTypeSpecifier.STRUCT + + +@dataclass(frozen=True, order=True) +class Class(_BaseStruct): + type_specifier: ComplexTypeSpecifier = ComplexTypeSpecifier.CLASS + + @dataclass(frozen=True, order=True) class Union(ComplexType): members: List[ComplexTypeMember] = field(compare=False) From 6962e1ea03a4696f31af5113cc3d89233752dffd Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Fri, 13 Oct 2023 11:17:50 +0200 Subject: [PATCH 03/16] robustly handle 'missing' enum values --- decompiler/structures/pseudo/complextypes.py | 5 +++-- decompiler/structures/pseudo/expressions.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 678b0d21d..0ef6ab1eb 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -108,8 +108,9 @@ class Enum(ComplexType): def add_member(self, member: ComplexTypeMember): self.members[member.value] = member - def get_name_by_value(self, value: int) -> str: - return self.members.get(value).name + def get_name_by_value(self, value: int) -> Optional[str]: + member = self.members.get(value) + return member.name if member is not None else None def declaration(self) -> str: members = ",\n\t".join(f"{x.name} = {x.value}" for x in self.members.values()) diff --git a/decompiler/structures/pseudo/expressions.py b/decompiler/structures/pseudo/expressions.py index 5c59afaf2..4f9774ac6 100644 --- a/decompiler/structures/pseudo/expressions.py +++ b/decompiler/structures/pseudo/expressions.py @@ -191,7 +191,10 @@ def __str__(self) -> str: Constants of type Enum are represented as strings (corresponding enumerator identifiers). """ if isinstance(self._type, Enum): - return self._type.get_name_by_value(self.value) + name = self._type.get_name_by_value(self.value) + if name is not None: + return name + # otherwise, i.e. if value is not found in Enum class, fall through if self._type.is_boolean: return "true" if self.value else "false" if isinstance(self.value, str): From 6c6705a0067b9c4a3a39a1f9c79c69ee61218f59 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Fri, 13 Oct 2023 15:58:37 +0200 Subject: [PATCH 04/16] robust handling of lifting setting of fields --- decompiler/frontend/binaryninja/handlers/assignments.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/assignments.py b/decompiler/frontend/binaryninja/handlers/assignments.py index 85d81038c..77dafd1d6 100644 --- a/decompiler/frontend/binaryninja/handlers/assignments.py +++ b/decompiler/frontend/binaryninja/handlers/assignments.py @@ -18,7 +18,7 @@ RegisterPair, UnaryOperation, ) -from decompiler.structures.pseudo.complextypes import Struct, Union +from decompiler.structures.pseudo.complextypes import Struct, Union, Class from decompiler.structures.pseudo.operations import MemberAccess @@ -67,9 +67,8 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_ """ # case 1 (struct), avoid set field of named integers: dest_type = self._lifter.lift(assignment.dest.type) - if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and not ( - isinstance(dest_type, Pointer) and isinstance(dest_type.type, Integer) - ): + if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and ( + isinstance(dest_type, Struct) or isinstance(dest_type, Class)): # otherwise get_member_by_offset not available struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment) destination = MemberAccess( offset=assignment.offset, From 7a577709014863f988f2489355aba538d71e4cf3 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Tue, 17 Oct 2023 15:26:13 +0200 Subject: [PATCH 05/16] fix import order --- decompiler/frontend/binaryninja/handlers/assignments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/handlers/assignments.py b/decompiler/frontend/binaryninja/handlers/assignments.py index 77dafd1d6..334ab18d9 100644 --- a/decompiler/frontend/binaryninja/handlers/assignments.py +++ b/decompiler/frontend/binaryninja/handlers/assignments.py @@ -18,7 +18,7 @@ RegisterPair, UnaryOperation, ) -from decompiler.structures.pseudo.complextypes import Struct, Union, Class +from decompiler.structures.pseudo.complextypes import Class, Struct, Union from decompiler.structures.pseudo.operations import MemberAccess From 6b3f1080bef26a6b75b3e2e5fe09b422e5426eac Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Wed, 18 Oct 2023 17:19:51 +0200 Subject: [PATCH 06/16] robust placeholder member names in lifter --- decompiler/frontend/binaryninja/handlers/assignments.py | 8 +++++++- decompiler/frontend/binaryninja/handlers/unary.py | 7 ++++++- decompiler/structures/pseudo/complextypes.py | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/assignments.py b/decompiler/frontend/binaryninja/handlers/assignments.py index 334ab18d9..ddb4f2459 100644 --- a/decompiler/frontend/binaryninja/handlers/assignments.py +++ b/decompiler/frontend/binaryninja/handlers/assignments.py @@ -212,8 +212,14 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, """Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x).""" vartype = self._lifter.lift(instruction.dest.expr_type) struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction) + member = vartype.type.get_member_by_offset(instruction.offset) + if member is not None: + name = member.name + else: + name = f"__offset_{instruction.offset}" + name.replace("-", "minus_") struct_member_access = MemberAccess( - member_name=vartype.type.members.get(instruction.offset), + member_name=name, offset=instruction.offset, operands=[struct_variable], vartype=vartype, diff --git a/decompiler/frontend/binaryninja/handlers/unary.py b/decompiler/frontend/binaryninja/handlers/unary.py index 180aecfd0..824f23f7c 100644 --- a/decompiler/frontend/binaryninja/handlers/unary.py +++ b/decompiler/frontend/binaryninja/handlers/unary.py @@ -99,7 +99,12 @@ def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct, struct_variable = self._lifter.lift(instruction.src) struct_ptr: Pointer = self._lifter.lift(instruction.src.expr_type) struct_member = struct_ptr.type.get_member_by_offset(instruction.offset) - return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=struct_member.offset, member_name=struct_member.name) + if struct_member is not None: + name = struct_member.name + else: + name = f"__offset_{instruction.offset}" + name.replace("-", "minus_") + return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=instruction.offset, member_name=name) def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation: """Lift a MLIL_FTRUNC operation.""" diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 0ef6ab1eb..89ea554d5 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -63,7 +63,7 @@ class _BaseStruct(ComplexType): def add_member(self, member: ComplexTypeMember): self.members[member.offset] = member - def get_member_by_offset(self, offset: int) -> ComplexTypeMember: + def get_member_by_offset(self, offset: int) -> Optional[ComplexTypeMember]: return self.members.get(offset) def declaration(self) -> str: From 5e01dd7b1c5c3643b782f37397cb3b522d882ca5 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Wed, 18 Oct 2023 17:31:16 +0200 Subject: [PATCH 07/16] handle writes_memory of member_access like dereference --- decompiler/structures/pseudo/instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/structures/pseudo/instructions.py b/decompiler/structures/pseudo/instructions.py index 362aecb72..d7dfdc8fe 100644 --- a/decompiler/structures/pseudo/instructions.py +++ b/decompiler/structures/pseudo/instructions.py @@ -146,7 +146,7 @@ def writes_memory(self) -> Optional[int]: """Return the memory version generated by this assignment, if any.""" if isinstance(self.value, Call): return self.value.writes_memory - if isinstance(self.destination, UnaryOperation) and self.destination.operation == OperationType.dereference: + if isinstance(self.destination, UnaryOperation) and self.destination.operation in {OperationType.member_access, OperationType.dereference}: return self.destination.writes_memory for variable in self.definitions: if variable.is_aliased: From ed60ad8a9d37268f1457c14cbf9c0f902c28b49c Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 11:38:16 +0200 Subject: [PATCH 08/16] fix infinite recursion in declaration --- .../frontend/binaryninja/handlers/types.py | 26 +++++++++++-------- decompiler/structures/pseudo/complextypes.py | 17 +++++++++--- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index f09b51d92..3b8f6277f 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -88,21 +88,24 @@ def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexT def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, ComplexTypeName]: """Lift struct or union type.""" - complex_type_name = ComplexTypeName(0, name) - cached_type = self._lifter.complex_types.retrieve_by_name(complex_type_name) - if cached_type is not None: - return cached_type if struct.type == StructureVariant.StructStructureType: - type_name = name if name else self._get_data_type_name(struct, keyword="struct") - lifted_struct = Struct(struct.width * self.BYTE_SIZE, type_name, {}) + keyword, type, members = "struct", Struct, {} elif struct.type == StructureVariant.UnionStructureType: - type_name = name if name else self._get_data_type_name(struct, keyword="union") - lifted_struct = Union_(struct.width * self.BYTE_SIZE, type_name, []) + keyword, type, members = "union", Union_, [] elif struct.type == StructureVariant.ClassStructureType: - type_name = name if name else self._get_data_type_name(struct, keyword="class") - lifted_struct = Class(struct.width * self.BYTE_SIZE, type_name, {}) + keyword, type, members = "class", Class, {} else: raise RuntimeError(f"Unknown struct type {struct.type.name}") + + type_name = name if name else self._get_data_type_name(struct, keyword=keyword) + if type_name.strip() == "": + type_name = f"__anonymous_{keyword}" + lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, hash(struct), members) + + cached_type = self._lifter.complex_types.retrieve_by_name(lifted_struct.complex_type_name) + if cached_type is not None: + return cached_type + self._lifter.complex_types.add(lifted_struct) for member in struct.members: lifted_struct.add_member(self.lift_struct_member(member, type_name)) @@ -124,7 +127,8 @@ def lift_struct_member(self, member: StructureMember, parent_struct_name: str = else: # if member is an embedded struct/union, the name is already available member_type = self._lifter.lift(member.type, name=member.name) - return ComplexTypeMember(0, name=member.name, offset=member.offset, type=member_type) + # This is still wrong for Pointers... + return ComplexTypeMember(member_type.size, name=member.name, offset=member.offset, type=member_type) @abstractmethod def _get_member_pointer_on_the_parent_struct(self, member: StructureMember, parent_struct_name: str) -> ComplexTypeMember: diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 89ea554d5..f229c4ec3 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass, field from enum import Enum -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from decompiler.structures.pseudo.typing import Type @@ -57,6 +57,7 @@ def declaration(self) -> str: class _BaseStruct(ComplexType): """Class representing a struct type.""" + type_id: int members: Dict[int, ComplexTypeMember] = field(compare=False) type_specifier: ComplexTypeSpecifier @@ -70,6 +71,10 @@ def declaration(self) -> str: members = ";\n\t".join(self.members[k].declaration() for k in sorted(self.members.keys())) + ";" return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" + @property + def complex_type_name(self): + return ComplexTypeName(0, self.name, self.type_id) + @dataclass(frozen=True, order=True) class Struct(_BaseStruct): @@ -83,6 +88,7 @@ class Class(_BaseStruct): @dataclass(frozen=True, order=True) class Union(ComplexType): + type_id: int members: List[ComplexTypeMember] = field(compare=False) type_specifier = ComplexTypeSpecifier.UNION @@ -99,6 +105,10 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember: if member.type == _type: return member + @property + def complex_type_name(self): + return ComplexTypeName(0, self.name, self.type_id) + @dataclass(frozen=True, order=True) class Enum(ComplexType): @@ -123,6 +133,7 @@ class ComplexTypeName(Type): struct(...) members of the same complex type""" name: str + id: Any def __str__(self) -> str: return self.name @@ -134,13 +145,13 @@ class ComplexTypeMap: def __init__(self): self._name_to_type_map: Dict[ComplexTypeName, ComplexType] = {} - def retrieve_by_name(self, typename: ComplexTypeName) -> ComplexType: + def retrieve_by_name(self, typename: ComplexTypeName) -> Optional[ComplexType]: """Get complex type by name; used to avoid recursion.""" return self._name_to_type_map.get(typename, None) def add(self, complex_type: ComplexType): """Add complex type to the mapping.""" - self._name_to_type_map[ComplexTypeName(0, complex_type.name)] = complex_type + self._name_to_type_map[complex_type.complex_type_name] = complex_type def pretty_print(self): for t in self._name_to_type_map.values(): From 50a2abdd5cdb5b91713e276021b3248173a39dd5 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 12:09:22 +0200 Subject: [PATCH 09/16] prevent name collisions for union, struct, class --- .../frontend/binaryninja/handlers/types.py | 17 +++++--- decompiler/structures/pseudo/complextypes.py | 40 +++++++++++++++++-- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index 3b8f6277f..3155e8c81 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -76,7 +76,10 @@ def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwarg def lift_enum(self, binja_enum: EnumerationType, name: str = None, **kwargs) -> Enum: """Lift enum type.""" enum_name = name if name else self._get_data_type_name(binja_enum, keyword="enum") - enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, {}) + if enum_name.strip() == "": + enum_name = f"__anonymous_enum" + enum_name = self._lifter.unique_name_provider.get_unique_name(enum_name) + enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, hash(binja_enum), {}) for member in binja_enum.members: enum.add_member(self._lifter.lift(member)) self._lifter.complex_types.add(enum) @@ -87,6 +90,11 @@ def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexT return ComplexTypeMember(size=0, name=enum_member.name, offset=-1, type=Integer(32), value=int(enum_member.value)) def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, ComplexTypeName]: + type_id = hash(struct) + cached_type = self._lifter.complex_types.retrieve_by_id(type_id) + if cached_type is not None: + return cached_type + """Lift struct or union type.""" if struct.type == StructureVariant.StructStructureType: keyword, type, members = "struct", Struct, {} @@ -100,11 +108,8 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio type_name = name if name else self._get_data_type_name(struct, keyword=keyword) if type_name.strip() == "": type_name = f"__anonymous_{keyword}" - lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, hash(struct), members) - - cached_type = self._lifter.complex_types.retrieve_by_name(lifted_struct.complex_type_name) - if cached_type is not None: - return cached_type + type_name = self._lifter.unique_name_provider.get_unique_name(type_name) + lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, type_id, members) self._lifter.complex_types.add(lifted_struct) for member in struct.members: diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index f229c4ec3..475f1d0f5 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -73,7 +73,11 @@ def declaration(self) -> str: @property def complex_type_name(self): - return ComplexTypeName(0, self.name, self.type_id) + return ComplexTypeName(0, self.name) + + @property + def complex_type_id(self): + return self.type_id @dataclass(frozen=True, order=True) @@ -107,11 +111,16 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember: @property def complex_type_name(self): - return ComplexTypeName(0, self.name, self.type_id) + return ComplexTypeName(0, self.name) + + @property + def complex_type_id(self): + return self.type_id @dataclass(frozen=True, order=True) class Enum(ComplexType): + type_id: int members: Dict[int, ComplexTypeMember] = field(compare=False) type_specifier = ComplexTypeSpecifier.ENUM @@ -126,6 +135,14 @@ def declaration(self) -> str: members = ",\n\t".join(f"{x.name} = {x.value}" for x in self.members.values()) return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" + @property + def complex_type_name(self): + return ComplexTypeName(0, self.name) + + @property + def complex_type_id(self): + return self.type_id + @dataclass(frozen=True, order=True) class ComplexTypeName(Type): @@ -133,24 +150,41 @@ class ComplexTypeName(Type): struct(...) members of the same complex type""" name: str - id: Any def __str__(self) -> str: return self.name +class UniqueNameProvider: + def __init__(self): + self._name_to_count: Dict[str, int] = {} + + def get_unique_name(self, name): + if name not in self._name_to_count: + self._name_to_count[name] = 1 + return name + else: + self._name_to_count[name] += 1 + return f"{name}__{self._name_to_count[name]}" + + class ComplexTypeMap: """A class in charge of storing complex custom/user defined types by their string representation""" def __init__(self): self._name_to_type_map: Dict[ComplexTypeName, ComplexType] = {} + self._id_to_type_map: Dict[int, ComplexType] = {} def retrieve_by_name(self, typename: ComplexTypeName) -> Optional[ComplexType]: """Get complex type by name; used to avoid recursion.""" return self._name_to_type_map.get(typename, None) + def retrieve_by_id(self, id: int) -> Optional[ComplexType]: + return self._id_to_type_map.get(id, None) + def add(self, complex_type: ComplexType): """Add complex type to the mapping.""" + self._id_to_type_map[complex_type.complex_type_id] = complex_type self._name_to_type_map[complex_type.complex_type_name] = complex_type def pretty_print(self): From 00000a41f07ac3b88e9c01aaa11cee391675347c Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 12:31:55 +0200 Subject: [PATCH 10/16] take type_id out of ComplexTypes --- .../frontend/binaryninja/handlers/types.py | 11 ++++--- decompiler/structures/pseudo/complextypes.py | 31 ++++--------------- 2 files changed, 12 insertions(+), 30 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index 3155e8c81..f5c0e1161 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -75,21 +75,22 @@ def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwarg def lift_enum(self, binja_enum: EnumerationType, name: str = None, **kwargs) -> Enum: """Lift enum type.""" + type_id = hash(binja_enum) enum_name = name if name else self._get_data_type_name(binja_enum, keyword="enum") if enum_name.strip() == "": enum_name = f"__anonymous_enum" enum_name = self._lifter.unique_name_provider.get_unique_name(enum_name) - enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, hash(binja_enum), {}) + enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, {}) for member in binja_enum.members: enum.add_member(self._lifter.lift(member)) - self._lifter.complex_types.add(enum) + self._lifter.complex_types.add(enum, type_id) return enum def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexTypeMember: """Lift enum member type.""" return ComplexTypeMember(size=0, name=enum_member.name, offset=-1, type=Integer(32), value=int(enum_member.value)) - def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, ComplexTypeName]: + def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, Union_, Class, ComplexTypeName]: type_id = hash(struct) cached_type = self._lifter.complex_types.retrieve_by_id(type_id) if cached_type is not None: @@ -109,9 +110,9 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio if type_name.strip() == "": type_name = f"__anonymous_{keyword}" type_name = self._lifter.unique_name_provider.get_unique_name(type_name) - lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, type_id, members) + lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, members) - self._lifter.complex_types.add(lifted_struct) + self._lifter.complex_types.add(lifted_struct, type_id) for member in struct.members: lifted_struct.add_member(self.lift_struct_member(member, type_name)) return lifted_struct diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 475f1d0f5..98771e260 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -28,6 +28,10 @@ def copy(self, **kwargs) -> Type: def declaration(self) -> str: raise NotImplementedError + @property + def complex_type_name(self): + return ComplexTypeName(0, self.name) + @dataclass(frozen=True, order=True) class ComplexTypeMember(ComplexType): @@ -57,7 +61,6 @@ def declaration(self) -> str: class _BaseStruct(ComplexType): """Class representing a struct type.""" - type_id: int members: Dict[int, ComplexTypeMember] = field(compare=False) type_specifier: ComplexTypeSpecifier @@ -71,14 +74,6 @@ def declaration(self) -> str: members = ";\n\t".join(self.members[k].declaration() for k in sorted(self.members.keys())) + ";" return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" - @property - def complex_type_name(self): - return ComplexTypeName(0, self.name) - - @property - def complex_type_id(self): - return self.type_id - @dataclass(frozen=True, order=True) class Struct(_BaseStruct): @@ -92,7 +87,6 @@ class Class(_BaseStruct): @dataclass(frozen=True, order=True) class Union(ComplexType): - type_id: int members: List[ComplexTypeMember] = field(compare=False) type_specifier = ComplexTypeSpecifier.UNION @@ -109,18 +103,9 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember: if member.type == _type: return member - @property - def complex_type_name(self): - return ComplexTypeName(0, self.name) - - @property - def complex_type_id(self): - return self.type_id - @dataclass(frozen=True, order=True) class Enum(ComplexType): - type_id: int members: Dict[int, ComplexTypeMember] = field(compare=False) type_specifier = ComplexTypeSpecifier.ENUM @@ -139,10 +124,6 @@ def declaration(self) -> str: def complex_type_name(self): return ComplexTypeName(0, self.name) - @property - def complex_type_id(self): - return self.type_id - @dataclass(frozen=True, order=True) class ComplexTypeName(Type): @@ -182,9 +163,9 @@ def retrieve_by_name(self, typename: ComplexTypeName) -> Optional[ComplexType]: def retrieve_by_id(self, id: int) -> Optional[ComplexType]: return self._id_to_type_map.get(id, None) - def add(self, complex_type: ComplexType): + def add(self, complex_type: ComplexType, type_id: int): """Add complex type to the mapping.""" - self._id_to_type_map[complex_type.complex_type_id] = complex_type + self._id_to_type_map[type_id] = complex_type self._name_to_type_map[complex_type.complex_type_name] = complex_type def pretty_print(self): From 4c583ffd46e2c4da47f43f1b7757bd71d1218c26 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 12:32:25 +0200 Subject: [PATCH 11/16] fix test_complextypes --- tests/structures/pseudo/test_complextypes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/structures/pseudo/test_complextypes.py b/tests/structures/pseudo/test_complextypes.py index 3bad97d60..c5a7d5c13 100644 --- a/tests/structures/pseudo/test_complextypes.py +++ b/tests/structures/pseudo/test_complextypes.py @@ -190,7 +190,7 @@ def blue(): class TestComplexTypeMap: def test_declarations(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union): assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};" - complex_types.add(book) + complex_types.add(book, 0) assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};" def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union): @@ -201,7 +201,7 @@ def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, col @pytest.fixture def complex_types(self, book: Struct, color: Enum, record_id: Union): complex_types = ComplexTypeMap() - complex_types.add(book) - complex_types.add(color) - complex_types.add(record_id) + complex_types.add(book, 0) + complex_types.add(color, 1) + complex_types.add(record_id, 2) return complex_types From 495e2688e4a6e657602cbbbf7ba4161bc81b3a35 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 12:45:57 +0200 Subject: [PATCH 12/16] missing part of 50a2abd --- decompiler/frontend/binaryninja/lifter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/lifter.py b/decompiler/frontend/binaryninja/lifter.py index e42761763..244df6ed4 100644 --- a/decompiler/frontend/binaryninja/lifter.py +++ b/decompiler/frontend/binaryninja/lifter.py @@ -6,7 +6,7 @@ from decompiler.frontend.lifter import ObserverLifter from decompiler.structures.pseudo import DataflowObject, Tag, UnknownExpression, UnknownType -from ...structures.pseudo.complextypes import ComplexTypeMap +from ...structures.pseudo.complextypes import ComplexTypeMap, UniqueNameProvider from .handlers import HANDLERS @@ -17,6 +17,7 @@ def __init__(self, no_bit_masks: bool = True, bv: BinaryView = None): self.no_bit_masks = no_bit_masks self.bv: BinaryView = bv self.complex_types: ComplexTypeMap = ComplexTypeMap() + self.unique_name_provider: UniqueNameProvider = UniqueNameProvider() for handler in HANDLERS: handler(self).register() From 81448da3ba9142f70526a07477a9bacac6b41152 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 13:01:48 +0200 Subject: [PATCH 13/16] remove duplicate property --- decompiler/structures/pseudo/complextypes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 98771e260..76960fc15 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -120,10 +120,6 @@ def declaration(self) -> str: members = ",\n\t".join(f"{x.name} = {x.value}" for x in self.members.values()) return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}" - @property - def complex_type_name(self): - return ComplexTypeName(0, self.name) - @dataclass(frozen=True, order=True) class ComplexTypeName(Type): From 09919b03c4a7fbfe09f4dd734147021e153b1590 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 14:18:45 +0200 Subject: [PATCH 14/16] Documentation for UniqueNameProvider --- decompiler/structures/pseudo/complextypes.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/decompiler/structures/pseudo/complextypes.py b/decompiler/structures/pseudo/complextypes.py index 76960fc15..86359340f 100644 --- a/decompiler/structures/pseudo/complextypes.py +++ b/decompiler/structures/pseudo/complextypes.py @@ -133,10 +133,19 @@ def __str__(self) -> str: class UniqueNameProvider: + """ The purpose of this class is to provide unique names for types, as duplicate names can potentially be encountered in the lifting stage (especially anonymous structs, etc.) + This class keeps track of all the names already used. If duplicates are found, they are renamed by appending suffixes with incrementing numbers. + E.g. `classname`, `classname__2`, `classname__3`, ... + """ + def __init__(self): self._name_to_count: Dict[str, int] = {} - - def get_unique_name(self, name): + + def get_unique_name(self, name: str): + """ This method returns the input name if it was unique so far. + Otherwise it returns the name with an added incrementing suffix. + In any case, the name occurence of the name is counted. + """ if name not in self._name_to_count: self._name_to_count[name] = 1 return name From 9036c07e9af6964b8209c31ab197407203d7a2a3 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 14:37:48 +0200 Subject: [PATCH 15/16] move name handling to _get_data_type_name --- .../frontend/binaryninja/handlers/types.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index f5c0e1161..89f382e9f 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -76,10 +76,7 @@ def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwarg def lift_enum(self, binja_enum: EnumerationType, name: str = None, **kwargs) -> Enum: """Lift enum type.""" type_id = hash(binja_enum) - enum_name = name if name else self._get_data_type_name(binja_enum, keyword="enum") - if enum_name.strip() == "": - enum_name = f"__anonymous_enum" - enum_name = self._lifter.unique_name_provider.get_unique_name(enum_name) + enum_name = self._get_data_type_name(binja_enum, keyword="enum", provided_name=name) enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, {}) for member in binja_enum.members: enum.add_member(self._lifter.lift(member)) @@ -106,10 +103,7 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio else: raise RuntimeError(f"Unknown struct type {struct.type.name}") - type_name = name if name else self._get_data_type_name(struct, keyword=keyword) - if type_name.strip() == "": - type_name = f"__anonymous_{keyword}" - type_name = self._lifter.unique_name_provider.get_unique_name(type_name) + type_name = self._get_data_type_name(struct, keyword=keyword, provided_name=name) lifted_struct = type(struct.width * self.BYTE_SIZE, type_name, members) self._lifter.complex_types.add(lifted_struct, type_id) @@ -118,12 +112,23 @@ def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Unio return lifted_struct @abstractmethod - def _get_data_type_name(self, complex_type: Union[StructureType, EnumerationType], keyword: str) -> str: - """Parse out the name of complex type.""" - string = complex_type.get_string() - if keyword in string: - return complex_type.get_string().split(keyword)[1] - return string + def _get_data_type_name(self, complex_type: Union[StructureType, EnumerationType], keyword: str, provided_name:str) -> str: + """Parse out the name of complex type. Empty and duplicate names are changed. + Calling this function has the side effect of incrementing a counter in the UniqueNameProvider.""" + if provided_name: + name = provided_name + else: + type_string = complex_type.get_string() + if keyword in type_string: + name = complex_type.get_string().split(keyword)[1] + else: + name = type_string + + if name.strip() == "": + name = f"__anonymous_{keyword}" + name = self._lifter.unique_name_provider.get_unique_name(name) + + return name def lift_struct_member(self, member: StructureMember, parent_struct_name: str = None) -> ComplexTypeMember: """Lift struct or union member.""" From e402d24a706373da71d4bf8fb5c2fdeb2585a9c5 Mon Sep 17 00:00:00 2001 From: Manuel Blatt Date: Thu, 19 Oct 2023 14:49:08 +0200 Subject: [PATCH 16/16] removed misleading comment --- decompiler/frontend/binaryninja/handlers/types.py | 1 - 1 file changed, 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index 89f382e9f..eeee02724 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -138,7 +138,6 @@ def lift_struct_member(self, member: StructureMember, parent_struct_name: str = else: # if member is an embedded struct/union, the name is already available member_type = self._lifter.lift(member.type, name=member.name) - # This is still wrong for Pointers... return ComplexTypeMember(member_type.size, name=member.name, offset=member.offset, type=member_type) @abstractmethod