Skip to content

Commit

Permalink
Add init enum type support
Browse files Browse the repository at this point in the history
  • Loading branch information
mari-mari committed Jul 21, 2023
1 parent 4a58927 commit 13d274b
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 36 deletions.
2 changes: 2 additions & 0 deletions decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ def generate(self, tasks: Iterable[DecompilerTask], run_cleanup: bool = True):
for task in tasks:
if run_cleanup and not task.failed:
task.syntax_tree.clean_up()
# TODO change this in task
string_blocks.append("\n"+task._complex_types.declarations())
string_blocks.append(self.generate_function(task))
return "\n\n".join(string_blocks)

Expand Down
5 changes: 4 additions & 1 deletion decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
UnaryOperation,
Variable,
)
from decompiler.structures.pseudo.operations import StructMemberAccess
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
from decompiler.util.serialization.bytes_serializer import convert_bytes
Expand All @@ -41,6 +42,9 @@ def from_task(cls, task: DecompilerTask):
def visit_assignment(self, instruction: Assignment):
    """Record every variable defined by the given assignment.

    If the assignment's destination is a struct member access, its
    `struct_variable` is recorded as well.
    """
    self._variables.update(instruction.definitions)
    target = instruction.destination
    # TODO is there a better way? Should structs be in assignment definitions?
    if not isinstance(target, StructMemberAccess):
        return
    self._variables.update((target.struct_variable,))

def visit_loop_node(self, node: LoopNode):
"""Visit the given loop node, taking note of the loop declaration."""
Expand All @@ -67,7 +71,6 @@ def generate(self, param_names: list = []) -> Iterator[str]:
for variable in sorted(self._variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable):
variable_type_mapping[variable.type].append(variable)

for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)):
for chunked_variables in self._chunks(variables, self._vars_per_line):
variable_names = ", ".join([var.name for var in chunked_variables])
Expand Down
9 changes: 5 additions & 4 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

import logging
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union

from binaryninja import BinaryView, BinaryViewType, Function
from binaryninja.types import SymbolType
Expand All @@ -12,6 +12,7 @@
from decompiler.task import DecompilerTask
from decompiler.util.options import Options

from ...structures.pseudo.complextypes import ComplexTypeMap
from ..frontend import Frontend
from .lifter import BinaryninjaLifter
from .parser import BinaryninjaParser
Expand Down Expand Up @@ -127,10 +128,10 @@ def create_task(self, function_identifier: Union[str, Function], options: Option
tagging = CompilerIdiomsTagging(self._bv, function.function.start, options)
tagging.run()
try:
cfg = self._extract_cfg(function.function, options)
cfg, complex_types = self._extract_cfg(function.function, options)
task = DecompilerTask(
function.name, cfg, function_return_type=function.return_type, function_parameters=function.params,
options=options
options=options, complex_types=complex_types
)
except Exception as e:
task = DecompilerTask(
Expand All @@ -154,7 +155,7 @@ def get_all_function_names(self):
functions.append(function.name)
return functions

def _extract_cfg(self, function: Function, options: Options) -> ControlFlowGraph:
def _extract_cfg(self, function: Function, options: Options) -> Tuple[ControlFlowGraph, ComplexTypeMap]:
"""Extract a control flow graph utilizing the parser and fixing it afterwards."""
report_threshold = options.getint("lifter.report_threshold", fallback=3)
no_masks = options.getboolean("lifter.no_bit_masks", fallback=True)
Expand Down
52 changes: 34 additions & 18 deletions decompiler/frontend/binaryninja/handlers/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
ArrayType,
BoolType,
CharType,
EnumerationMember,
EnumerationType,
FloatType,
FunctionType,
Expand All @@ -20,7 +21,7 @@
)
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import CustomType, Float, FunctionTypeDef, Integer, Pointer, UnknownType, Variable
from decompiler.structures.pseudo.complextypes import ComplexTypeMember, ComplexTypeName, Struct
from decompiler.structures.pseudo.complextypes import ComplexTypeMember, ComplexTypeName, Enum, Struct
from decompiler.structures.pseudo.complextypes import Union as Union_


Expand All @@ -38,11 +39,12 @@ def register(self):
VoidType: self.lift_void,
CharType: self.lift_integer,
WideCharType: self.lift_custom,
NamedTypeReferenceType: self.lift_custom,
NamedTypeReferenceType: self.lift_named_type_reference_type,
StructureType: self.lift_struct,
StructureMember: self.lift_struct_member,
FunctionType: self.lift_function_type,
EnumerationType: self.lift_custom,
EnumerationType: self.lift_enum,
EnumerationMember: self.lift_enum_member,
type(None): self.lift_none,
}
)
Expand All @@ -53,31 +55,48 @@ def lift_none(self, _: None, **kwargs):

def lift_custom(self, custom: Type, **kwargs) -> CustomType:
"""Lift custom types such as structs as a custom type."""
# TODO split lifting custom from lifting namedtypereferencetype
return CustomType(str(custom), custom.width * self.BYTE_SIZE)

def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwargs) -> Union[Type, CustomType]:
"""Lift a special type that Binary Ninja uses as a placeholder for references to complex types like structs, unions, etc.
Binja does not attach complex types to expressions; it attaches this type instead, which holds little more than the name of the
corresponding complex type.
We try to retrieve the original complex type from the binary view using this placeholder type and lift it accordingly.
"""
view: BinaryView = self._lifter.bv
if isinstance(custom, NamedTypeReferenceType) and (defined_type := view.get_type_by_name(custom.name)):
if defined_type := view.get_type_by_name(custom.name): # actually should always be the case
return self._lifter.lift(defined_type, **kwargs)
logging.warning(f"NamedTypeReferenceType {custom} was not found in binary view types.")
return CustomType(str(custom), custom.width * self.BYTE_SIZE)

# def lift_union(self, union):
# logging.error("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
# return CustomType(str(union), union.width * self.BYTE_SIZE)

def lift_struct(self, struct: StructureType, name=None, incomplete=False, **kwargs) -> Union[Struct, ComplexTypeName]:
"""Lift struct type."""
def lift_enum(self, binja_enum: EnumerationType, **kwargs) -> Enum:
    """Lift a Binary Ninja enumeration type to an Enum and add it to the lifter's complex types."""
    # TODO better way to get enum name
    enum_name = binja_enum.get_string().split("enum ")[1]
    lifted_enum = Enum(0, enum_name, {})
    for binja_member in binja_enum.members:
        lifted_member = self._lifter.lift(binja_member)
        lifted_enum.add_member(lifted_member)
    self._lifter.complex_types.add(lifted_enum)
    return lifted_enum

def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexTypeMember:
    """Lift a Binary Ninja enumeration member to a ComplexTypeMember holding the lifted enumerator value."""
    # TODO enum constant type is always int in Binja
    return ComplexTypeMember(
        size=0,
        name=enum_member.name,
        offset=-1,  # enumerators are created with a fixed offset of -1
        type=Integer(32),
        value=self._lifter.lift(enum_member.value),
    )

def lift_struct(self, struct: StructureType, name=None, **kwargs) -> Union[Struct, ComplexTypeName]:
"""Lift struct or union type."""
# TODO better way to get the name
# TODO type width?
# TODO type width for size?
if name:
struct_name = name
else:
struct_name = self._get_data_type_name(struct)
lifted_struct = None
if struct.type == StructureVariant.StructStructureType:
lifted_struct = Struct(0, struct_name, {})
elif struct.type == StructureVariant.UnionStructureType:
lifted_struct = Union_(0, struct_name, [])
else:
raise RuntimeError(f"Unk struct type {struct.type.name}")
raise RuntimeError(f"Unknown struct type {struct.type.name}")
for m in struct.members:
member = self.lift_struct_member(m, struct_name)
lifted_struct.add_member(member)
Expand All @@ -92,8 +111,6 @@ def _get_data_type_name(self, struct: StructureType):
return string

def lift_struct_member(self, member: StructureMember, parent_struct_name: str = None) -> ComplexTypeMember:
member_type = None

# handle the case when struct member is a pointer to the same struct
if (
isinstance(member.type, PointerType)
Expand All @@ -104,8 +121,7 @@ def lift_struct_member(self, member: StructureMember, parent_struct_name: str =
member_type = Pointer(ComplexTypeName(0, member_struct_name))

else:
# logging.error(f"Parent: {parent_struct_name}")
# logging.error(f"Member {member}")
# if member is an embedded struct/union, the name is already available
member_type = self._lifter.lift(member.type, name=member.name)
return ComplexTypeMember(0, name=member.name, offset=member.offset, type=member_type)

Expand Down
9 changes: 5 additions & 4 deletions decompiler/frontend/binaryninja/parser.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Implements the parser for the binaryninja frontend."""
from logging import info, warning
from typing import Dict, Iterator, List
from typing import Dict, Iterator, List, Tuple

from binaryninja import BranchType, Function, MediumLevelILBasicBlock, MediumLevelILInstruction, MediumLevelILJumpTo, RegisterValueType
from decompiler.frontend.lifter import Lifter
from decompiler.frontend.parser import Parser
from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, FalseCase, IndirectEdge, SwitchCase, TrueCase, UnconditionalEdge
from decompiler.structures.pseudo import Constant, Instruction
from decompiler.structures.pseudo.complextypes import ComplexTypeMap


class BinaryninjaParser(Parser):
Expand All @@ -26,7 +27,7 @@ def __init__(self, lifter: Lifter, report_threshold: int = 3):
self._unlifted_instructions: List[MediumLevelILInstruction] = []
self._report_threshold = int(report_threshold)

def parse(self, function: Function) -> ControlFlowGraph:
def parse(self, function: Function) -> Tuple[ControlFlowGraph, ComplexTypeMap]:
"""Generate a cfg from the given function."""
cfg = ControlFlowGraph()
index_to_BasicBlock = dict()
Expand All @@ -35,9 +36,9 @@ def parse(self, function: Function) -> ControlFlowGraph:
cfg.add_node(index_to_BasicBlock[basic_block.index])
for basic_block in function.medium_level_il.ssa_form:
self._add_basic_block_edges(cfg, index_to_BasicBlock, basic_block)
complex_types = self._lifter.complex_types
self._report_lifter_errors()
self._lifter.complex_types.pretty_print()
return cfg
return cfg, complex_types

def _add_basic_block_edges(self, cfg: ControlFlowGraph, vertices: dict, basic_block: MediumLevelILBasicBlock) -> None:
"""Add all outgoing edges of the given basic block to the given cfg."""
Expand Down
6 changes: 3 additions & 3 deletions decompiler/frontend/lifter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
class Lifter(ABC):
"""Represents a basic lifter emitting decompiler IR."""

def __init__(self):
self.complex_types = None

@abstractmethod
def lift(self, expression, **kwargs) -> Expression:
"""Lift the given expression to pseudo IR."""
Expand All @@ -22,9 +25,6 @@ class ObserverLifter(Lifter):

HANDLERS: Dict[Type[T], Callable[[T], V]] = {}

def __init__(self):
self.complex_types = {}

def lift(self, expression: T, **kwargs) -> V:
"""Lift the given expression based on the registered handlers."""
handler = self.HANDLERS.get(type(expression), self.lift_unknown)
Expand Down
1 change: 1 addition & 0 deletions decompiler/structures/pseudo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .complextypes import ComplexType, ComplexTypeMember, ComplexTypeName, Enum, Struct, Union
from .delogic_logic import DelogicConverter
from .expressions import (
Constant,
Expand Down
31 changes: 26 additions & 5 deletions decompiler/structures/pseudo/complextypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from abc import ABC
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List
from typing import Dict, List, Optional

from decompiler.structures.pseudo.typing import Type

Expand All @@ -29,10 +29,12 @@ class ComplexTypeMember(ComplexType):
@param name: name of the struct member
@param offset: offset of the member within the struct
@param type: datatype of the member
@param value: initial value of the member, enums only
"""
name: str
offset: int
type: Type
value: Optional[int] = None

def __str__(self) -> str:
return f"{self.name}"
Expand Down Expand Up @@ -61,8 +63,8 @@ def get_member_by_offset(self, offset: int) -> ComplexTypeMember:
return self.members.get(offset)

def declaration(self):
members = ",\n\t".join(x.declaration() for x in self.members.values())
return f"{self.type_specifier.value} {{\n\t{members}\n}} {self.name};"
members = ";\n\t".join(x.declaration() for x in self.members.values())+";"
return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}};"


@dataclass(frozen=True, order=True)
Expand All @@ -77,8 +79,24 @@ def add_member_(self, name: str, type_: Type):
self.members.append(ComplexTypeMember(name, 0, type_))

def declaration(self):
members = ",\n\t".join(x.declaration() for x in self.members)
return f"{self.type_specifier.value} {{\n\t{members}\n}} {self.name} ;"
members = ";\n\t".join(x.declaration() for x in self.members)
return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}};"


@dataclass(frozen=True, order=True)
class Enum(ComplexType):
    """Enumeration type that maps enumerator values to their members.

    NOTE: this class shadows ``enum.Enum`` imported at the top of this module
    for all module-level code that follows this definition.
    """

    # Enumerator value -> member; excluded from the generated dataclass comparisons.
    members: Dict[int, ComplexTypeMember] = field(compare=False)
    type_specifier = ComplexTypeSpecifier.ENUM

    def add_member(self, member: ComplexTypeMember):
        """Register the member under its value; a member already stored for that value is replaced."""
        self.members[member.value] = member

    def get_name_by_value(self, value: int) -> str:
        """Return the enumerator name for the given value.

        If no enumerator has this value, the string form of the value itself is returned
        instead of failing, so that constants without a matching enumerator can still be printed.
        """
        member = self.members.get(value)
        if member is None:
            return str(value)
        return member.name

    def declaration(self):
        """Return the declaration string of this enum, listing one `name = value` entry per member."""
        members = ",\n\t".join(f"{x.name} = {x.value}" for x in self.members.values())
        return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}};"


@dataclass(frozen=True, order=True)
Expand Down Expand Up @@ -107,3 +125,6 @@ def add(self, complex_type: Struct):
def pretty_print(self):
for t in self._name_to_type_map.values():
logging.error(t.declaration())

def declarations(self) -> str:
    """Return the declarations of all stored complex types, joined by newlines."""
    rendered = [complex_type.declaration() for complex_type in self._name_to_type_map.values()]
    return "\n".join(rendered)
7 changes: 6 additions & 1 deletion decompiler/structures/pseudo/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, Generic, Iterator, List, Optional, Tuple, TypeVar, Union

from .complextypes import Enum
from .typing import CustomType, Type, UnknownType

T = TypeVar("T")
Expand Down Expand Up @@ -186,7 +187,11 @@ def __repr__(self) -> str:
return f"{value} type: {self.type}"

def __str__(self) -> str:
"""Return a hex-based string representation for integers, strings are printed with double quotation marks"""
"""Return a hex-based string representation for integers, strings are printed with double quotation marks.
Constants of type Enum are represented as strings (corresponding enumerator identifiers).
"""
if isinstance(self._type, Enum):
return self._type.get_name_by_value(self.value)
if self._type.is_boolean:
return "true" if self.value else "false"
if isinstance(self.value, str):
Expand Down

0 comments on commit 13d274b

Please sign in to comment.