Skip to content

Commit

Permalink
Merge branch 'main' into issue-25-_Code_Generator_Simplify_Expressions
Browse files Browse the repository at this point in the history
  • Loading branch information
blattm authored Sep 21, 2023
2 parents 5f22eb1 + e1ff6e4 commit 9e1123d
Show file tree
Hide file tree
Showing 33 changed files with 1,508 additions and 112 deletions.
21 changes: 18 additions & 3 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import logging
from ctypes import c_byte, c_int, c_long, c_short, c_ubyte, c_uint, c_ulong, c_ushort
from itertools import chain, repeat
from typing import Union

from decompiler.structures import pseudo as expressions
from decompiler.structures.pseudo import Float, Integer, OperationType, StringSymbol
from decompiler.structures.pseudo import Float, FunctionTypeDef, Integer, OperationType, Pointer, StringSymbol, Type
from decompiler.structures.pseudo import instructions as instructions
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface


Expand Down Expand Up @@ -65,6 +65,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
OperationType.greater_or_equal_us: ">=",
OperationType.dereference: "*",
OperationType.address: "&",
OperationType.member_access: ".",
# Handled in code
# OperationType.cast: "cast",
# OperationType.pointer: "point",
Expand Down Expand Up @@ -146,7 +147,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.low: "low",
OperationType.ternary: 30,
OperationType.call: 150,
OperationType.field: 150,
OperationType.member_access: 150,
OperationType.list_op: 10,
# TODO: Figure out what these are / how to handle this
# OperationType.adc: "adc",
Expand Down Expand Up @@ -180,6 +181,9 @@ def visit_list_operation(self, op: operations.ListOperation) -> str:

def visit_unary_operation(self, op: operations.UnaryOperation) -> str:
"""Return a string representation of the given unary operation (e.g. !a or &a)."""
if isinstance(op, MemberAccess):
operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation]
return f"{self.visit(op.struct_variable)}{operator_str}{op.member_name}"
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
if op.operation == OperationType.cast and op.contraction:
return f"({int(op.type.size / 8)}: ){operand}"
Expand Down Expand Up @@ -361,3 +365,14 @@ def _format_string_literal(constant: expressions.Constant) -> str:
escaped = string_representation.replace('"', '\\"')
return f'"{escaped}"'
return f"{constant}"

@staticmethod
def format_variables_declaration(var_type: Type, var_names: list[str]) -> str:
    """
    Render a C declaration (without trailing semicolon) for the given names sharing one type.

    Pointers to function types are emitted in C function-pointer syntax,
    i.e. `<return type> (* name_1)(<parameters>), (* name_2)(<parameters>)`;
    every other type is emitted as `<type> name_1, name_2`.
    """
    # Only a Pointer whose pointee is a FunctionTypeDef needs the special declarator form.
    pointee = getattr(var_type, "type", None) if isinstance(var_type, Pointer) else None
    if isinstance(pointee, FunctionTypeDef):
        rendered_parameters = ", ".join(map(str, pointee.parameters))
        declarators = ", ".join(f"(* {name})({rendered_parameters})" for name in var_names)
        return f"{pointee.return_type} {declarators}"
    return f"{var_type} {', '.join(var_names)}"
7 changes: 6 additions & 1 deletion decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from string import Template
from typing import Iterable, List

from decompiler.backend.cexpressiongenerator import CExpressionGenerator
from decompiler.backend.codevisitor import CodeVisitor
from decompiler.backend.variabledeclarations import GlobalDeclarationGenerator, LocalDeclarationGenerator
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -29,6 +30,7 @@ def generate(self, tasks: Iterable[DecompilerTask], run_cleanup: bool = True):
for task in tasks:
if run_cleanup and not task.failed:
task.syntax_tree.clean_up()
string_blocks.append(task.complex_types.declarations())
string_blocks.append(self.generate_function(task))
return "\n\n".join(string_blocks)

Expand All @@ -37,7 +39,10 @@ def generate_function(self, task: DecompilerTask) -> str:
return self.TEMPLATE.substitute(
return_type=task.function_return_type,
name=task.name,
parameters=", ".join(map(lambda param: f"{param.type} {param.name}", task.function_parameters)),
parameters=", ".join(map(
lambda param: CExpressionGenerator.format_variables_declaration(param.type, [param.name]),
task.function_parameters
)),
local_declarations=LocalDeclarationGenerator.from_task(task) if not task.failed else "",
function_body=CodeVisitor(task).visit(task.syntax_tree.root) if not task.failed else task.failure_message,
)
17 changes: 10 additions & 7 deletions decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections import defaultdict
from typing import Iterable, Iterator, List, Set

from decompiler.backend.cexpressiongenerator import CExpressionGenerator
from decompiler.structures.ast.ast_nodes import ForLoopNode, LoopNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo import (
Expand All @@ -17,6 +18,7 @@
UnaryOperation,
Variable,
)
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
from decompiler.util.serialization.bytes_serializer import convert_bytes
Expand Down Expand Up @@ -52,6 +54,8 @@ def visit_loop_node(self, node: LoopNode):

def visit_unary_operation(self, unary: UnaryOperation):
"""Visit unary operations to remember all variables whose memory location was read."""
if isinstance(unary, MemberAccess):
self._variables.add(unary.struct_variable)
if unary.operation == OperationType.address or unary.operation == OperationType.dereference:
if isinstance(unary.operand, Variable):
self._variables.add(unary.operand)
Expand All @@ -61,19 +65,18 @@ def visit_unary_operation(self, unary: UnaryOperation):
else:
self.visit(unary.operand.left)

def generate(self, param_names: list = []) -> Iterator[str]:
def generate(self, param_names: list[str] = []) -> Iterator[str]:
"""Generate a string containing the variable definitions for the visited variables."""
variable_type_mapping = defaultdict(list)
for variable in sorted(self._variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable):
if not isinstance(variable, GlobalVariable) and variable.name not in param_names:
variable_type_mapping[variable.type].append(variable)

for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)):
for chunked_variables in self._chunks(variables, self._vars_per_line):
variable_names = ", ".join([var.name for var in chunked_variables])
if variable_names in param_names:
continue
yield f"{variable_type} {variable_names};"
yield CExpressionGenerator.format_variables_declaration(
variable_type,
[var.name for var in chunked_variables]
) + ";"

@staticmethod
def _chunks(lst: List, n: int) -> Iterator[List]:
Expand Down
17 changes: 9 additions & 8 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from __future__ import annotations

import logging
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union

from binaryninja import BinaryView, BinaryViewType, Function
from binaryninja import BinaryView, Function, load
from binaryninja.types import SymbolType
from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.pseudo.complextypes import ComplexTypeMap
from decompiler.structures.pseudo.expressions import Variable
from decompiler.structures.pseudo.typing import Type
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -112,7 +113,7 @@ def __init__(self, bv: BinaryView):
def from_path(cls, path: str, options: Options):
"""Create a frontend object by invoking binaryninja on the given sample."""
file_options = {"analysis.limits.maxFunctionSize": options.getint("binaryninja.max_function_size")}
if (bv := BinaryViewType.get_view_of_file_with_options(path, options=file_options)) is not None:
if (bv := load(path, options=file_options)) is not None:
return cls(bv)
raise RuntimeError("Failed to create binary view")

Expand All @@ -127,10 +128,10 @@ def create_task(self, function_identifier: Union[str, Function], options: Option
tagging = CompilerIdiomsTagging(self._bv, function.function.start, options)
tagging.run()
try:
cfg = self._extract_cfg(function.function, options)
cfg, complex_types = self._extract_cfg(function.function, options)
task = DecompilerTask(
function.name, cfg, function_return_type=function.return_type, function_parameters=function.params,
options=options
options=options, complex_types=complex_types
)
except Exception as e:
task = DecompilerTask(
Expand All @@ -154,9 +155,9 @@ def get_all_function_names(self):
functions.append(function.name)
return functions

def _extract_cfg(self, function: Function, options: Options) -> ControlFlowGraph:
def _extract_cfg(self, function: Function, options: Options) -> Tuple[ControlFlowGraph, ComplexTypeMap]:
"""Extract a control flow graph utilizing the parser and fixing it afterwards."""
report_threshold = options.getint("lifter.report_threshold", fallback=3)
no_masks = options.getboolean("lifter.no_bit_masks", fallback=True)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks), report_threshold)
return parser.parse(function)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks, bv=function.view), report_threshold)
return parser.parse(function), parser.complex_types
94 changes: 64 additions & 30 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""Module implementing the AssignmentHandler for binaryninja."""
import logging
from functools import partial
from typing import Union

import binaryninja
from binaryninja import mediumlevelil
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import (
Assignment,
BinaryOperation,
Constant,
Expression,
GlobalVariable,
Integer,
Operation,
Expand All @@ -16,6 +18,8 @@
RegisterPair,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct, Union
from decompiler.structures.pseudo.operations import MemberAccess


class AssignmentHandler(Handler):
Expand All @@ -38,8 +42,8 @@ def register(self):
mediumlevelil.MediumLevelILVarAliasedField: partial(self.lift_get_field, is_aliased=True),
mediumlevelil.MediumLevelILStore: self.lift_store,
mediumlevelil.MediumLevelILStoreSsa: self.lift_store,
mediumlevelil.MediumLevelILStoreStruct: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStruct: self.lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self.lift_store_struct,
mediumlevelil.MediumLevelILLowPart: self._lift_mask_high,
}
)
Expand All @@ -54,16 +58,31 @@ def lift_assignment(self, assignment: mediumlevelil.MediumLevelILSetVar, is_alia
def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_aliased=False, **kwargs) -> Assignment:
"""
Lift an instruction writing to a subset of the given value.
In case of lower register (offset 0) lift as contraction
e.g. eax.al = .... <=> (char)eax ....
In case higher registers use masking
e.g. eax.ah = x <=> eax = (eax & 0xffff00ff) + (x << 2)
case 1: writing into struct member: book.title = value
lift as struct_member(book, title, writes_memory) = value
case 2: writing into lower register part (offset 0): eax.al = value
lift as contraction (char) eax = value
case 3: writing into higher register part: eax.ah = value
lift using bit masking eax = (eax & 0xffff00ff) + (value << 8)
"""
if assignment.offset == 0 and self._lifter.is_omitting_masks:
# case 1 (struct), avoid set field of named integers:
dest_type = self._lifter.lift(assignment.dest.type)
if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and not (
isinstance(dest_type, Pointer) and isinstance(dest_type.type, Integer)
):
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
elif assignment.offset == 0 and self._lifter.is_omitting_masks:
destination = self._lift_contraction(assignment, is_aliased=is_aliased, parent=assignment)
value = self._lifter.lift(assignment.src)
# case 3 (bit masking):
else:
destination = self._lifter.lift(assignment.dest, is_aliased=is_aliased, parent=assignment)
value = self._lift_masked_operand(assignment)
Expand All @@ -72,9 +91,16 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_aliased=False, **kwargs) -> Operation:
"""
Lift an instruction accessing a field from the outside.
e.g. x = eax.ah <=> x = eax & 0x0000ff00
case 1: struct member read access e.g. (x = )book.title
lift as (x = ) struct_member(book, title)
case 2: accessing register portion e.g. (x = )eax.ah
lift as (x = ) eax & 0x0000ff00
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
return BinaryOperation(
Expand All @@ -84,14 +110,30 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
)
return UnaryOperation(OperationType.cast, [source], vartype=cast_type, contraction=True)

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
    """
    Model a MLIL var_field read on a struct or union variable as a MemberAccess.

    The member name is resolved as follows:
    - struct source: the member found at the accessed offset,
    - otherwise, if a truthy "parent" keyword argument is given: the member whose type equals
      the lifted type of parent.dest,
    - otherwise: a warning is logged and a placeholder name derived from the offset is used.
    """
    source_type = source.type
    parent = kwargs.get("parent")
    if isinstance(source_type, Struct):
        name = source_type.get_member_by_offset(instruction.offset).name
    elif parent:
        # No offset lookup here; the member is selected via the type of the parent's destination.
        lifted_dest_type = self._lifter.lift(parent.dest.type)
        name = source_type.get_member_by_type(lifted_dest_type).name
    else:
        logging.warning(f"Cannot get member name for instruction {instruction}")
        name = f"field_{hex(instruction.offset)}"
    return MemberAccess(offset=instruction.offset, member_name=name, operands=[source])

def lift_store(self, assignment: mediumlevelil.MediumLevelILStoreSsa, **kwargs) -> Assignment:
    """Translate a MLIL store into a pseudo Assignment (e.g. [ebp+4] = eax, or [global_var_label] = 25)."""
    # The destination is lifted before the source, matching left-to-right argument evaluation.
    destination = self._lift_store_destination(assignment)
    value = self._lifter.lift(assignment.src)
    return Assignment(destination, value)

def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> Union[UnaryOperation, GlobalVariable]:
def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> UnaryOperation | GlobalVariable:
"""
Lift destination operand of store operation which is used for modelling both assignments of dereferences and global variables.
"""
Expand Down Expand Up @@ -167,24 +209,16 @@ def lift_split_assignment(self, assignment: mediumlevelil.MediumLevelILSetVarSpl
self._lifter.lift(assignment.src, parent=assignment),
)

def _lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, **kwargs) -> Assignment:
def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, **kwargs) -> Assignment:
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
return Assignment(
UnaryOperation(
OperationType.dereference,
[
BinaryOperation(
OperationType.plus,
[
UnaryOperation(OperationType.cast, [self._lifter.lift(instruction.dest)], vartype=Pointer(Integer.char())),
Constant(instruction.offset),
],
vartype=vartype,
),
],
vartype=Pointer(vartype),
writes_memory=instruction.dest_memory
),
self._lifter.lift(instruction.src),
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
struct_member_access = MemberAccess(
member_name=vartype.type.members.get(instruction.offset),
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
writes_memory=instruction.dest_memory,
)
src = self._lifter.lift(instruction.src)
return Assignment(struct_member_access, src)
10 changes: 5 additions & 5 deletions decompiler/frontend/binaryninja/handlers/calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from functools import partial
from typing import List

from binaryninja import MediumLevelILInstruction, Tailcall, mediumlevelil
from binaryninja import FunctionType, PointerType, Tailcall, mediumlevelil
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import Assignment, Call, ImportedFunctionSymbol, IntrinsicSymbol, ListOperation

Expand Down Expand Up @@ -73,11 +73,11 @@ def lift_intrinsic(self, call: mediumlevelil.MediumLevelILIntrinsic, ssa: bool =

@staticmethod
def _lift_call_parameter_names(instruction: mediumlevelil.MediumLevelILCall) -> List[str]:
"""Lift parameter names of call from type string of instruction.dest.expr_type"""
if instruction.dest.expr_type is None:
"""Lift parameter names of call by iterating over the function parameters where the call is pointing to (if available)"""
if instruction.dest.expr_type is None or not isinstance(instruction.dest.expr_type, PointerType) or \
not isinstance(instruction.dest.expr_type.target, FunctionType):
return []
clean_type_string_of_parameters = instruction.dest.expr_type.get_string_after_name().strip("()")
return [type_parameter.rsplit(" ", 1)[-1] for type_parameter in clean_type_string_of_parameters.split(",")]
return [param.name for param in instruction.dest.expr_type.target.parameters]

@staticmethod
def _lift_syscall_parameter_names(instruction: mediumlevelil.MediumLevelILSyscall) -> List[str]:
Expand Down
Loading

0 comments on commit 9e1123d

Please sign in to comment.