Skip to content

Commit

Permalink
Merge branch 'main' into issue-35-_Restructuring_Constructing_initial…
Browse files Browse the repository at this point in the history
…_switch
  • Loading branch information
steffenenders authored Feb 7, 2024
2 parents 92f6583 + e80b8ca commit 7c2cb43
Show file tree
Hide file tree
Showing 134 changed files with 1,492 additions and 615 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ RUN apt -y update && apt -y upgrade && apt install -y --no-install-recommends \
virtualenv \
unzip \
astyle \
graphviz \
# plotting ascii graphs for debug purposes
libgraph-easy-perl \
z3
Expand Down
1 change: 1 addition & 0 deletions __init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Main plugin file registering plugin commands in bianryninja."""

from logging import info, warning
from os.path import dirname, realpath
from sys import path
Expand Down
11 changes: 5 additions & 6 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def visit_list_operation(self, op: operations.ListOperation) -> str:

def visit_unary_operation(self, op: operations.UnaryOperation) -> str:
"""Return a string representation of the given unary operation (e.g. !a or &a)."""
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
if isinstance(op, MemberAccess):
operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation]
return f"{self.visit(op.struct_variable)}{operator_str}{op.member_name}"
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
return f"{operand}{operator_str}{op.member_name}"
if op.operation == OperationType.cast and op.contraction:
return f"({int(op.type.size / 8)}: ){operand}"
if op.operation == OperationType.cast:
Expand Down Expand Up @@ -209,10 +209,9 @@ def visit_call(self, op: operations.Call) -> str:
Generic labels starting with 'arg' e.g. 'arg1', 'arg2' are being filtered.
Additionally we filter ellipsis argument '...' that is lifted from type string.
"""
func_name = self.visit(op.function)
if isinstance(op.function, expressions.Constant):
func_name = func_name.strip('"')
output = f"{func_name}("
func_expr_str = self._visit_bracketed(op.function) if self._has_lower_precedence(op.function, op) else self.visit(op.function)

output = f"{func_expr_str}("
if op.meta_data is not None:
parameter_names = op.meta_data.get("param_names", [])
is_tailcall = op.meta_data.get("is_tailcall")
Expand Down
1 change: 1 addition & 0 deletions decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module in charge of bundling all classes utilized to generate c-code from an AST."""

from string import Template
from typing import Iterable, List

Expand Down
30 changes: 27 additions & 3 deletions decompiler/backend/codevisitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(self, task: DecompilerTask):
self._int_repr_scope: int = task.options.getint("code-generator.int_representation_scope", fallback=256)
self._neg_hex_as_twos_complement: bool = task.options.getboolean("code-generator.negative_hex_as_twos_complement", fallback=True)
self._aggressive_array_detection: bool = task.options.getboolean("code-generator.aggressive_array_detection", fallback=False)
self._preferred_true_branch: str = task.options.getstring("code-generator.preferred_true_branch", fallback="none")
self.task = task

def visit_seq_node(self, node: ast_nodes.SeqNode) -> str:
Expand Down Expand Up @@ -70,10 +71,33 @@ def visit_condition_node(self, node: ast_nodes.ConditionNode) -> str:
true_str = self.visit(node.true_branch_child)
if node.false_branch is None:
return f"if ({self._condition_string(node.condition)}) {{{true_str}}}"

false_str = self.visit(node.false_branch_child)
if isinstance(node.false_branch_child, ast_nodes.ConditionNode):
return f"if ({self._condition_string(node.condition)}){{{true_str}}} else {false_str}"
return f"if ({self._condition_string(node.condition)}){{{true_str}}} else{{{false_str}}}"

condition = node.condition
true_child = node.true_branch_child
false_child = node.false_branch_child

swap_branches = None

# if only one branch is a condition node, we want to decide swapping by which branch is a condition node
if isinstance(false_child, ast_nodes.ConditionNode) != isinstance(true_child, ast_nodes.ConditionNode):
swap_branches = not isinstance(false_child, ast_nodes.ConditionNode)

# if we haven't already decided on swapping (swap_branches is None), decide by length
if swap_branches is None:
length_comparisons = {"none": None, "smallest": len(true_str) > len(false_str), "largest": len(true_str) < len(false_str)}
swap_branches = length_comparisons[self._preferred_true_branch]

if swap_branches:
condition = ~condition
true_str, false_str = false_str, true_str
true_child, false_child = false_child, true_child

if isinstance(false_child, ast_nodes.ConditionNode):
return f"if ({self._condition_string(condition)}) {{{true_str}}} else {false_str}"
else:
return f"if ({self._condition_string(condition)}) {{{true_str}}} else {{{false_str}}}"

def visit_true_node(self, node: ast_nodes.TrueNode) -> str:
"""Generate code for the given TrueNode by evaluating its child (Wrapper)."""
Expand Down
148 changes: 62 additions & 86 deletions decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,55 @@
"""Module containing the visitors used to generate variable declarations."""

from collections import defaultdict
from typing import Iterable, Iterator, List, Set
from typing import Iterable, Iterator, List

from decompiler.backend.cexpressiongenerator import CExpressionGenerator
from decompiler.structures.ast.ast_nodes import ForLoopNode, LoopNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo import (
Assignment,
BinaryOperation,
Constant,
DataflowObject,
Expression,
ExternConstant,
ExternFunctionPointer,
GlobalVariable,
Operation,
OperationType,
Pointer,
UnaryOperation,
Variable,
)
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
from decompiler.util.insertion_ordered_set import InsertionOrderedSet
from decompiler.util.serialization.bytes_serializer import convert_bytes


class LocalDeclarationGenerator(BaseAstDataflowObjectVisitor):
"""Visits all nodes in the AST and produces the variable declarations."""

def __init__(self, vars_per_line: int = 1):
"""Initialize a new VariableCollector with an empty set of variables."""
self._variables: Set[Variable] = set()
self._vars_per_line: int = vars_per_line

@classmethod
def from_task(cls, task: DecompilerTask):
"""Class method for shorthand usage."""
param_names = list(param.name for param in task.function_parameters)
generator = cls(task.options.getint("code-generator.variable_declarations_per_line", fallback=1))
generator.visit_ast(task.syntax_tree)
return "\n".join(generator.generate(param_names))

def visit_assignment(self, instruction: Assignment):
"""Remember all defined variables."""
self._variables.update(instruction.definitions)

def visit_loop_node(self, node: LoopNode):
"""Visit the given loop node, taking node of the loop declaration."""
if isinstance(node, ForLoopNode) and isinstance(node.declaration, Assignment):
if isinstance(node.declaration.destination, Operation):
self._variables.add(node.declaration.destination[0])
else:
self._variables.add(node.declaration.destination)

def visit_unary_operation(self, unary: UnaryOperation):
"""Visit unary operations to remember all variables those memory location was read."""
if isinstance(unary, MemberAccess):
self._variables.add(unary.struct_variable)
if unary.operation == OperationType.address or unary.operation == OperationType.dereference:
if isinstance(unary.operand, Variable):
self._variables.add(unary.operand)
elif isinstance(unary.operand, BinaryOperation):
if isinstance(unary.operand.left, Variable):
self._variables.add(unary.operand.left)
else:
self.visit(unary.operand.left)

def generate(self, param_names: list[str] = []) -> Iterator[str]:
class LocalDeclarationGenerator:
@staticmethod
def from_task(task: DecompilerTask):
vars_per_line = task.options.getint("code-generator.variable_declarations_per_line", fallback=1)

parameter_names = {p.name for p in task.function_parameters}
variables = InsertionOrderedSet(LocalDeclarationGenerator._get_variables(task.syntax_tree))

return "\n".join(LocalDeclarationGenerator.generate(parameter_names, variables, vars_per_line))

@staticmethod
def _get_variables(ast: AbstractSyntaxTree) -> Iterator[Variable]:
for node in ast.nodes:
for obj in node.get_dataflow_objets(ast.condition_map):
for expression in obj.subexpressions():
if isinstance(expression, Variable):
yield expression

@staticmethod
def generate(parameter_names: Iterable[str], variables: Iterable[Variable], vars_per_line: int) -> Iterator[str]:
"""Generate a string containing the variable definitions for the visited variables."""

variable_type_mapping = defaultdict(list)
for variable in sorted(self._variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable) and variable.name not in param_names:
for variable in sorted(variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable) and variable.name not in parameter_names:
variable_type_mapping[variable.type].append(variable)

for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)):
for chunked_variables in self._chunks(variables, self._vars_per_line):
for chunked_variables in LocalDeclarationGenerator._chunks(variables, vars_per_line):
yield CExpressionGenerator.format_variables_declaration(variable_type, [var.name for var in chunked_variables]) + ";"

@staticmethod
Expand All @@ -83,44 +60,43 @@ def _chunks(lst: List, n: int) -> Iterator[List]:


class GlobalDeclarationGenerator(BaseAstDataflowObjectVisitor):
"""Visits all nodes in the AST and produces the declarations of global variables."""
@staticmethod
def from_asts(asts: Iterable[AbstractSyntaxTree]) -> str:
global_variables, extern_constants = GlobalDeclarationGenerator._get_global_variables_and_constants(asts)
return "\n".join(GlobalDeclarationGenerator.generate(global_variables.__iter__(), extern_constants))

def __init__(self):
"""Generate a new declarator with an empty sets of visited globals."""
self._extern_constants: Set[ExternConstant] = set()
self._global_variables: Set[GlobalVariable] = set()
@staticmethod
def _get_global_variables_and_constants(asts: Iterable[AbstractSyntaxTree]) -> tuple[set[GlobalVariable], set[ExternConstant]]:
global_variables = InsertionOrderedSet()
extern_constants = InsertionOrderedSet()

# if this gets more complex, a visitor pattern should perhaps be used instead
def handle_obj(obj: DataflowObject):
match obj:
case GlobalVariable():
global_variables.add(obj)
if isinstance(obj.initial_value, Expression):
for subexpression in obj.initial_value.subexpressions():
handle_obj(subexpression)

case ExternConstant():
extern_constants.add(obj)

@classmethod
def from_asts(cls, asts: Iterable[AbstractSyntaxTree]) -> str:
"""Class method for shorthand usage."""
generator = cls()
for ast in asts:
generator.visit_ast(ast)
return "\n".join(generator.generate())
for node in ast.nodes:
for obj in node.get_dataflow_objets(ast.condition_map):
for expression in obj.subexpressions():
handle_obj(expression)

def generate(self) -> Iterator[str]:
"""Generate a string containing the variable definitions for the visited variables."""
for variable in self._global_variables:
yield f"extern {variable.type} {variable.name} = {self.get_initial_value(variable)};"
for constant in sorted(self._extern_constants, key=lambda x: x.value):
yield f"extern {constant.type} {constant.value};"
return global_variables, extern_constants

def visit_unary_operation(self, unary: UnaryOperation):
"""Visit an unary operation, visiting variable operands and nested operations along the way."""
if isinstance(unary.operand, UnaryOperation) or isinstance(unary.operand, Variable):
self.visit(unary.operand)

def visit_variable(self, expression: Variable):
"""Visit the given variable, remembering all visited global Variables."""
if isinstance(expression, GlobalVariable):
self._global_variables.add(expression)
if isinstance(expression.initial_value, UnaryOperation):
self.visit(expression.initial_value)

def visit_constant(self, expression: Constant):
"""Visit the given constant, checking if it has been defined externally."""
if isinstance(expression, ExternConstant):
self._extern_constants.add(expression)
@staticmethod
def generate(global_variables: Iterable[GlobalVariable], extern_constants: Iterable[ExternConstant]) -> Iterator[str]:
"""Generate all definitions"""
for variable in global_variables:
yield f"extern {variable.type} {variable.name} = {GlobalDeclarationGenerator.get_initial_value(variable)};"
for constant in sorted(extern_constants, key=lambda x: x.value):
yield f"extern {constant.type} {constant.value};"

@staticmethod
def get_initial_value(variable: GlobalVariable) -> str:
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""module for anything pipeline related."""

from .binaryninja.frontend import BinaryninjaFrontend
from .frontend import Frontend
from .lifter import Lifter
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Class implementing the main binaryninja frontend interface."""

from __future__ import annotations

import logging
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Main module containing all binaryninja handlers."""

from .assignments import AssignmentHandler
from .binary import BinaryOperationHandler
from .calls import CallHandler
Expand Down
21 changes: 8 additions & 13 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the AssignmentHandler for binaryninja."""

import logging
from functools import partial

Expand Down Expand Up @@ -73,9 +74,9 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
member_name=struct_variable.type.get_member_name_by_offset(assignment.offset),
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
writes_memory=assignment.dest.version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
Expand All @@ -99,7 +100,7 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
if isinstance(source.type, Struct) or isinstance(source.type, Class) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
Expand All @@ -112,11 +113,11 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
"""Lift MLIL var_field as struct or union member read access."""
if isinstance(source.type, Struct):
member_name = source.type.get_member_by_offset(instruction.offset).name
if isinstance(source.type, Struct) or isinstance(source.type, Class):
member_name = source.type.get_member_name_by_offset(instruction.offset)
elif parent := kwargs.get("parent", None):
parent_type = self._lifter.lift(parent.dest.type)
member_name = source.type.get_member_by_type(parent_type).name
member_name = source.type.get_member_name_by_type(parent_type)
else:
logging.warning(f"Cannot get member name for instruction {instruction}")
member_name = f"field_{hex(instruction.offset)}"
Expand Down Expand Up @@ -213,14 +214,8 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct,
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
member = vartype.type.get_member_by_offset(instruction.offset)
if member is not None:
name = member.name
else:
name = f"__offset_{instruction.offset}"
name.replace("-", "minus_")
struct_member_access = MemberAccess(
member_name=name,
member_name=vartype.type.get_member_name_by_offset(instruction.offset),
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/binary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the handler for binaryninja's binary operations."""

from functools import partial

from binaryninja import MediumLevelILInstruction, mediumlevelil
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/calls.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the binaryninja CallHandler."""

from functools import partial
from typing import List

Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/conditions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the ConditionHandler class."""

from functools import partial

from binaryninja import mediumlevelil
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the ConstantHandler for the binaryninja frontend."""

import math

from binaryninja import BinaryView, DataVariable, SectionSemantics, SymbolType, Type, mediumlevelil
Expand Down
1 change: 1 addition & 0 deletions decompiler/frontend/binaryninja/handlers/controlflow.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the ConditionHandler class."""

from binaryninja import mediumlevelil
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import Branch, Condition, Constant, IndirectBranch, OperationType, Return
Expand Down
Loading

0 comments on commit 7c2cb43

Please sign in to comment.