Skip to content

Commit

Permalink
Merge branch 'main' into issue-256-False_control_structure_for_structs
Browse files Browse the repository at this point in the history
  • Loading branch information
steffenenders authored Oct 9, 2023
2 parents 5d93eed + 83b07e6 commit 0e42894
Show file tree
Hide file tree
Showing 17 changed files with 1,888 additions and 1,743 deletions.
24 changes: 2 additions & 22 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
from ctypes import c_byte, c_int, c_long, c_short, c_ubyte, c_uint, c_ulong, c_ushort
from itertools import chain, repeat

from decompiler.structures import pseudo as expressions
Expand All @@ -8,6 +7,7 @@
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface
from decompiler.util.integer_util import normalize_int


class CExpressionGenerator(DataflowObjectVisitorInterface):
Expand Down Expand Up @@ -80,20 +80,6 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.adc: "adc",
}

SIGNED_FORMATS = {
8: lambda x: c_byte(x).value,
16: lambda x: c_short(x).value,
32: lambda x: c_int(x).value,
64: lambda x: c_long(x).value,
}

UNSIGNED_FORMATS = {
8: lambda x: c_ubyte(x).value,
16: lambda x: c_ushort(x).value,
32: lambda x: c_uint(x).value,
64: lambda x: c_ulong(x).value,
}

"""
Precedence used for correctly generating brackets.
Higher precedence is more tightly binding.
Expand Down Expand Up @@ -298,13 +284,7 @@ def _get_integer_literal_value(self, literal: expressions.Constant) -> int:
Return the right integer value for the given type, assuming that the
re-compilation host has the same sizes as the decompilation host.
"""
if literal.type.is_signed:
if handler := self.SIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
elif literal.value < 0:
if handler := self.UNSIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
return literal.value
return normalize_int(literal.value, literal.type.size, literal.type.is_signed)

@staticmethod
def _interpret_integer_literal_type(value: int) -> Integer:
Expand Down
8 changes: 8 additions & 0 deletions decompiler/frontend/binaryninja/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MediumLevelILBasicBlock,
MediumLevelILConstPtr,
MediumLevelILInstruction,
MediumLevelILJump,
MediumLevelILJumpTo,
MediumLevelILTailcallSsa,
RegisterValueType,
Expand All @@ -18,6 +19,7 @@
from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, FalseCase, IndirectEdge, SwitchCase, TrueCase, UnconditionalEdge
from decompiler.structures.pseudo import Constant, Instruction
from decompiler.structures.pseudo.complextypes import ComplexTypeMap
from decompiler.structures.pseudo.instructions import Comment


class BinaryninjaParser(Parser):
Expand Down Expand Up @@ -135,6 +137,10 @@ def _get_lookup_table(self, block: MediumLevelILBasicBlock) -> Dict[int, List[Co
lookup[target] += [Constant(value)]
return lookup

def _has_undetermined_jump(self, basic_block: MediumLevelILBasicBlock) -> bool:
    """
    Tell whether the given basic block ends in a jump whose targets are unknown.

    :param basic_block: The basic block to inspect.
    :return: True iff the block is non-empty, its last instruction is a MediumLevelILJump and it has no outgoing edges.
    """
    # An empty block has no last instruction to inspect.
    if not len(basic_block):
        return False
    if not isinstance(basic_block[-1], MediumLevelILJump):
        return False
    return not basic_block.outgoing_edges

def _lift_instructions(self, basic_block: MediumLevelILBasicBlock) -> Iterator[Instruction]:
"""Yield the lifted versions of all instructions in the given basic block."""
for instruction in basic_block:
Expand All @@ -144,6 +150,8 @@ def _lift_instructions(self, basic_block: MediumLevelILBasicBlock) -> Iterator[I
self._unlifted_instructions.append(instruction)
continue
yield lifted_instruction
if self._has_undetermined_jump(basic_block):
yield Comment("jump -> undetermined")

def _report_lifter_errors(self):
"""Report instructions which could not be lifted and reset their counter."""
Expand Down
1 change: 1 addition & 0 deletions decompiler/pipeline/controlflowanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .expression_simplification.stages import ExpressionSimplificationAst, ExpressionSimplificationCfg
from .instruction_length_handler import InstructionLengthHandler
from .loop_name_generator import LoopNameGenerator
from .readability_based_refinement import ReadabilityBasedRefinement
from .variable_name_generation import VariableNameGeneration
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Callable, Optional

from decompiler.structures.pseudo import Constant, Integer, OperationType
from decompiler.util.integer_util import normalize_int


def constant_fold(operation: OperationType, constants: list[Constant]) -> Constant:
Expand Down Expand Up @@ -103,27 +104,6 @@ def _constant_fold_shift(constants: list[Constant], fun: Callable[[int, int], in
)


def normalize_int(v: int, size: int, signed: bool) -> int:
    """
    Normalizes an integer value to a specific size and signedness.

    This function takes an integer value 'v' and normalizes it to fit within
    the specified 'size' in bits by discarding overflowing bits. If 'signed' is
    true, the value is treated as a signed integer, i.e. interpreted as a two's complement.
    Therefore the return value will be negative iff 'signed' is true and the most-significant bit is set.

    A size of zero bits can only hold the value 0, so 0 is returned for both signed and unsigned requests.

    :param v: The value to be normalized.
    :param size: The desired bit size for the normalized integer; must not be negative.
    :param signed: True if the integer should be treated as signed.
    :return: The normalized integer value.
    :raises ValueError: If 'size' is negative.
    """
    if size < 0:
        raise ValueError(f"bit size must be non-negative, got {size}")
    if size == 0:
        # Without this guard the sign-bit test below would evaluate 1 << -1 and
        # fail for signed input, although the unsigned path already yields 0.
        return 0

    value = v & ((1 << size) - 1)
    if signed and value & (1 << (size - 1)):
        # Most-significant bit is set: reinterpret as a negative two's complement value.
        return value - (1 << size)
    return value


_OPERATION_TO_FOLD_FUNCTION: dict[OperationType, Callable[[list[Constant]], Constant]] = {
OperationType.minus: partial(_constant_fold_arithmetic_binary, fun=operator.sub),
OperationType.plus: partial(_constant_fold_arithmetic_binary, fun=operator.add),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from functools import reduce
from typing import Iterator

from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import constant_fold
from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import FOLDABLE_OPERATIONS, constant_fold
from decompiler.pipeline.controlflowanalysis.expression_simplification.rules.rule import SimplificationRule
from decompiler.structures.pseudo import Constant, Expression, Operation, OperationType, Type
from decompiler.structures.pseudo.operations import COMMUTATIVE_OPERATIONS

_COLLAPSIBLE_OPERATIONS = COMMUTATIVE_OPERATIONS & FOLDABLE_OPERATIONS

class CollapseNestedConstants(SimplificationRule):
"""
Expand All @@ -14,7 +15,7 @@ class CollapseNestedConstants(SimplificationRule):
This stage exploits associativity and is the only stage doing so. Therefore, it cannot be replaced by a combination of `TermOrder` and `CollapseConstants`.
"""
def apply(self, operation: Operation) -> list[tuple[Expression, Expression]]:
if operation.operation not in COMMUTATIVE_OPERATIONS:
if operation.operation not in _COLLAPSIBLE_OPERATIONS:
return []
if not isinstance(operation, Operation):
raise TypeError(f"Expected Operation, got {type(operation)}")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import normalize_int
from decompiler.pipeline.controlflowanalysis.expression_simplification.rules.rule import SimplificationRule
from decompiler.structures.pseudo import BinaryOperation, Constant, Expression, Integer, Operation, OperationType
from decompiler.util.integer_util import normalize_int


class PositiveConstants(SimplificationRule):
Expand Down
123 changes: 123 additions & 0 deletions decompiler/pipeline/controlflowanalysis/loop_name_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from typing import List

from decompiler.pipeline.controlflowanalysis.loop_utility_methods import (
AstInstruction,
_find_continuation_instruction,
_get_variable_initialisation,
_requirement_without_reinitialization,
_single_defininition_reaches_node,
)
from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import LoopNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo import Assignment, Expression, Operation, Variable
from decompiler.task import DecompilerTask


class WhileLoopVariableRenamer:
    """Rename the counter variables of while-loops to counter, counter1, counter2, ..."""

    def __init__(self, ast: AbstractSyntaxTree):
        self._ast = ast
        # Number of counter names handed out so far; also the numeric suffix of the next name.
        self._variable_counter: int = 0

    def rename(self):
        """
        Visit all while-loop nodes and rename at most one counter variable per loop.

        Endless loops are skipped. For the remaining loops, a variable required by the
        loop condition is renamed only if all of the following hold (checked in this order):
            -> an initialisation of the variable is found
            -> a continuation instruction for the variable is found for this loop
            -> a single definition of the variable reaches the loop node
        The first variable passing all checks is renamed; the other variables of that loop are left alone.
        """
        for loop_node in self._ast.get_while_loop_nodes_topological_order():
            if loop_node.is_endless_loop:
                continue
            for condition_var in loop_node.get_required_variables(self._ast.condition_map):
                variable_init = _get_variable_initialisation(self._ast, condition_var)
                if (
                    variable_init
                    and _find_continuation_instruction(self._ast, loop_node, condition_var, renaming=True)
                    and _single_defininition_reaches_node(self._ast, variable_init, loop_node)
                ):
                    self._replace_variables(loop_node, variable_init)
                    break

    def _replace_variables(self, loop_node: LoopNode, variable_init: AstInstruction):
        """
        Substitute the initialised variable by a fresh counter variable.

        The replacement is applied, in this order, to:
            - the subtree of the loop node
            - the node holding the variable initialisation
        If the old variable is required after the loop without being reinitialised,
        an assignment copying the counter back into it is added after the loop.
        """
        counter_variable = Variable(self._get_variable_name(), variable_init.instruction.destination.type)
        # The destination is re-read from the instruction at every step instead of being cached.
        self._ast.replace_variable_in_subtree(loop_node, variable_init.instruction.destination, counter_variable)
        needed_after_loop = _requirement_without_reinitialization(self._ast, loop_node, variable_init.instruction.destination)
        if needed_after_loop:
            copy_back = Assignment(variable_init.instruction.destination, counter_variable)
            self._ast.add_instructions_after(loop_node, copy_back)
        variable_init.node.replace_variable(variable_init.instruction.destination, counter_variable)

    def _get_variable_name(self) -> str:
        """Return the next unused name: 'counter' first, then 'counter1', 'counter2', ..."""
        suffix = self._variable_counter if self._variable_counter > 0 else ""
        self._variable_counter += 1
        return f"counter{suffix}"


class ForLoopVariableRenamer:
    """Rename the variables declared by for-loops to i, j, ..., i1, j1, ..."""

    def __init__(self, ast: AbstractSyntaxTree, candidates: list[str]):
        self._ast = ast
        # Base names to cycle through, e.g. ["i", "j", "k"].
        self._candidates: list[str] = candidates
        # Index of the candidate used last; -1 means no name has been generated yet.
        self._variable_counter: int = -1
        # Number of completed passes over the candidates; appended as suffix once positive.
        self._iteration: int = 0

    def rename(self):
        """
        Walk over all for-loop nodes and give each declared variable the next generated name.

        Loops whose declaration is not an Assignment are skipped. If the old variable
        is required after the loop without being reinitialised, an assignment copying
        the new variable back into the old one is added after the loop.
        """
        for loop_node in self._ast.get_for_loop_nodes_topological_order():
            declaration = loop_node.declaration
            if not isinstance(declaration, Assignment):
                continue

            old_variable: Variable = self._get_variable_from_assignment(declaration.destination)
            new_variable = Variable(
                self._get_variable_name(),
                old_variable.type,
                ssa_name=old_variable.ssa_name,
            )
            self._ast.replace_variable_in_subtree(loop_node, old_variable, new_variable)

            if not _requirement_without_reinitialization(self._ast, loop_node, old_variable):
                continue
            self._ast.add_instructions_after(loop_node, Assignment(old_variable, new_variable))

    def _get_variable_name(self) -> str:
        """Return the next name of the sequence [i, j, ..., i1, j1, ...] built from the candidates."""
        next_index = self._variable_counter + 1
        if next_index >= len(self._candidates):
            # All candidates used in this pass: start over and bump the numeric suffix.
            next_index = 0
            self._iteration += 1
        self._variable_counter = next_index
        suffix = self._iteration if self._iteration > 0 else ""
        return f"{self._candidates[next_index]}{suffix}"

    def _get_variable_from_assignment(self, expr: Expression) -> Variable:
        """
        Extract the declared variable from the destination of a for-loop declaration.

        :param expr: Destination expression of the declaration.
        :return: The expression itself if it is a Variable, otherwise the sole operand of a one-operand Operation.
        :raises ValueError: If the expression is neither a Variable nor an Operation with exactly one operand.
        """
        if isinstance(expr, Variable):
            return expr
        if not isinstance(expr, Operation) or len(expr.operands) != 1:
            raise ValueError("Did not expect a Constant/Unknown/Operation with more then 1 operand as a ForLoop declaration")
        return expr.operands[0]


class LoopNameGenerator(PipelineStage):
    """
    Pipeline stage that gives the variables of while- and for-loops custom names.

    Both renamings are driven by task options:
        - loop-name-generator.rename_while_loop_variables (boolean, falls back to False)
        - loop-name-generator.for_loop_variable_names (list of names, falls back to an empty list)
    """

    name = "loop-name-generator"

    def run(self, task: DecompilerTask):
        """Read both options, then run the while-loop and for-loop renamers that are enabled."""
        options = task.options
        rename_while_loops: bool = options.getboolean("loop-name-generator.rename_while_loop_variables", fallback=False)
        for_loop_names: List[str] = options.getlist("loop-name-generator.for_loop_variable_names", fallback=[])

        if rename_while_loops:
            while_renamer = WhileLoopVariableRenamer(task._ast)
            while_renamer.rename()

        # An empty list of candidate names disables for-loop renaming.
        if for_loop_names:
            for_renamer = ForLoopVariableRenamer(task._ast, for_loop_names)
            for_renamer.rename()
Loading

0 comments on commit 0e42894

Please sign in to comment.