Skip to content

Commit

Permalink
Merge branch 'main' into ComplexType_bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
blattm authored Oct 19, 2023
2 parents e0facea + 9c77091 commit 1e87a0b
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 119 deletions.
24 changes: 2 additions & 22 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
from ctypes import c_byte, c_int, c_long, c_short, c_ubyte, c_uint, c_ulong, c_ushort
from itertools import chain, repeat

from decompiler.structures import pseudo as expressions
Expand All @@ -8,6 +7,7 @@
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface
from decompiler.util.integer_util import normalize_int


class CExpressionGenerator(DataflowObjectVisitorInterface):
Expand Down Expand Up @@ -80,20 +80,6 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.adc: "adc",
}

SIGNED_FORMATS = {
8: lambda x: c_byte(x).value,
16: lambda x: c_short(x).value,
32: lambda x: c_int(x).value,
64: lambda x: c_long(x).value,
}

UNSIGNED_FORMATS = {
8: lambda x: c_ubyte(x).value,
16: lambda x: c_ushort(x).value,
32: lambda x: c_uint(x).value,
64: lambda x: c_ulong(x).value,
}

"""
Precedence used for correctly generating brackets.
Higher precedence is more tightly binding.
Expand Down Expand Up @@ -298,13 +284,7 @@ def _get_integer_literal_value(self, literal: expressions.Constant) -> int:
Return the right integer value for the given type, assuming that the
re-compilation host has the same sizes as the decompilation host.
"""
if literal.type.is_signed:
if handler := self.SIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
elif literal.value < 0:
if handler := self.UNSIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
return literal.value
return normalize_int(literal.value, literal.type.size, literal.type.is_signed)

@staticmethod
def _interpret_integer_literal_type(value: int) -> Integer:
Expand Down
13 changes: 8 additions & 5 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,21 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
case 1: struct member read access e.g. (x = )book.title
lift as (x = ) struct_member(book, title)
case 2: accessing register portion e.g. (x = )eax.ah
lift as (x = ) eax & 0x0000ff00
lift as (x = ) (uint8_t)(eax >> 8)
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
return BinaryOperation(
OperationType.bitwise_and,
[source, Constant(self._get_all_ones_mask_for_type(instruction.size) << instruction.offset)],
vartype=cast_type,
return UnaryOperation(
OperationType.cast,
[BinaryOperation(
OperationType.right_shift_us,
[source, Constant(instruction.offset, Integer.int32_t())]
)],
cast_type
)
return UnaryOperation(OperationType.cast, [source], vartype=cast_type, contraction=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Callable, Optional

from decompiler.structures.pseudo import Constant, Integer, OperationType
from decompiler.util.integer_util import normalize_int


def constant_fold(operation: OperationType, constants: list[Constant]) -> Constant:
Expand Down Expand Up @@ -103,27 +104,6 @@ def _constant_fold_shift(constants: list[Constant], fun: Callable[[int, int], in
)


def normalize_int(v: int, size: int, signed: bool) -> int:
    """
    Wrap an integer into the value range of a fixed-width integer type.

    All bits of 'v' above the lowest 'size' bits are discarded. For a signed
    target the remaining bit pattern is read as a two's complement number, so
    the result is negative exactly when 'signed' is true and the
    most-significant of the kept bits is set.

    :param v: The value to be normalized.
    :param size: The width of the target integer in bits.
    :param signed: True if the target integer is signed.
    :return: The value of 'v' truncated to 'size' bits with the requested signedness.
    """
    modulus = 1 << size
    truncated = v & (modulus - 1)
    if not signed:
        return truncated
    # Two's complement: a set top bit means the pattern encodes truncated - 2**size.
    sign_bit = 1 << (size - 1)
    return truncated - modulus if truncated & sign_bit else truncated


_OPERATION_TO_FOLD_FUNCTION: dict[OperationType, Callable[[list[Constant]], Constant]] = {
OperationType.minus: partial(_constant_fold_arithmetic_binary, fun=operator.sub),
OperationType.plus: partial(_constant_fold_arithmetic_binary, fun=operator.add),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import normalize_int
from decompiler.pipeline.controlflowanalysis.expression_simplification.rules.rule import SimplificationRule
from decompiler.structures.pseudo import BinaryOperation, Constant, Expression, Integer, Operation, OperationType
from decompiler.util.integer_util import normalize_int


class PositiveConstants(SimplificationRule):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def _get_potential_guarded_do_while_loops(ast: AbstractSyntaxTree) -> tuple(Unio


def remove_guarded_do_while(ast: AbstractSyntaxTree):
""" Removes a if statement which guards a do-while loop/while loop when:
-> there is nothing in between the if-node and the do-while-node/while-node
-> the if-node has only one branch (true branch)
-> the condition of the branch is the same as the condition of the do-while-node
Replacement is a WhileLoop, otherwise the control flow would not be correct
"""Removes an if statement which guards a do-while loop/while loop when:
-> there is nothing in between the if-node and the do-while-node/while-node
-> the if-node has only one branch (true branch)
-> the condition of the branch is the same as the condition of the do-while-node
Replacement is a WhileLoop, otherwise the control flow would not be correct
"""
for do_while_node, condition_node in _get_potential_guarded_do_while_loops(ast):
if condition_node.false_branch:
Expand All @@ -43,40 +43,51 @@ def remove_guarded_do_while(ast: AbstractSyntaxTree):

class WhileLoopReplacer:
"""Convert WhileLoopNodes to ForLoopNodes depending on the configuration.
-> keep_empty_for_loops will keep empty for-loops in the code
-> force_for_loops will transform every while-loop into a for-loop, worst case with empty declaration/modification statement
-> forbidden_condition_types_in_simple_for_loops will not transform trivial for-loop candidates (with only one condition) into for-loops
if the operator matches one of the forbidden operator list
-> max_condition_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the condition complexity is
less/equal then the threshold
-> max_modification_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the modification complexity is
less/equal then the threshold
-> keep_empty_for_loops will keep empty for-loops in the code
-> force_for_loops will transform every while-loop into a for-loop, worst case with empty declaration/modification statement
-> forbidden_condition_types_in_simple_for_loops will not transform trivial for-loop candidates (with only one condition) into for-loops
if the operator matches one of the forbidden operator list
-> max_condition_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the condition complexity is
less than or equal to the threshold
-> max_modification_complexity_for_loop_recovery will transform for-loop candidates only into for-loops if the modification complexity is
less than or equal to the threshold
"""

def __init__(self, ast: AbstractSyntaxTree, options: Options):
self._ast = ast
self._restructure_for_loops = options.getboolean("readability-based-refinement.restructure_for_loops", fallback=True)
self._keep_empty_for_loops = options.getboolean("readability-based-refinement.keep_empty_for_loops", fallback=False)
self._hide_non_init_decl = options.getboolean("readability-based-refinement.hide_non_initializing_declaration", fallback=False)
self._force_for_loops = options.getboolean("readability-based-refinement.force_for_loops", fallback=False)
self._forbidden_condition_types = options.getlist("readability-based-refinement.forbidden_condition_types_in_simple_for_loops", fallback=[])
self._condition_max_complexity = options.getint("readability-based-refinement.max_condition_complexity_for_loop_recovery", fallback=100)
self._modification_max_complexity = options.getint("readability-based-refinement.max_modification_complexity_for_loop_recovery", fallback=100)
self._forbidden_condition_types = options.getlist(
"readability-based-refinement.forbidden_condition_types_in_simple_for_loops", fallback=[]
)
self._condition_max_complexity = options.getint(
"readability-based-refinement.max_condition_complexity_for_loop_recovery", fallback=100
)
self._modification_max_complexity = options.getint(
"readability-based-refinement.max_modification_complexity_for_loop_recovery", fallback=100
)

def run(self):
"""For each WhileLoop in AST check the following conditions:
-> any variable in loop condition has a valid continuation instruction in loop body
-> variable is initialized
-> loop condition complexity < condition complexity
-> loop condition complexity < condition complexity
-> possible modification complexity < modification complexity
-> if condition is only a symbol: check condition type for allowed one
If 'force_for_loops' is enabled, the complexity options are ignored and every while loop after the
initial transformation will be forced into a for loop with an empty declaration/modification
"""
If 'force_for_loops' is enabled, the complexity options are ignored and every while loop after the
initial transformation will be forced into a for loop with an empty declaration/modification
"""
if not self._restructure_for_loops:
return
for loop_node in list(self._ast.get_while_loop_nodes_topological_order()):
if loop_node.is_endless_loop or (not self._keep_empty_for_loops and _is_single_instruction_loop_node(loop_node)) \
or self._invalid_simple_for_loop_condition_type(loop_node.condition):
if (
loop_node.is_endless_loop
or (not self._keep_empty_for_loops and _is_single_instruction_loop_node(loop_node))
or self._invalid_simple_for_loop_condition_type(loop_node.condition)
):
continue

if any(node.does_end_with_continue for node in loop_node.body.get_descendant_code_nodes_interrupting_ancestor_loop()):
Expand All @@ -100,11 +111,11 @@ def run(self):
self._ast.substitute_loop_node(
loop_node,
ForLoopNode(
declaration=None,
condition=loop_node.condition,
modification=None,
reaching_condition=loop_node.reaching_condition,
)
declaration=None,
condition=loop_node.condition,
modification=None,
reaching_condition=loop_node.reaching_condition,
),
)

def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInstruction, init: AstInstruction):
Expand Down Expand Up @@ -139,9 +150,9 @@ def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInst
)
continuation.node.instructions.remove(continuation.instruction)
self._ast.clean_up()

def _invalid_simple_for_loop_condition_type(self, logic_condition) -> bool:
""" Checks if a logic condition is only a symbol, if true checks condition type of symbol for forbidden ones"""
"""Checks if a logic condition is only a symbol, if true checks condition type of symbol for forbidden ones"""
if not logic_condition.is_symbol or not self._forbidden_condition_types:
return False

Expand Down
11 changes: 11 additions & 0 deletions decompiler/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
self._failed = False
self._failure_origin = None
self._complex_types = complex_types if complex_types else ComplexTypeMap()
self._code = None

@property
def name(self) -> str:
Expand Down Expand Up @@ -99,3 +100,13 @@ def failure_message(self) -> str:
def complex_types(self) -> ComplexTypeMap:
"""Return complex types present in the function (structs, unions, enums, etc.)."""
return self._complex_types

@property
def code(self) -> str:
"""Return the C-code representation stored for the task (None until a value has been assigned)."""
return self._code

@code.setter
def code(self, value):
"""Store the given value as the C-code representation of the task."""
self._code = value
17 changes: 14 additions & 3 deletions decompiler/util/bugfinder/bugfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,18 @@
# Add project root to path (script located in dewolf/decompiler/util/bugfinder/)
project_root = Path(__file__).resolve().parents[3]
sys.path.append(str(project_root))
from binaryninja import BinaryViewType, Function, core_version
from binaryninja import Function, core_version

# Use binaryninja.load for BN 3.5 and newer, otherwise fall back to
# BinaryViewType.get_view_of_file.
version_numbers = core_version().split(".")
major, minor = int(version_numbers[0]), int(version_numbers[1])
# Compare (major, minor) as a tuple: testing "major >= 3 and minor >= 5"
# would wrongly send releases such as 4.0 (minor < 5) into the legacy branch.
if (major, minor) >= (3, 5):
    from binaryninja import load
else:
    from binaryninja import BinaryViewType

    load = BinaryViewType.get_view_of_file

from decompile import Decompiler
from decompiler.frontend import BinaryninjaFrontend
from decompiler.logger import configure_logging
Expand Down Expand Up @@ -124,7 +135,7 @@ def get_function_info(function: Function) -> dict:
"function_size": function.highest_address - function.start,
"function_arch": str(function.arch),
"function_platform": str(function.platform),
"timestamp": datetime.now()
"timestamp": datetime.now(),
}

@staticmethod
Expand Down Expand Up @@ -198,7 +209,7 @@ def iter_function_reports(self, sample) -> Iterator[dict]:
def store_reports_from_sample(sample: Path, db_reports: DBConnector, max_size: int):
"""Store all reports from sample into database"""
logging.info(f"processing {sample}")
if not (binary_view := BinaryViewType.get_view_of_file(sample)):
if not (binary_view := load(sample)):
logging.warning(f"Could not get BinaryView '{sample}'")
return
try:
Expand Down
10 changes: 10 additions & 0 deletions decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,16 @@
"is_hidden_from_cli": false,
"argument_name": "--return-complexity-threshold"
},
{
"dest": "readability-based-refinement.restructure_for_loops",
"default": true,
"type": "boolean",
"title": "Enable for-loop recovery",
"description": "If enabled, certain while-loops will be transformed to for-loops. If set to false, no for-loops will be emitted at all.",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--restructure-for-loops"
},
{
"dest": "readability-based-refinement.keep_empty_for_loops",
"default": false,
Expand Down
19 changes: 19 additions & 0 deletions decompiler/util/integer_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
def normalize_int(v: int, size: int, signed: bool) -> int:
    """
    Reduce an integer to a given bit width and signedness.

    Only the lowest 'size' bits of 'v' are kept. If 'signed' is true, the kept
    bits are interpreted as a two's complement number; the result is then
    negative iff the most-significant kept bit is set. For an unsigned target
    the result is always non-negative.

    :param v: The value to be normalized.
    :param size: The desired bit size for the normalized integer.
    :param signed: True if the integer should be treated as signed.
    :return: The normalized integer value.
    """
    mask = (1 << size) - 1
    wrapped = v & mask
    # The top kept bit is only inspected for signed targets.
    is_negative = signed and (wrapped >> (size - 1)) & 1
    if is_negative:
        wrapped -= 1 << size
    return wrapped
Loading

0 comments on commit 1e87a0b

Please sign in to comment.