Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into type-propagation-dead-code
Browse files Browse the repository at this point in the history
ebehner authored Jun 26, 2024
2 parents b5e0d31 + 749e702 commit 563b71b
Showing 36 changed files with 1,381 additions and 368 deletions.
3 changes: 3 additions & 0 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
@@ -199,6 +199,9 @@ def visit_constant_composition(self, expr: expressions.ConstantComposition):
case CustomType(text="wchar16") | CustomType(text="wchar32"):
val = "".join([x.value for x in expr.value])
return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(size=8, signed=False):
val = "".join([f"\\x{x.value:02X}" for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(8):
val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
2 changes: 1 addition & 1 deletion decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
@@ -63,7 +63,7 @@ def _generate_definitions(global_variables: set[GlobalVariable]) -> Iterator[str
match variable.type:
case ArrayType():
br, bl = "", ""
if not variable.type.type in [Integer.char(), CustomType.wchar16(), CustomType.wchar32()]:
if not variable.type.type in [Integer.char(), Integer.uint8_t(), CustomType.wchar16(), CustomType.wchar32()]:
br, bl = "{", "}"
yield f"{base}{variable.type.type} {variable.name}[{hex(variable.type.elements)}] = {br}{CExpressionGenerator().visit(variable.initial_value)}{bl};"
case _:
3 changes: 0 additions & 3 deletions decompiler/frontend/binaryninja/handlers/constants.py
Original file line number Diff line number Diff line change
@@ -72,9 +72,6 @@ def lift_constant_pointer(self, pointer: mediumlevelil.MediumLevelILConstPtr, **
if isinstance(res, Constant): # BNinja Error case handling
return res

if isinstance(res.type, Pointer) and res.type.type == CustomType.void():
return res

if isinstance(pointer, mediumlevelil.MediumLevelILImport): # Temp fix for '&'
return res

4 changes: 3 additions & 1 deletion decompiler/frontend/binaryninja/handlers/globals.py
Original file line number Diff line number Diff line change
@@ -247,7 +247,9 @@ def _get_unknown_value(self, variable: DataVariable):
type = PseudoArrayType(self._lifter.lift(data[1]), len(data[0]))
data = ConstantComposition([Constant(x, type.type) for x in data[0]], type)
else:
data, type = get_raw_bytes(variable.address, self._view), Pointer(CustomType.void(), self._view.address_size * BYTE_SIZE)
rbytes = get_raw_bytes(variable.address, self._view)
type = PseudoArrayType(Integer.uint8_t(), len(rbytes))
data = ConstantComposition([Constant(b, type.type) for b in rbytes], type)
return data, type

def _get_unknown_pointer_value(self, variable: DataVariable, callers: list[int] = None):
6 changes: 5 additions & 1 deletion decompiler/frontend/binaryninja/handlers/symbols.py
Original file line number Diff line number Diff line change
@@ -26,8 +26,12 @@ def __init__(self, lifter: ObserverLifter):
SymbolType.ImportedDataSymbol: Symbol,
SymbolType.ExternalSymbol: ImportedFunctionSymbol,
SymbolType.LibraryFunctionSymbol: Symbol,
SymbolType.SymbolicFunctionSymbol: FunctionSymbol,
}
# SymbolicFunctionSymbol is not available for Binary Ninja < 4
try:
self.SYMBOL_MAP[SymbolType.SymbolicFunctionSymbol] = FunctionSymbol
except AttributeError:
pass

def register(self):
"""Register the handler at the parent lifter."""
30 changes: 22 additions & 8 deletions decompiler/pipeline/commons/expressionpropagationcommons.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
Operation,
OperationType,
Phi,
Relation,
Return,
UnaryOperation,
UnknownExpression,
@@ -224,8 +225,9 @@ def _is_address_into_dereference(self, definition: Assignment, target: Instructi
if self._is_address(definition.value):
for subexpr in target:
for sub in self._find_subexpressions(subexpr):
if self._is_dereference(sub) and sub.operand == definition.destination:
if self._is_dereference(sub) and sub.operand in definition.definitions:
return True
return False

def _contains_aliased_variables(self, definition: Assignment) -> bool:
"""
@@ -265,15 +267,17 @@ def _definition_value_could_be_modified_via_memory_access_between_definition_and
) -> bool:
"""
Tests for definition containing aliased if a modification of the aliased value is possible, i.e.
via its pointer (ptr = &aliased) or via use of its reference (aka address) in function calls.
via its pointer (ptr = &aliased) or via use of its reference (aka address) in function calls
or if a relation is in between.
:return: true if a modification of the aliased value is possible (hence, the propagation should be avoided) false otherwise
"""
for aliased_variable in set(self._iter_aliased_variables(definition)):
dangerous_address_uses = self._get_dangerous_uses_of_variable_address(aliased_variable)
dangerous_pointer_uses = self._get_dangerous_uses_of_pointer_to_variable(aliased_variable)
if dangerous_address_uses or dangerous_pointer_uses:
dangerous_uses = dangerous_pointer_uses.union(dangerous_address_uses)
dangerous_alias_uses = self._get_dangerous_relations_between_definition_and_target(aliased_variable)
dangerous_uses = dangerous_pointer_uses | dangerous_address_uses | dangerous_alias_uses
if dangerous_uses:
if self._has_any_of_dangerous_uses_between_definition_and_target(definition, target, dangerous_uses):
return True
return False
@@ -323,14 +327,13 @@ def _has_any_of_dangerous_uses_between_definition_and_target(
def _get_dangerous_uses_of_variable_address(self, var: Variable) -> Set[Instruction]:
"""
Dangerous use of & of x is func(&x) cause it can potentially modify x.
*(&x) could also do the job but I consider it to be too exotic so that we could get such instruction from Binary Ninja
If it happens we can handle it later.
Another case is an Assignment where the left side is *(&).
:param var: aliased variable
:return: set of function call assignments that take &var as parameter
"""
dangerous_uses = set()
for use in self._use_map.get(var):
if not self._is_call_assignment(use):
if not self._is_call_assignment(use) and not (isinstance(use, Assignment) and self._is_dereference(use.destination)):
continue
for subexpr in self._find_subexpressions(use):
if self._is_address(subexpr):
@@ -353,6 +356,17 @@ def _get_dangerous_uses_of_pointer_to_variable(self, var: Variable) -> Set[Instr
dangerous_uses.update(self._get_dangerous_uses_of_pointer(pointer))
return dangerous_uses

def _get_dangerous_relations_between_definition_and_target(self, alias_variable: Variable) -> Set[Relation]:
    """
    Gather every Relation instruction in the control flow graph whose destination has the same name as the alias variable.

    Only the variable name is compared, so the SSA version of the destination is ignored.

    :param alias_variable: aliased variable whose relations are collected
    :return: set of all Relation instructions whose destination name equals the name of alias_variable
    """
    return {
        instruction
        for basic_block in self._cfg
        for instruction in basic_block
        if isinstance(instruction, Relation) and instruction.destination.name == alias_variable.name
    }


def _get_dangerous_uses_of_pointer(self, pointer: Variable) -> Set[Instruction]:
"""
:param pointer to a variable
@@ -438,7 +452,7 @@ def _is_aliased_variable(expression: Expression) -> bool:
def _contains_writeable_global_variable(expression: Assignment) -> bool:
"""
:param expression: Assignment expression to be tested
:return: true if any requirement of expression is a GlobalVariable
:return: true if any requirement of expression is a writeable GlobalVariable
"""
for expr in expression.destination.requirements:
if isinstance(expr, GlobalVariable) and not expr.is_constant:
Original file line number Diff line number Diff line change
@@ -65,7 +65,12 @@ def constant_fold(operation: OperationType, constants: list[Constant], result_ty
)


def _constant_fold_arithmetic_binary(constants: list[Constant], fun: Callable[[int, int], int], norm_sign: Optional[bool] = None) -> int:
def _constant_fold_arithmetic_binary(
constants: list[Constant],
fun: Callable[[int, int], int],
norm_sign: Optional[bool] = None,
allow_mismatched_sizes: bool = False,
) -> int:
"""
Fold an arithmetic binary operation with constants as operands.
@@ -84,7 +89,7 @@ def _constant_fold_arithmetic_binary(constants: list[Constant], fun: Callable[[i

if len(constants) != 2:
raise IncompatibleOperandCount(f"Expected exactly 2 constants to fold, got {len(constants)}.")
if not all(constant.type.size == constants[0].type.size for constant in constants):
if not allow_mismatched_sizes and not all(constant.type.size == constants[0].type.size for constant in constants):
raise UnsupportedMismatchedSizes(f"Can not fold constants with different sizes: {[constant.type for constant in constants]}")

left, right = constants
@@ -137,13 +142,19 @@ def _constant_fold_shift(constants: list[Constant], fun: Callable[[int, int], in
return fun(normalize_int(left.value, left.type.size, norm_signed), right.value)


def remainder(n, d):
    """
    Return the remainder of the division n / d using truncation toward zero.

    The result carries the sign of the dividend n (or is zero), whereas Python's % operator
    floors and therefore yields a result carrying the sign of the divisor d.

    :param n: the dividend
    :param d: the divisor
    :return: n - d * trunc(n / d)
    :raises ZeroDivisionError: if d is zero
    """
    # Compute on magnitudes: applying % directly to a negative operand floors,
    # e.g. -7 % 3 == 2, while the truncated remainder has magnitude 1.
    magnitude = abs(n) % abs(d)
    return -magnitude if n < 0 else magnitude


_OPERATION_TO_FOLD_FUNCTION: dict[OperationType, Callable[[list[Constant]], int]] = {
OperationType.minus: partial(_constant_fold_arithmetic_binary, fun=operator.sub),
OperationType.plus: partial(_constant_fold_arithmetic_binary, fun=operator.add),
OperationType.multiply: partial(_constant_fold_arithmetic_binary, fun=operator.mul, norm_sign=True),
OperationType.multiply_us: partial(_constant_fold_arithmetic_binary, fun=operator.mul, norm_sign=False),
OperationType.divide: partial(_constant_fold_arithmetic_binary, fun=operator.floordiv, norm_sign=True),
OperationType.divide_us: partial(_constant_fold_arithmetic_binary, fun=operator.floordiv, norm_sign=False),
OperationType.modulo: partial(_constant_fold_arithmetic_binary, fun=remainder, norm_sign=True, allow_mismatched_sizes=True),
OperationType.modulo_us: partial(_constant_fold_arithmetic_binary, fun=operator.mod, norm_sign=False, allow_mismatched_sizes=True),
OperationType.negate: partial(_constant_fold_arithmetic_unary, fun=operator.neg),
OperationType.left_shift: partial(_constant_fold_shift, fun=operator.lshift, signed=True),
OperationType.right_shift: partial(_constant_fold_shift, fun=operator.rshift, signed=True),
Original file line number Diff line number Diff line change
@@ -94,7 +94,10 @@ def _construct_refined_ast(self, seq_node_root: SeqNode) -> AbstractSyntaxTreeNo
ConditionBasedRefinement.refine(self.asforest)
acyclic_processor.preprocess_condition_aware_refinement()
if self.options.reconstruct_switch:
ConditionAwareRefinement.refine(self.asforest, self.options)
updated_switch_nodes = ConditionAwareRefinement.refine(self.asforest, self.options)
for switch_node in updated_switch_nodes:
for sequence_case in (c for c in switch_node.cases if isinstance(c.child, SeqNode)):
ConditionBasedRefinement.refine(self.asforest, sequence_case.child)
acyclic_processor.postprocess_condition_refinement()
root = self.asforest.current_root
self.asforest.remove_current_root()
Original file line number Diff line number Diff line change
@@ -232,7 +232,7 @@ def _group_by_reaching_conditions(self, nodes: Tuple[AbstractSyntaxTreeNode]) ->
:param nodes: The AST nodes that we want to group.
:return: A dictionary that assigns to a reaching condition the list of AST code nodes with this reaching condition,
if it are at least two with the same.
if there are at least two with the same.
"""
initial_groups: Dict[LogicCondition, List[AbstractSyntaxTreeNode]] = dict()
for node in nodes:
Original file line number Diff line number Diff line change
@@ -2,6 +2,8 @@
Module for Condition Aware Refinement
"""

from typing import Set

from decompiler.pipeline.controlflowanalysis.restructuring_commons.condition_aware_refinement_commons.base_class_car import (
BaseClassConditionAwareRefinement,
)
@@ -21,6 +23,7 @@
SwitchExtractor,
)
from decompiler.pipeline.controlflowanalysis.restructuring_options import RestructuringOptions
from decompiler.structures.ast.ast_nodes import SwitchNode
from decompiler.structures.ast.syntaxforest import AbstractSyntaxForest


@@ -35,13 +38,14 @@ class ConditionAwareRefinement(BaseClassConditionAwareRefinement):
]

@classmethod
def refine(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions):
def refine(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions) -> Set[SwitchNode]:
condition_aware_refinement = cls(asforest, options)
for stage in condition_aware_refinement.REFINEMENT_PIPELINE:
asforest.clean_up(asforest.current_root)
stage(asforest, options)
condition_aware_refinement.updated_switch_nodes.update(stage(asforest, options))
condition_aware_refinement._remove_redundant_reaching_condition_from_switch_nodes()
asforest.clean_up(asforest.current_root)
return set(switch for switch in condition_aware_refinement.updated_switch_nodes if switch in asforest)

def _remove_redundant_reaching_condition_from_switch_nodes(self):
"""Remove the reaching condition from all switch nodes if it is redundant."""
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dataclasses import dataclass
from typing import Iterator, Optional, Tuple
from typing import Iterator, Optional, Set, Tuple

from decompiler.pipeline.controlflowanalysis.restructuring_options import LoopBreakOptions, RestructuringOptions
from decompiler.structures.ast.ast_nodes import AbstractSyntaxTreeNode, CaseNode, FalseNode, SwitchNode, TrueNode
@@ -63,6 +63,7 @@ def __init__(self, asforest: AbstractSyntaxForest, options: RestructuringOptions
self.asforest: AbstractSyntaxForest = asforest
self.condition_handler: ConditionHandler = asforest.condition_handler
self.options: RestructuringOptions = options
self.updated_switch_nodes: Set[SwitchNode] = set()

def _get_constant_equality_check_expressions_and_conditions(
self, condition: LogicCondition
Original file line number Diff line number Diff line change
@@ -214,13 +214,14 @@ class InitialSwitchNodeConstructor(BaseClassConditionAwareRefinement):
"""Class that constructs switch nodes."""

@classmethod
def construct(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions):
def construct(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions) -> Set[SwitchNode]:
"""Constructs initial switch nodes if possible."""
initial_switch_constructor = cls(asforest, options)
for cond_node in asforest.get_condition_nodes_post_order(asforest.current_root):
initial_switch_constructor._extract_case_nodes_from_nested_condition(cond_node)
for seq_node in asforest.get_sequence_nodes_post_order(asforest.current_root):
initial_switch_constructor._try_to_construct_initial_switch_node_for(seq_node)
return initial_switch_constructor.updated_switch_nodes

def _extract_case_nodes_from_nested_condition(self, cond_node: ConditionNode) -> None:
"""
@@ -336,6 +337,7 @@ def _try_to_construct_initial_switch_node_for(self, seq_node: SeqNode) -> None:
sibling_reachability = self.asforest.get_sibling_reachability_of_children_of(seq_node)
switch_cases = list(possible_switch_node.construct_switch_cases())
switch_node = self.asforest.create_switch_node_with(possible_switch_node.expression, switch_cases)
self.updated_switch_nodes.add(switch_node)
case_dependency = CaseDependencyGraph.construct_case_dependency_for(self.asforest.children(switch_node), sibling_reachability)
self._update_reaching_condition_for_case_node_children(switch_node)
self._add_constants_to_cases(switch_node, case_dependency)
@@ -393,7 +395,7 @@ def _update_reaching_condition_for_case_node_children(self, switch_node: SwitchN
case_node.reaching_condition.is_disjunction_of_literals
), f"The condition of a case node should be a disjunction, but it is {case_node.reaching_condition}!"

if isinstance(cond_node := case_node.child, ConditionNode) and cond_node.false_branch is None:
if (cond_node := case_node.child).is_single_branch:
self._update_condition_for(cond_node, case_node)

case_node.child.reaching_condition = case_node.child.reaching_condition.substitute_by_true(case_node.reaching_condition)
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@ def _insert_case_node(self, new_case_node: AbstractSyntaxTreeNode, case_constant
if default_case := switch_node.default:
new_children.append(default_case)
switch_node._sorted_cases = tuple(new_children)
self.updated_switch_nodes.add(switch_node)

def _new_case_nodes_for(
self, new_case_node: AbstractSyntaxTreeNode, switch_node: SwitchNode, sorted_case_constants: List[Constant]
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ class MissingCaseFinderCondition(MissingCaseFinder):
"""

@classmethod
def find(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions):
def find(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions) -> Set[SwitchNode]:
"""Try to find missing cases that are branches of condition nodes."""
missing_case_finder = cls(asforest, options)
for condition_node in asforest.get_condition_nodes_post_order(asforest.current_root):
@@ -37,9 +37,10 @@ def find(cls, asforest: AbstractSyntaxForest, options: RestructuringOptions):
case_candidate_information.case_node, case_candidate_information.case_constants, case_candidate_information.switch_node
)
if case_candidate_information.in_sequence:
asforest.extract_switch_from_condition_sequence(case_candidate_information.switch_node, condition_node)
asforest.extract_switch_from_sequence(case_candidate_information.switch_node)
else:
asforest.replace_condition_node_by_single_branch(condition_node)
return missing_case_finder.updated_switch_nodes

def _can_insert_missing_case_node(self, condition_node: ConditionNode) -> Optional[CaseCandidateInformation]:
"""
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ def insert(self, possible_case: CaseNodeCandidate):
first fallthrough-cases.
- If the possible-case node is reached by the switch-node, then the content must be after any other code.
Thus, it must contain all constants from a block of fallthrough-cases. But here, it can contain more.
- If neither one reaches the other, then it can be insert anywhere, at long as it can be archived by only
- If neither one reaches the other, then it can be inserted anywhere, as long as it can be achieved by only
resorting fallthrough-cases all leading to the same code-execution.
"""
cases_of_switch_node = {case.constant for case in self._switch_node.children}
@@ -70,6 +70,7 @@ def insert(self, possible_case: CaseNodeCandidate):
return

self._sibling_reachability_graph.update_when_inserting_new_case_node(compare_node, self._switch_node)
self.updated_switch_nodes.add(self._switch_node)
compare_node.clean()

def _add_case_before(self, intersecting_linear_case: Tuple[CaseNode], possible_case_properties: IntersectingCaseNodeProperties) -> bool:
Loading

0 comments on commit 563b71b

Please sign in to comment.