Skip to content

Commit

Permalink
Merge branch 'main' into issue-364-_Lifter_Fix_escaping/purging_of_nu…
Browse files Browse the repository at this point in the history
…llbytes
  • Loading branch information
0x6e62 authored Dec 7, 2023
2 parents ae0078e + 882b78c commit ca117b0
Show file tree
Hide file tree
Showing 15 changed files with 203 additions and 369 deletions.
11 changes: 5 additions & 6 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ def visit_list_operation(self, op: operations.ListOperation) -> str:

def visit_unary_operation(self, op: operations.UnaryOperation) -> str:
"""Return a string representation of the given unary operation (e.g. !a or &a)."""
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
if isinstance(op, MemberAccess):
operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation]
return f"{self.visit(op.struct_variable)}{operator_str}{op.member_name}"
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
return f"{operand}{operator_str}{op.member_name}"
if op.operation == OperationType.cast and op.contraction:
return f"({int(op.type.size / 8)}: ){operand}"
if op.operation == OperationType.cast:
Expand Down Expand Up @@ -209,10 +209,9 @@ def visit_call(self, op: operations.Call) -> str:
Generic labels starting with 'arg' e.g. 'arg1', 'arg2' are being filtered.
Additionally we filter ellipsis argument '...' that is lifted from type string.
"""
func_name = self.visit(op.function)
if isinstance(op.function, expressions.Constant):
func_name = func_name.strip('"')
output = f"{func_name}("
func_expr_str = self._visit_bracketed(op.function) if self._has_lower_precedence(op.function, op) else self.visit(op.function)

output = f"{func_expr_str}("
if op.meta_data is not None:
parameter_names = op.meta_data.get("param_names", [])
is_tailcall = op.meta_data.get("is_tailcall")
Expand Down
20 changes: 7 additions & 13 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
member_name=struct_variable.type.get_member_name_by_offset(assignment.offset),
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
writes_memory=assignment.dest.version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
Expand All @@ -99,7 +99,7 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
if isinstance(source.type, Struct) or isinstance(source.type, Class) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
Expand All @@ -112,11 +112,11 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
"""Lift MLIL var_field as struct or union member read access."""
if isinstance(source.type, Struct):
member_name = source.type.get_member_by_offset(instruction.offset).name
if isinstance(source.type, Struct) or isinstance(source.type, Class):
member_name = source.type.get_member_name_by_offset(instruction.offset)
elif parent := kwargs.get("parent", None):
parent_type = self._lifter.lift(parent.dest.type)
member_name = source.type.get_member_by_type(parent_type).name
member_name = source.type.get_member_name_by_type(parent_type)
else:
logging.warning(f"Cannot get member name for instruction {instruction}")
member_name = f"field_{hex(instruction.offset)}"
Expand Down Expand Up @@ -213,14 +213,8 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct,
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
member = vartype.type.get_member_by_offset(instruction.offset)
if member is not None:
name = member.name
else:
name = f"__offset_{instruction.offset}"
name.replace("-", "minus_")
struct_member_access = MemberAccess(
member_name=name,
member_name=vartype.type.get_member_name_by_offset(instruction.offset),
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
Expand Down
11 changes: 4 additions & 7 deletions decompiler/frontend/binaryninja/handlers/unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Pointer,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct
from decompiler.structures.pseudo.complextypes import Class, Struct
from decompiler.structures.pseudo.operations import MemberAccess


Expand Down Expand Up @@ -98,12 +98,9 @@ def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct,
"""Lift a MLIL_LOAD_STRUCT_SSA (struct member access e.g. var#n->x) instruction."""
struct_variable = self._lifter.lift(instruction.src)
struct_ptr: Pointer = self._lifter.lift(instruction.src.expr_type)
struct_member = struct_ptr.type.get_member_by_offset(instruction.offset)
if struct_member is not None:
name = struct_member.name
else:
name = f"__offset_{instruction.offset}"
name.replace("-", "minus_")
name = f"field_{hex(instruction.offset)}".replace("-", "minus_")
if isinstance(struct_ptr.type, Class) or isinstance(struct_ptr.type, Struct):
name = struct_ptr.type.get_member_name_by_offset(instruction.offset)
return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=instruction.offset, member_name=name)

def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation:
Expand Down
28 changes: 0 additions & 28 deletions decompiler/pipeline/commons/expressionpropagationcommons.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ class ExpressionPropagationBase(PipelineStage, ABC):
name = "expression-propagation-base"

def __init__(self):
self._limit: Optional[int] = None
self._limits: Dict[Instruction, int]
self._use_map: UseMap
self._def_map: DefMap
self._pointers_info: Optional[Pointers] = None
Expand All @@ -43,7 +41,6 @@ def __init__(self):

def run(self, task: DecompilerTask):
"""Execute the expression propagation on the current ControlFlowGraph."""
self._parse_options(task)
iteration = 0
# execute until there are no more changes
while self.perform(task.graph, iteration):
Expand Down Expand Up @@ -89,15 +86,6 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ
"""
pass

def _parse_options(self, task: DecompilerTask):
    """
    Read the complexity limits for this pipeline stage from the task options.

    The general instruction limit is stored in self._limit. The limits for Branch, Call and
    Assignment are stored in self._limits, each capped by the general instruction limit.
    """
    options = task.options
    overall_limit = options.getint(f"{self.name}.maximum_instruction_complexity")
    self._limit = overall_limit
    branch_limit = options.getint(f"{self.name}.maximum_branch_complexity")
    call_limit = options.getint(f"{self.name}.maximum_call_complexity")
    assignment_limit = options.getint(f"{self.name}.maximum_assignment_complexity")
    self._limits = {
        Branch: min(overall_limit, branch_limit),
        Call: min(overall_limit, call_limit),
        Assignment: min(overall_limit, assignment_limit),
    }

def _initialize_maps(self, cfg: ControlFlowGraph) -> None:
"""
Fills use and def maps.
Expand Down Expand Up @@ -205,22 +193,6 @@ def _operation_is_propagated_in_phi(self, target: Instruction, definition: Assig
do not allow phi arguments to be unary or binary operations"""
return isinstance(target, Phi) and isinstance(definition.value, Operation)

def _resulting_instruction_is_too_long(self, target: Instruction, definition: Assignment) -> bool:
"""Instruction after expression propagation should not be longer than a given limit
we already test that only vars and constants are propagated in phi,
therefore the length of phi after propagation will be constant;
same with propagating instructions like e.g. a = b or a = 10.
"""
if self._is_phi(target) or self._is_copy_assignment(definition):
return False
limit = self._limits.get(type(target), self._limit)
if self._is_call_assignment(target):
limit = self._limits[Call]
count = len([expr for expr in self._find_subexpressions(target) if expr == definition.destination])
propagated_complexity = target.complexity + (definition.value.complexity - definition.destination.complexity) * count
return propagated_complexity > limit

def _is_address_assignment(self, definition: Assignment) -> bool:
"""
Currently propagating a = &x into uses of a causes problems (see test21 in test_memory). So for the moment is not propagated.
Expand Down
4 changes: 2 additions & 2 deletions decompiler/pipeline/dataflowanalysis/dead_loop_elimination.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from decompiler.pipeline.preprocessing.util import _init_basicblocks_of_definition, _init_maps
from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, GraphEdgeInterface
from decompiler.structures.graphs.interface import GraphInterface
from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph
from decompiler.structures.graphs.interface import GraphEdgeInterface, GraphInterface
from decompiler.structures.maps import DefMap, UseMap
from decompiler.structures.pseudo.delogic_logic import DelogicConverter
from decompiler.structures.pseudo.expressions import Constant, Variable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ
or self._is_address_assignment(definition)
or self._contains_global_variable(definition)
or self._operation_is_propagated_in_phi(target, definition)
or self._resulting_instruction_is_too_long(target, definition)
or self._is_invalid_propagation_into_address_operation(target, definition)
or self._is_dereference_assignment(definition)
)
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ
or self._is_address_assignment(definition)
or self._contains_global_variable(definition)
or self._operation_is_propagated_in_phi(target, definition)
or self._resulting_instruction_is_too_long(target, definition)
or self._is_invalid_propagation_into_address_operation(target, definition)
or self._is_dereference_assignment(definition)
or self._definition_value_could_be_modified_via_memory_access_between_definition_and_target(definition, target)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ
or self._contains_global_variable(definition)
or self._operation_is_propagated_in_phi(target, definition)
or self._is_invalid_propagation_into_address_operation(target, definition)
or self._resulting_instruction_is_too_long(target, definition)
or self._is_aliased_postponed_for_propagation(target, definition)
or self._definition_value_could_be_modified_via_memory_access_between_definition_and_target(definition, target)
or self._pointer_value_used_in_definition_could_be_modified_via_memory_access_between_definition_and_target(definition, target)
Expand Down
15 changes: 12 additions & 3 deletions decompiler/structures/graphs/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,22 @@
from __future__ import annotations

from itertools import chain
from typing import Dict, Set
from typing import Dict, Iterator, List, Optional, Set

from decompiler.structures.pseudo import Assignment, Condition, Instruction, Variable
from decompiler.structures.pseudo import Assignment, Condition, Expression, Instruction, Variable
from networkx import DiGraph

from .basicblock import BasicBlock
from .branches import *
from .branches import (
BasicBlockEdge,
BasicBlockEdgeCondition,
ConditionalEdge,
FalseCase,
IndirectEdge,
SwitchCase,
TrueCase,
UnconditionalEdge,
)
from .classifiedgraph import ClassifiedGraph


Expand Down
20 changes: 20 additions & 0 deletions decompiler/structures/pseudo/complextypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ def add_member(self, member: ComplexTypeMember):
def get_member_by_offset(self, offset: int) -> Optional[ComplexTypeMember]:
    """Look up the member registered under the given offset; yields None if there is no such member."""
    members_by_offset = self.members
    return members_by_offset.get(offset)

def get_member_name_by_offset(self, offset: int) -> str:
    """
    Return the name of the member located at the given offset.

    If no member is known at that offset, a warning is logged and a placeholder name derived from
    the hexadecimal offset is returned instead (a minus sign is spelled out as 'minus_').
    """
    found = self.get_member_by_offset(offset)
    if found is None:
        logging.warning(f"Cannot get member name for type {self} at offset {offset}")
        return f"field_{hex(offset)}".replace("-", "minus_")
    return found.name

def declaration(self) -> str:
    """Build the declaration string: type specifier, name and the member declarations in ascending key order."""
    member_declarations = [self.members[key].declaration() for key in sorted(self.members.keys())]
    # each member declaration is terminated by a semicolon, including the last one
    members = ";\n\t".join(member_declarations) + ";"
    return f"{self.type_specifier.value} {self.name} {{\n\t{members}\n}}"
Expand Down Expand Up @@ -103,6 +112,15 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember:
if member.type == _type:
return member

def get_member_name_by_type(self, _type: Type) -> str:
    """
    Return the name of the union member that has the given type.

    If no member is found for that type, a warning is logged and the placeholder 'unknown_field' is returned.
    """
    found = self.get_member_by_type(_type)
    if found is None:
        logging.warning(f"Cannot get member name for union {self}")
        return "unknown_field"
    return found.name


@dataclass(frozen=True, order=True)
class Enum(ComplexType):
Expand Down Expand Up @@ -136,6 +154,7 @@ class UniqueNameProvider:
"""The purpose of this class is to provide unique names for types, as duplicate names can potentially be encountered in the lifting stage (especially anonymous structs, etc.)
This class keeps track of all the names already used. If duplicates are found, they are renamed by appending suffixes with incrementing numbers.
E.g. `classname`, `classname__2`, `classname__3`, ...
Assumes that incoming names do not end with __{number}.
"""

def __init__(self):
Expand All @@ -145,6 +164,7 @@ def get_unique_name(self, name: str) -> str:
"""This method returns the input name if it was unique so far.
Otherwise it returns the name with an added incrementing suffix.
In any case, the occurrence of the name is counted.
Assumes that incoming names do not end with __{number}.
"""
if name not in self._name_to_count:
self._name_to_count[name] = 1
Expand Down
Loading

0 comments on commit ca117b0

Please sign in to comment.