Skip to content

Commit

Permalink
Merge branch 'main' into issue-263-_Expression_propagation_Call_incor…
Browse files Browse the repository at this point in the history
…rect_propagation_of_return_value
  • Loading branch information
mari-mari authored Oct 24, 2023
2 parents d7cebb0 + 4486fc0 commit 81eedc0
Show file tree
Hide file tree
Showing 26 changed files with 2,146 additions and 1,851 deletions.
24 changes: 2 additions & 22 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
from ctypes import c_byte, c_int, c_long, c_short, c_ubyte, c_uint, c_ulong, c_ushort
from itertools import chain, repeat

from decompiler.structures import pseudo as expressions
Expand All @@ -8,6 +7,7 @@
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface
from decompiler.util.integer_util import normalize_int


class CExpressionGenerator(DataflowObjectVisitorInterface):
Expand Down Expand Up @@ -80,20 +80,6 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.adc: "adc",
}

SIGNED_FORMATS = {
8: lambda x: c_byte(x).value,
16: lambda x: c_short(x).value,
32: lambda x: c_int(x).value,
64: lambda x: c_long(x).value,
}

UNSIGNED_FORMATS = {
8: lambda x: c_ubyte(x).value,
16: lambda x: c_ushort(x).value,
32: lambda x: c_uint(x).value,
64: lambda x: c_ulong(x).value,
}

"""
Precedence used for correctly generating brackets.
Higher precedence is more tightly binding.
Expand Down Expand Up @@ -298,13 +284,7 @@ def _get_integer_literal_value(self, literal: expressions.Constant) -> int:
Return the right integer value for the given type, assuming that the
re-compilation host has the same sizes as the decompilation host.
"""
if literal.type.is_signed:
if handler := self.SIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
elif literal.value < 0:
if handler := self.UNSIGNED_FORMATS.get(literal.type.size, None):
return handler(literal.value)
return literal.value
return normalize_int(literal.value, literal.type.size, literal.type.is_signed)

@staticmethod
def _interpret_integer_literal_type(value: int) -> Integer:
Expand Down
28 changes: 18 additions & 10 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
RegisterPair,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct, Union
from decompiler.structures.pseudo.complextypes import Class, Struct, Union
from decompiler.structures.pseudo.operations import MemberAccess


Expand Down Expand Up @@ -67,9 +67,8 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
"""
# case 1 (struct), avoid set field of named integers:
dest_type = self._lifter.lift(assignment.dest.type)
if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and not (
isinstance(dest_type, Pointer) and isinstance(dest_type.type, Integer)
):
if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and (
isinstance(dest_type, Struct) or isinstance(dest_type, Class)): # otherwise get_member_by_offset not available
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
Expand All @@ -95,18 +94,21 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
case 1: struct member read access e.g. (x = )book.title
lift as (x = ) struct_member(book, title)
case 2: accessing register portion e.g. (x = )eax.ah
lift as (x = ) eax & 0x0000ff00
lift as (x = ) (uint8_t)(eax >> 8)
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
return BinaryOperation(
OperationType.bitwise_and,
[source, Constant(self._get_all_ones_mask_for_type(instruction.size) << instruction.offset)],
vartype=cast_type,
return UnaryOperation(
OperationType.cast,
[BinaryOperation(
OperationType.right_shift_us,
[source, Constant(instruction.offset, Integer.int32_t())]
)],
cast_type
)
return UnaryOperation(OperationType.cast, [source], vartype=cast_type, contraction=True)

Expand Down Expand Up @@ -213,8 +215,14 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct,
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
member = vartype.type.get_member_by_offset(instruction.offset)
if member is not None:
    name = member.name
else:
    # No member is known at this offset: synthesize a placeholder name.
    # str.replace returns a new string (str is immutable), so its result
    # must be kept; otherwise a negative offset leaves a '-' in the name.
    name = f"__offset_{instruction.offset}".replace("-", "minus_")
struct_member_access = MemberAccess(
member_name=vartype.type.members.get(instruction.offset),
member_name=name,
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
Expand Down
53 changes: 37 additions & 16 deletions decompiler/frontend/binaryninja/handlers/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
)
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import CustomType, Float, FunctionTypeDef, Integer, Pointer, UnknownType, Variable
from decompiler.structures.pseudo.complextypes import ComplexTypeMember, ComplexTypeName, Enum, Struct
from decompiler.structures.pseudo.complextypes import Class, ComplexTypeMember, ComplexTypeName, Enum, Struct
from decompiler.structures.pseudo.complextypes import Union as Union_


Expand Down Expand Up @@ -75,39 +75,60 @@ def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwarg

def lift_enum(self, binja_enum: EnumerationType, name: str = None, **kwargs) -> Enum:
    """
    Lift an enumeration type.

    The name is resolved through `_get_data_type_name`, using `name` as the
    provided name when one is given. Every member of `binja_enum` is lifted
    and added to the resulting enum. The enum is then registered exactly once
    in the lifter's complex type map, under the hash of the Binary Ninja type.

    :param binja_enum: the enumeration type to lift.
    :param name: optional name to use instead of the one parsed from the type.
    :return: the lifted enum.
    """
    type_id = hash(binja_enum)
    enum_name = self._get_data_type_name(binja_enum, keyword="enum", provided_name=name)
    # width * BYTE_SIZE — presumably the enum size in bits; confirm against Enum's constructor.
    enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, {})
    for member in binja_enum.members:
        enum.add_member(self._lifter.lift(member))
    self._lifter.complex_types.add(enum, type_id)
    return enum

def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexTypeMember:
    """Lift a single enumeration member, keeping its name and its value converted to int."""
    member_name = enum_member.name
    member_type = Integer(32)
    member_value = int(enum_member.value)
    return ComplexTypeMember(
        size=0,
        name=member_name,
        offset=-1,
        type=member_type,
        value=member_value,
    )

def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, Union_, Class, ComplexTypeName]:
    """
    Lift a struct, union or class type.

    A type that was already lifted is returned from the lifter's complex type
    map, looked up by the hash of the Binary Ninja type. Otherwise a new
    complex type is created, registered under that hash, and filled with the
    lifted members.

    :param struct: the structure type to lift.
    :param name: optional name to use instead of the one parsed from the type.
    :return: the cached or newly lifted complex type.
    :raises RuntimeError: if the structure variant is not struct, union or class.
    """
    type_id = hash(struct)
    cached_type = self._lifter.complex_types.retrieve_by_id(type_id)
    if cached_type is not None:
        return cached_type

    # A fresh member container is created on every call so that it is never
    # shared between lifted types.
    if struct.type == StructureVariant.StructStructureType:
        keyword, complex_type_class, members = "struct", Struct, {}
    elif struct.type == StructureVariant.UnionStructureType:
        keyword, complex_type_class, members = "union", Union_, []
    elif struct.type == StructureVariant.ClassStructureType:
        keyword, complex_type_class, members = "class", Class, {}
    else:
        raise RuntimeError(f"Unknown struct type {struct.type.name}")

    type_name = self._get_data_type_name(struct, keyword=keyword, provided_name=name)
    lifted_struct = complex_type_class(struct.width * self.BYTE_SIZE, type_name, members)

    # Registered before the members are lifted — presumably so that members
    # referring back to this type hit the cache instead of recursing; confirm.
    self._lifter.complex_types.add(lifted_struct, type_id)
    for member in struct.members:
        lifted_struct.add_member(self.lift_struct_member(member, type_name))
    return lifted_struct

@abstractmethod
def _get_data_type_name(self, complex_type: Union[StructureType, EnumerationType], keyword: str) -> str:
"""Parse out the name of complex type."""
string = complex_type.get_string()
if keyword in string:
return complex_type.get_string().split(keyword)[1]
return string
def _get_data_type_name(self, complex_type: Union[StructureType, EnumerationType], keyword: str, provided_name:str) -> str:
"""Parse out the name of complex type. Empty and duplicate names are changed.
Calling this function has the side effect of incrementing a counter in the UniqueNameProvider."""
if provided_name:
name = provided_name
else:
type_string = complex_type.get_string()
if keyword in type_string:
name = complex_type.get_string().split(keyword)[1]
else:
name = type_string

if name.strip() == "":
name = f"__anonymous_{keyword}"
name = self._lifter.unique_name_provider.get_unique_name(name)

return name

def lift_struct_member(self, member: StructureMember, parent_struct_name: str = None) -> ComplexTypeMember:
"""Lift struct or union member."""
Expand All @@ -117,7 +138,7 @@ def lift_struct_member(self, member: StructureMember, parent_struct_name: str =
else:
# if member is an embedded struct/union, the name is already available
member_type = self._lifter.lift(member.type, name=member.name)
return ComplexTypeMember(0, name=member.name, offset=member.offset, type=member_type)
return ComplexTypeMember(member_type.size, name=member.name, offset=member.offset, type=member_type)

@abstractmethod
def _get_member_pointer_on_the_parent_struct(self, member: StructureMember, parent_struct_name: str) -> ComplexTypeMember:
Expand Down
7 changes: 6 additions & 1 deletion decompiler/frontend/binaryninja/handlers/unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,12 @@ def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct,
struct_variable = self._lifter.lift(instruction.src)
struct_ptr: Pointer = self._lifter.lift(instruction.src.expr_type)
struct_member = struct_ptr.type.get_member_by_offset(instruction.offset)
return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=struct_member.offset, member_name=struct_member.name)
if struct_member is not None:
    name = struct_member.name
else:
    # No member is known at this offset: synthesize a placeholder name.
    # str.replace returns a new string (str is immutable), so its result
    # must be kept; otherwise a negative offset leaves a '-' in the name.
    name = f"__offset_{instruction.offset}".replace("-", "minus_")
return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=instruction.offset, member_name=name)

def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation:
"""Lift a MLIL_FTRUNC operation."""
Expand Down
3 changes: 2 additions & 1 deletion decompiler/frontend/binaryninja/lifter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from decompiler.frontend.lifter import ObserverLifter
from decompiler.structures.pseudo import DataflowObject, Tag, UnknownExpression, UnknownType

from ...structures.pseudo.complextypes import ComplexTypeMap
from ...structures.pseudo.complextypes import ComplexTypeMap, UniqueNameProvider
from .handlers import HANDLERS


Expand All @@ -17,6 +17,7 @@ def __init__(self, no_bit_masks: bool = True, bv: BinaryView = None):
self.no_bit_masks = no_bit_masks
self.bv: BinaryView = bv
self.complex_types: ComplexTypeMap = ComplexTypeMap()
self.unique_name_provider: UniqueNameProvider = UniqueNameProvider()
for handler in HANDLERS:
handler(self).register()

Expand Down
8 changes: 8 additions & 0 deletions decompiler/frontend/binaryninja/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
MediumLevelILBasicBlock,
MediumLevelILConstPtr,
MediumLevelILInstruction,
MediumLevelILJump,
MediumLevelILJumpTo,
MediumLevelILTailcallSsa,
RegisterValueType,
Expand All @@ -18,6 +19,7 @@
from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, FalseCase, IndirectEdge, SwitchCase, TrueCase, UnconditionalEdge
from decompiler.structures.pseudo import Constant, Instruction
from decompiler.structures.pseudo.complextypes import ComplexTypeMap
from decompiler.structures.pseudo.instructions import Comment


class BinaryninjaParser(Parser):
Expand Down Expand Up @@ -135,6 +137,10 @@ def _get_lookup_table(self, block: MediumLevelILBasicBlock) -> Dict[int, List[Co
lookup[target] += [Constant(value)]
return lookup

def _has_undetermined_jump(self, basic_block: MediumLevelILBasicBlock) -> bool:
"""Return True if basic-block is ending in a jump and has no outgoing edges"""
return bool(len(basic_block) and isinstance(basic_block[-1], MediumLevelILJump) and not basic_block.outgoing_edges)

def _lift_instructions(self, basic_block: MediumLevelILBasicBlock) -> Iterator[Instruction]:
"""Yield the lifted versions of all instructions in the given basic block."""
for instruction in basic_block:
Expand All @@ -144,6 +150,8 @@ def _lift_instructions(self, basic_block: MediumLevelILBasicBlock) -> Iterator[I
self._unlifted_instructions.append(instruction)
continue
yield lifted_instruction
if self._has_undetermined_jump(basic_block):
yield Comment("jump -> undetermined")

def _report_lifter_errors(self):
"""Report instructions which could not be lifted and reset their counter."""
Expand Down
1 change: 1 addition & 0 deletions decompiler/pipeline/controlflowanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .expression_simplification.stages import ExpressionSimplificationAst, ExpressionSimplificationCfg
from .instruction_length_handler import InstructionLengthHandler
from .loop_name_generator import LoopNameGenerator
from .readability_based_refinement import ReadabilityBasedRefinement
from .variable_name_generation import VariableNameGeneration
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Callable, Optional

from decompiler.structures.pseudo import Constant, Integer, OperationType
from decompiler.util.integer_util import normalize_int


def constant_fold(operation: OperationType, constants: list[Constant]) -> Constant:
Expand Down Expand Up @@ -103,27 +104,6 @@ def _constant_fold_shift(constants: list[Constant], fun: Callable[[int, int], in
)


def normalize_int(v: int, size: int, signed: bool) -> int:
    """
    Normalizes an integer value to a specific size and signedness.

    This function takes an integer value 'v' and normalizes it to fit within
    the specified 'size' in bits by discarding overflowing bits. If 'signed' is
    true, the value is treated as a signed integer, i.e. interpreted as a two's complement.
    Therefore the return value will be negative iff 'signed' is true and the most-significant bit is set.

    A size of 0 leaves no bits, so the result is 0 regardless of signedness.

    :param v: The value to be normalized.
    :param size: The desired bit size for the normalized integer.
    :param signed: True if the integer should be treated as signed.
    :return: The normalized integer value.
    :raises ValueError: If 'size' is negative.
    """
    if size < 0:
        raise ValueError(f"size must be non-negative, got {size}")
    if size == 0:
        # Without this guard the sign-bit test below would evaluate 1 << -1.
        return 0

    modulus = 1 << size
    value = v & (modulus - 1)
    # The most-significant bit is set exactly when value >= 2 ** (size - 1).
    if signed and value & (modulus >> 1):
        return value - modulus
    return value


_OPERATION_TO_FOLD_FUNCTION: dict[OperationType, Callable[[list[Constant]], Constant]] = {
OperationType.minus: partial(_constant_fold_arithmetic_binary, fun=operator.sub),
OperationType.plus: partial(_constant_fold_arithmetic_binary, fun=operator.add),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from functools import reduce
from typing import Iterator

from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import constant_fold
from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import FOLDABLE_OPERATIONS, constant_fold
from decompiler.pipeline.controlflowanalysis.expression_simplification.rules.rule import SimplificationRule
from decompiler.structures.pseudo import Constant, Expression, Operation, OperationType, Type
from decompiler.structures.pseudo.operations import COMMUTATIVE_OPERATIONS

_COLLAPSIBLE_OPERATIONS = COMMUTATIVE_OPERATIONS & FOLDABLE_OPERATIONS

class CollapseNestedConstants(SimplificationRule):
"""
Expand All @@ -14,7 +15,7 @@ class CollapseNestedConstants(SimplificationRule):
This stage exploits associativity and is the only stage doing so. Therefore, it cannot be replaced by a combination of `TermOrder` and `CollapseConstants`.
"""
def apply(self, operation: Operation) -> list[tuple[Expression, Expression]]:
if operation.operation not in COMMUTATIVE_OPERATIONS:
if operation.operation not in _COLLAPSIBLE_OPERATIONS:
return []
if not isinstance(operation, Operation):
raise TypeError(f"Expected Operation, got {type(operation)}")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from decompiler.pipeline.controlflowanalysis.expression_simplification.constant_folding import normalize_int
from decompiler.pipeline.controlflowanalysis.expression_simplification.rules.rule import SimplificationRule
from decompiler.structures.pseudo import BinaryOperation, Constant, Expression, Integer, Operation, OperationType
from decompiler.util.integer_util import normalize_int


class PositiveConstants(SimplificationRule):
Expand Down
Loading

0 comments on commit 81eedc0

Please sign in to comment.