Skip to content

Commit

Permalink
[Structure type] Improve current structure type support (#253)
Browse files Browse the repository at this point in the history
* Add init support for structure variable members being accessed.

* Add init support for structure variable members being written; identify an occasional recursion problem caused by struct members.

* Fix isort errors

* Add init support for unions plus lifting

* Fix isort errors

* Add structure member direct access (without pointers) + lifting

* Add init enum type support

* Change struct member access to unary operation, try to fix failing unittests

* Add fixed unittests -_-

* Fix failing systemtest

* Clean up structs code generation, use accept/visit of unary operation

* Fix lifting of unions, add complex type width, refactor

* Rename StructMemberAccess to MemberAccess, remove unused methods, add tests for complex types and member access

* Fix forgot git add

* Add member access tests for code generation

* Fix too many semicolons in declarations

* Fix trying to get the parent type of a non-complex-type variable

* Start cleanup

* Continue cleanup

* Continue cleanup

* Continue cleanup

* Fix: adding `;` when a complex type is empty breaks code generation (TODO: test that in code generation)

* Remove todos

* Add review changes

* Fix isort errors, add test to check member access in decompiler output

* MR changes

* Add mr review change

---------

Co-authored-by: Mariia Rybalka <[email protected]>
  • Loading branch information
github-actions[bot] and mari-mari authored Sep 7, 2023
1 parent d437bbc commit d4b9357
Show file tree
Hide file tree
Showing 20 changed files with 790 additions and 70 deletions.
7 changes: 6 additions & 1 deletion decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from decompiler.structures.pseudo import Float, FunctionTypeDef, Integer, OperationType, Pointer, StringSymbol, Type
from decompiler.structures.pseudo import instructions as instructions
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface


Expand Down Expand Up @@ -64,6 +65,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
OperationType.greater_or_equal_us: ">=",
OperationType.dereference: "*",
OperationType.address: "&",
OperationType.member_access: ".",
# Handled in code
# OperationType.cast: "cast",
# OperationType.pointer: "point",
Expand Down Expand Up @@ -145,7 +147,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.low: "low",
OperationType.ternary: 30,
OperationType.call: 150,
OperationType.field: 150,
OperationType.member_access: 150,
OperationType.list_op: 10,
# TODO: Figure out what these are / how to handle this
# OperationType.adc: "adc",
Expand Down Expand Up @@ -179,6 +181,9 @@ def visit_list_operation(self, op: operations.ListOperation) -> str:

def visit_unary_operation(self, op: operations.UnaryOperation) -> str:
"""Return a string representation of the given unary operation (e.g. !a or &a)."""
if isinstance(op, MemberAccess):
operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation]
return f"{self.visit(op.struct_variable)}{operator_str}{op.member_name}"
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
if op.operation == OperationType.cast and op.contraction:
return f"({int(op.type.size / 8)}: ){operand}"
Expand Down
1 change: 1 addition & 0 deletions decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def generate(self, tasks: Iterable[DecompilerTask], run_cleanup: bool = True):
for task in tasks:
if run_cleanup and not task.failed:
task.syntax_tree.clean_up()
string_blocks.append(task.complex_types.declarations())
string_blocks.append(self.generate_function(task))
return "\n\n".join(string_blocks)

Expand Down
4 changes: 3 additions & 1 deletion decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
UnaryOperation,
Variable,
)
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
from decompiler.util.serialization.bytes_serializer import convert_bytes
Expand Down Expand Up @@ -53,6 +54,8 @@ def visit_loop_node(self, node: LoopNode):

def visit_unary_operation(self, unary: UnaryOperation):
"""Visit unary operations to remember all variables those memory location was read."""
if isinstance(unary, MemberAccess):
self._variables.add(unary.struct_variable)
if unary.operation == OperationType.address or unary.operation == OperationType.dereference:
if isinstance(unary.operand, Variable):
self._variables.add(unary.operand)
Expand All @@ -68,7 +71,6 @@ def generate(self, param_names: list[str] = []) -> Iterator[str]:
for variable in sorted(self._variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable) and variable.name not in param_names:
variable_type_mapping[variable.type].append(variable)

for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)):
for chunked_variables in self._chunks(variables, self._vars_per_line):
yield CExpressionGenerator.format_variables_declaration(
Expand Down
13 changes: 7 additions & 6 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from __future__ import annotations

import logging
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union

from binaryninja import BinaryView, Function, load
from binaryninja.types import SymbolType
from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.pseudo.complextypes import ComplexTypeMap
from decompiler.structures.pseudo.expressions import Variable
from decompiler.structures.pseudo.typing import Type
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -127,10 +128,10 @@ def create_task(self, function_identifier: Union[str, Function], options: Option
tagging = CompilerIdiomsTagging(self._bv, function.function.start, options)
tagging.run()
try:
cfg = self._extract_cfg(function.function, options)
cfg, complex_types = self._extract_cfg(function.function, options)
task = DecompilerTask(
function.name, cfg, function_return_type=function.return_type, function_parameters=function.params,
options=options
options=options, complex_types=complex_types
)
except Exception as e:
task = DecompilerTask(
Expand All @@ -154,9 +155,9 @@ def get_all_function_names(self):
functions.append(function.name)
return functions

def _extract_cfg(self, function: Function, options: Options) -> ControlFlowGraph:
def _extract_cfg(self, function: Function, options: Options) -> Tuple[ControlFlowGraph, ComplexTypeMap]:
"""Extract a control flow graph utilizing the parser and fixing it afterwards."""
report_threshold = options.getint("lifter.report_threshold", fallback=3)
no_masks = options.getboolean("lifter.no_bit_masks", fallback=True)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks), report_threshold)
return parser.parse(function)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks, bv=function.view), report_threshold)
return parser.parse(function), parser.complex_types
94 changes: 64 additions & 30 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""Module implementing the AssignmentHandler for binaryninja."""
import logging
from functools import partial
from typing import Union

import binaryninja
from binaryninja import mediumlevelil
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import (
Assignment,
BinaryOperation,
Constant,
Expression,
GlobalVariable,
Integer,
Operation,
Expand All @@ -16,6 +18,8 @@
RegisterPair,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct, Union
from decompiler.structures.pseudo.operations import MemberAccess


class AssignmentHandler(Handler):
Expand All @@ -38,8 +42,8 @@ def register(self):
mediumlevelil.MediumLevelILVarAliasedField: partial(self.lift_get_field, is_aliased=True),
mediumlevelil.MediumLevelILStore: self.lift_store,
mediumlevelil.MediumLevelILStoreSsa: self.lift_store,
mediumlevelil.MediumLevelILStoreStruct: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStruct: self.lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self.lift_store_struct,
mediumlevelil.MediumLevelILLowPart: self._lift_mask_high,
}
)
Expand All @@ -54,16 +58,31 @@ def lift_assignment(self, assignment: mediumlevelil.MediumLevelILSetVar, is_alia
def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_aliased=False, **kwargs) -> Assignment:
"""
Lift an instruction writing to a subset of the given value.
In case of lower register (offset 0) lift as contraction
e.g. eax.al = .... <=> (char)eax ....
In case higher registers use masking
e.g. eax.ah = x <=> eax = (eax & 0xffff00ff) + (x << 2)
case 1: writing into struct member: book.title = value
lift as struct_member(book, title, writes_memory) = value
case 2: writing into lower register part (offset 0): eax.al = value
lift as contraction (char) eax = value
case 3: writing into higher register part: eax.ah = value
lift using bit masking eax = (eax & 0xffff00ff) + (value << 2)
"""
if assignment.offset == 0 and self._lifter.is_omitting_masks:
# case 1 (struct), avoid set field of named integers:
dest_type = self._lifter.lift(assignment.dest.type)
if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and not (
isinstance(dest_type, Pointer) and isinstance(dest_type.type, Integer)
):
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
elif assignment.offset == 0 and self._lifter.is_omitting_masks:
destination = self._lift_contraction(assignment, is_aliased=is_aliased, parent=assignment)
value = self._lifter.lift(assignment.src)
# case 3 (bit masking):
else:
destination = self._lifter.lift(assignment.dest, is_aliased=is_aliased, parent=assignment)
value = self._lift_masked_operand(assignment)
Expand All @@ -72,9 +91,16 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_aliased=False, **kwargs) -> Operation:
"""
Lift an instruction accessing a field from the outside.
e.g. x = eax.ah <=> x = eax & 0x0000ff00
case 1: struct member read access e.g. (x = )book.title
lift as (x = ) struct_member(book, title)
case 2: accessing register portion e.g. (x = )eax.ah
lift as (x = ) eax & 0x0000ff00
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
return BinaryOperation(
Expand All @@ -84,14 +110,30 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
)
return UnaryOperation(OperationType.cast, [source], vartype=cast_type, contraction=True)

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
    """
    Build the member read access for an MLIL var_field whose source is a struct or union.

    The member name is resolved as follows:
    - source type is a Struct: looked up by the offset of the accessed field.
    - otherwise, if a truthy "parent" keyword argument is given: looked up by the lifted type of the parent's destination.
    - otherwise: a warning is logged and a placeholder name built from the hexadecimal offset is used.
    """
    if not isinstance(source.type, Struct):
        parent = kwargs.get("parent", None)
        if parent:
            # No struct layout to consult, so the member is chosen by the type the parent writes into.
            parent_type = self._lifter.lift(parent.dest.type)
            resolved_name = source.type.get_member_by_type(parent_type).name
        else:
            logging.warning(f"Cannot get member name for instruction {instruction}")
            resolved_name = f"field_{hex(instruction.offset)}"
    else:
        resolved_name = source.type.get_member_by_offset(instruction.offset).name
    return MemberAccess(offset=instruction.offset, member_name=resolved_name, operands=[source])

def lift_store(self, assignment: mediumlevelil.MediumLevelILStoreSsa, **kwargs) -> Assignment:
    """
    Translate an MLIL store into a pseudo Assignment (e.g. [ebp+4] = eax, or [global_var_label] = 25).

    The left-hand side comes from _lift_store_destination, the right-hand side is the lifted source of the store.
    """
    destination = self._lift_store_destination(assignment)
    stored_value = self._lifter.lift(assignment.src)
    return Assignment(destination, stored_value)

def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> Union[UnaryOperation, GlobalVariable]:
def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> UnaryOperation | GlobalVariable:
"""
Lift destination operand of store operation which is used for modelling both assignments of dereferences and global variables.
"""
Expand Down Expand Up @@ -167,24 +209,16 @@ def lift_split_assignment(self, assignment: mediumlevelil.MediumLevelILSetVarSpl
self._lifter.lift(assignment.src, parent=assignment),
)

def _lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, **kwargs) -> Assignment:
def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, **kwargs) -> Assignment:
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
return Assignment(
UnaryOperation(
OperationType.dereference,
[
BinaryOperation(
OperationType.plus,
[
UnaryOperation(OperationType.cast, [self._lifter.lift(instruction.dest)], vartype=Pointer(Integer.char())),
Constant(instruction.offset),
],
vartype=vartype,
),
],
vartype=Pointer(vartype),
writes_memory=instruction.dest_memory
),
self._lifter.lift(instruction.src),
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
struct_member_access = MemberAccess(
member_name=vartype.type.members.get(instruction.offset),
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
writes_memory=instruction.dest_memory,
)
src = self._lifter.lift(instruction.src)
return Assignment(struct_member_access, src)
Loading

0 comments on commit d4b9357

Please sign in to comment.