Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Structure type] Improve current structure type support #253

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
608bc2e
Add init support for structure variable members being accessed.
mari-mari Jun 15, 2023
f8982db
Add init support for structure variable members being written, identi…
mari-mari Jun 20, 2023
7d54902
Fix isort errors
mari-mari Jun 20, 2023
735714a
Add init support for unions plus lifting
mari-mari Jul 7, 2023
13f4c25
Fix isort errors
mari-mari Jul 7, 2023
52feef3
Add structure member direct access (without pointers) + lifting
mari-mari Jul 17, 2023
0576fad
Add init enum type support
mari-mari Jul 21, 2023
8d04803
Change struct member access to unary operation, try to fix failing un…
mari-mari Jul 24, 2023
1d09730
Add fixed unittests -_-
mari-mari Jul 24, 2023
5dde07d
Fix failing systemtest
mari-mari Jul 24, 2023
b8f85e3
Clean up structs code generation, use accept/visit of unary operation
mari-mari Jul 25, 2023
8196d34
Fix lifting of unions, add complex type width, refactor
mari-mari Jul 28, 2023
7f81265
Rename StructMemberAccess to MemberAccess, remove unused methods, add…
mari-mari Aug 8, 2023
9d41ffe
Fix forgot git add
mari-mari Aug 8, 2023
e2a68bd
Add member access tests for code generation
mari-mari Aug 11, 2023
4095e7a
Fix too much semicolons in declarations
mari-mari Aug 11, 2023
e586bee
Fix trying to get parent type of non-complex-type-varible
mari-mari Aug 11, 2023
145d559
Start cleanup
mari-mari Aug 14, 2023
a8b1711
Continue cleanup
mari-mari Aug 14, 2023
49fd05d
Continue cleanup
mari-mari Aug 14, 2023
dc8fd48
Continue cleanup
mari-mari Aug 14, 2023
ad7567a
Fix adding ; in case complex type is empty breaks code generation tod…
mari-mari Aug 14, 2023
c202e8c
Remove todos
mari-mari Aug 15, 2023
607b4d1
Add review changes
mari-mari Aug 28, 2023
201b2e0
Fix isort errors, add test to check member access in decompiler output
mari-mari Aug 28, 2023
822f5f0
MR changes
mari-mari Aug 30, 2023
fb0ad27
Add mr review change
mari-mari Sep 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from decompiler.structures.pseudo import Float, FunctionTypeDef, Integer, OperationType, Pointer, StringSymbol, Type
from decompiler.structures.pseudo import instructions as instructions
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface


Expand Down Expand Up @@ -64,6 +65,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
OperationType.greater_or_equal_us: ">=",
OperationType.dereference: "*",
OperationType.address: "&",
OperationType.member_access: ".",
# Handled in code
# OperationType.cast: "cast",
# OperationType.pointer: "point",
Expand Down Expand Up @@ -145,7 +147,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.low: "low",
OperationType.ternary: 30,
OperationType.call: 150,
OperationType.field: 150,
OperationType.member_access: 150,
OperationType.list_op: 10,
# TODO: Figure out what these are / how to handle this
# OperationType.adc: "adc",
Expand Down Expand Up @@ -179,6 +181,9 @@ def visit_list_operation(self, op: operations.ListOperation) -> str:

def visit_unary_operation(self, op: operations.UnaryOperation) -> str:
"""Return a string representation of the given unary operation (e.g. !a or &a)."""
if isinstance(op, MemberAccess):
operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation]
return f"{self.visit(op.struct_variable)}{operator_str}{op.member_name}"
operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand)
if op.operation == OperationType.cast and op.contraction:
return f"({int(op.type.size / 8)}: ){operand}"
Expand Down
1 change: 1 addition & 0 deletions decompiler/backend/codegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def generate(self, tasks: Iterable[DecompilerTask], run_cleanup: bool = True):
for task in tasks:
if run_cleanup and not task.failed:
task.syntax_tree.clean_up()
string_blocks.append(task.complex_types.declarations())
string_blocks.append(self.generate_function(task))
return "\n\n".join(string_blocks)

Expand Down
4 changes: 3 additions & 1 deletion decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
UnaryOperation,
Variable,
)
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
from decompiler.util.serialization.bytes_serializer import convert_bytes
Expand Down Expand Up @@ -53,6 +54,8 @@ def visit_loop_node(self, node: LoopNode):

def visit_unary_operation(self, unary: UnaryOperation):
"""Visit unary operations to remember all variables those memory location was read."""
if isinstance(unary, MemberAccess):
self._variables.add(unary.struct_variable)
if unary.operation == OperationType.address or unary.operation == OperationType.dereference:
if isinstance(unary.operand, Variable):
self._variables.add(unary.operand)
Expand All @@ -68,7 +71,6 @@ def generate(self, param_names: list[str] = []) -> Iterator[str]:
for variable in sorted(self._variables, key=lambda x: str(x)):
if not isinstance(variable, GlobalVariable) and variable.name not in param_names:
variable_type_mapping[variable.type].append(variable)

for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)):
for chunked_variables in self._chunks(variables, self._vars_per_line):
yield CExpressionGenerator.format_variables_declaration(
Expand Down
13 changes: 7 additions & 6 deletions decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
from __future__ import annotations

import logging
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union

from binaryninja import BinaryView, Function, load
from binaryninja.types import SymbolType
from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.pseudo.complextypes import ComplexTypeMap
from decompiler.structures.pseudo.expressions import Variable
from decompiler.structures.pseudo.typing import Type
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -127,10 +128,10 @@ def create_task(self, function_identifier: Union[str, Function], options: Option
tagging = CompilerIdiomsTagging(self._bv, function.function.start, options)
tagging.run()
try:
cfg = self._extract_cfg(function.function, options)
cfg, complex_types = self._extract_cfg(function.function, options)
task = DecompilerTask(
function.name, cfg, function_return_type=function.return_type, function_parameters=function.params,
options=options
options=options, complex_types=complex_types
)
except Exception as e:
task = DecompilerTask(
Expand All @@ -154,9 +155,9 @@ def get_all_function_names(self):
functions.append(function.name)
return functions

def _extract_cfg(self, function: Function, options: Options) -> Tuple[ControlFlowGraph, ComplexTypeMap]:
    """
    Extract a control flow graph utilizing the parser and fixing it afterwards.

    The lifter is configured from the 'lifter.no_bit_masks' option and the parser from the
    'lifter.report_threshold' option.

    Returns the parsed control flow graph together with the complex types exposed by the parser.
    """
    report_threshold = options.getint("lifter.report_threshold", fallback=3)
    no_masks = options.getboolean("lifter.no_bit_masks", fallback=True)
    parser = BinaryninjaParser(BinaryninjaLifter(no_masks, bv=function.view), report_threshold)
    # Parse first and read complex_types afterwards, so the read always happens after parsing has run.
    cfg = parser.parse(function)
    return cfg, parser.complex_types
94 changes: 64 additions & 30 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""Module implementing the AssignmentHandler for binaryninja."""
import logging
from functools import partial
from typing import Union

import binaryninja
from binaryninja import mediumlevelil
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import (
Assignment,
BinaryOperation,
Constant,
Expression,
GlobalVariable,
Integer,
Operation,
Expand All @@ -16,6 +18,8 @@
RegisterPair,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct, Union
from decompiler.structures.pseudo.operations import MemberAccess


class AssignmentHandler(Handler):
Expand All @@ -38,8 +42,8 @@ def register(self):
mediumlevelil.MediumLevelILVarAliasedField: partial(self.lift_get_field, is_aliased=True),
mediumlevelil.MediumLevelILStore: self.lift_store,
mediumlevelil.MediumLevelILStoreSsa: self.lift_store,
mediumlevelil.MediumLevelILStoreStruct: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self._lift_store_struct,
mediumlevelil.MediumLevelILStoreStruct: self.lift_store_struct,
mediumlevelil.MediumLevelILStoreStructSsa: self.lift_store_struct,
mediumlevelil.MediumLevelILLowPart: self._lift_mask_high,
}
)
Expand All @@ -54,16 +58,31 @@ def lift_assignment(self, assignment: mediumlevelil.MediumLevelILSetVar, is_alia
def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_aliased=False, **kwargs) -> Assignment:
"""
Lift an instruction writing to a subset of the given value.

In case of lower register (offset 0) lift as contraction
e.g. eax.al = .... <=> (char)eax ....

In case higher registers use masking
e.g. eax.ah = x <=> eax = (eax & 0xffff00ff) + (x << 2)
case 1: writing into struct member: book.title = value
lift as struct_member(book, title, writes_memory) = value
case 2: writing into lower register part (offset 0): eax.al = value
lift as contraction (char) eax = value
case 3: writing into higher register part: eax.ah = value
lift using bit masking eax = (eax & 0xffff00ff) + (value << 8)
"""
if assignment.offset == 0 and self._lifter.is_omitting_masks:
# case 1 (struct), avoid set field of named integers:
dest_type = self._lifter.lift(assignment.dest.type)
if isinstance(assignment.dest.type, binaryninja.NamedTypeReferenceType) and not (
isinstance(dest_type, Pointer) and isinstance(dest_type.type, Integer)
):
0x6e62 marked this conversation as resolved.
Show resolved Hide resolved
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
elif assignment.offset == 0 and self._lifter.is_omitting_masks:
destination = self._lift_contraction(assignment, is_aliased=is_aliased, parent=assignment)
value = self._lifter.lift(assignment.src)
# case 3 (bit masking):
else:
destination = self._lifter.lift(assignment.dest, is_aliased=is_aliased, parent=assignment)
value = self._lift_masked_operand(assignment)
Expand All @@ -72,9 +91,16 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_aliased=False, **kwargs) -> Operation:
"""
Lift an instruction accessing a field from the outside.
e.g. x = eax.ah <=> x = eax & 0x0000ff00

case 1: struct member read access e.g. (x = )book.title
lift as (x = ) struct_member(book, title)
case 2: accessing register portion e.g. (x = )eax.ah
lift as (x = ) eax & 0x0000ff00
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
return BinaryOperation(
Expand All @@ -84,14 +110,30 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
)
return UnaryOperation(OperationType.cast, [source], vartype=cast_type, contraction=True)

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
    """
    Lift MLIL var_field as struct or union member read access.

    The member is resolved from the type of the already lifted source:
    - Struct: the member located at the accessed offset.
    - otherwise (the caller only dispatches Struct and Union sources here): the member whose type
      matches the lifted type of the destination of the 'parent' instruction passed via kwargs.

    If no member can be resolved, a warning is logged and the placeholder name
    'field_<hex offset>' is used instead of failing on a missing member.
    """
    member = None
    if isinstance(source.type, Struct):
        member = source.type.get_member_by_offset(instruction.offset)
    elif parent := kwargs.get("parent", None):
        # NOTE(review): assumes the parent instruction has a destination (e.g. an assignment) - confirm for other parents.
        parent_type = self._lifter.lift(parent.dest.type)
        member = source.type.get_member_by_type(parent_type)
    if member is None:
        # Covers both a missing parent and a lookup that yielded no member.
        logging.warning(f"Cannot get member name for instruction {instruction}")
        member_name = f"field_{hex(instruction.offset)}"
    else:
        member_name = member.name
    return MemberAccess(
        offset=instruction.offset,
        member_name=member_name,
        operands=[source],
    )

def lift_store(self, assignment: mediumlevelil.MediumLevelILStoreSsa, **kwargs) -> Assignment:
    """
    Lift a store operation to a pseudo assignment (e.g. [ebp+4] = eax, or [global_var_label] = 25).

    The destination is lifted via _lift_store_destination, the stored value via the lifter.
    """
    destination = self._lift_store_destination(assignment)
    stored_value = self._lifter.lift(assignment.src)
    return Assignment(destination, stored_value)

def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> Union[UnaryOperation, GlobalVariable]:
def _lift_store_destination(self, store_assignment: mediumlevelil.MediumLevelILStoreSsa) -> UnaryOperation | GlobalVariable:
"""
Lift destination operand of store operation which is used for modelling both assignments of dereferences and global variables.
"""
Expand Down Expand Up @@ -167,24 +209,16 @@ def lift_split_assignment(self, assignment: mediumlevelil.MediumLevelILSetVarSpl
self._lifter.lift(assignment.src, parent=assignment),
)

def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct, **kwargs) -> Assignment:
    """
    Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x).

    The destination is lifted as a MemberAccess on the lifted struct variable; the member is looked
    up by the instruction offset in the members of the type the destination expression type refers to.
    If no member is registered at that offset, the placeholder name 'field_<hex offset>' is used.
    """
    vartype = self._lifter.lift(instruction.dest.expr_type)
    struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
    # The lookup yields the member itself (or None); MemberAccess is given the member's name,
    # matching how the other field lifters in this handler build their MemberAccess.
    # NOTE(review): assumes entries of 'members' expose '.name' like the results of get_member_by_offset - confirm.
    member = vartype.type.members.get(instruction.offset)
    member_name = member.name if member is not None else f"field_{hex(instruction.offset)}"
    struct_member_access = MemberAccess(
        member_name=member_name,
        offset=instruction.offset,
        operands=[struct_variable],
        vartype=vartype,
        writes_memory=instruction.dest_memory,
    )
    src = self._lifter.lift(instruction.src)
    return Assignment(struct_member_access, src)
Loading