Skip to content

Commit

Permalink
Add init support for structure variable members being accessed.
Browse files Browse the repository at this point in the history
  • Loading branch information
mari-mari committed Jun 20, 2023
1 parent d3d8001 commit 3b71f4f
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 26 deletions.
4 changes: 4 additions & 0 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from decompiler.structures.pseudo import Float, Integer, OperationType, Pointer, StringSymbol
from decompiler.structures.pseudo import instructions as instructions
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.operations import StructMember
from decompiler.structures.pseudo.typing import StructureType
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface


Expand Down Expand Up @@ -65,6 +67,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
OperationType.greater_or_equal_us: ">=",
OperationType.dereference: "*",
OperationType.address: "&",
# OperationType.struct_member: "->",
# Handled in code
# OperationType.cast: "cast",
# OperationType.pointer: "point",
Expand Down Expand Up @@ -147,6 +150,7 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
OperationType.ternary: 30,
OperationType.call: 150,
OperationType.field: 150,
OperationType.struct_member: 150,
OperationType.list_op: 10,
# TODO: Figure out what these are / how to handle this
# OperationType.adc: "adc",
Expand Down
2 changes: 1 addition & 1 deletion decompiler/frontend/binaryninja/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,5 +158,5 @@ def _extract_cfg(self, function: Function, options: Options) -> ControlFlowGraph
"""Extract a control flow graph utilizing the parser and fixing it afterwards."""
report_threshold = options.getint("lifter.report_threshold", fallback=3)
no_masks = options.getboolean("lifter.no_bit_masks", fallback=True)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks), report_threshold)
parser = BinaryninjaParser(BinaryninjaLifter(no_masks, bv=function.view), report_threshold)
return parser.parse(function)
25 changes: 23 additions & 2 deletions decompiler/frontend/binaryninja/handlers/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

from binaryninja import BinaryView
from binaryninja.types import (
ArrayType,
BoolType,
Expand All @@ -12,12 +13,14 @@
NamedTypeReferenceType,
PointerType,
StructureType,
StructureMember,
Type,
VoidType,
WideCharType,
)
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import CustomType, Float, FunctionTypeDef, Integer, Parameter, Pointer, UnknownType
from decompiler.structures.pseudo.typing import StructureType as PseudoStructureType, StructureMemberType as PseudoStructureMember


class TypeHandler(Handler):
Expand All @@ -35,7 +38,8 @@ def register(self):
CharType: self.lift_integer,
WideCharType: self.lift_custom,
NamedTypeReferenceType: self.lift_custom,
StructureType: self.lift_custom,
StructureType: self.lift_struct,
StructureMember: self.lift_struct_member,
FunctionParameter: self.lift_function_parameter,
FunctionType: self.lift_function_type,
EnumerationType: self.lift_custom,
Expand All @@ -49,9 +53,26 @@ def lift_none(self, _: None, **kwargs):

def lift_custom(self, custom: Type, **kwargs) -> CustomType:
    """Lift a custom type, resolving it through the binary view when possible.

    If the lifter holds a binary view and that view knows a type registered under
    the name of `custom`, that definition is lifted instead; the result is then
    whatever the lifter returns for it, which is not necessarily a CustomType.
    In every other case an opaque CustomType is returned, built from the string
    representation of `custom` and its width converted from bytes to bits.

    :param custom: The Binary Ninja type to lift.
    :param kwargs: Passed on unchanged to the lifter when a definition is resolved.
    :return: The lifted definition if one was found, otherwise a CustomType.
    """
    logging.debug(f"[TypeHandler] lifting custom type: {custom}")
    # TODO split lifting custom from lifting namedtypereferencetype
    # The lifter can be created without a binary view (its `bv` defaults to None), and
    # this handler is registered for several type classes, so neither the view nor a
    # `name` attribute can be taken for granted.
    view = self._lifter.bv
    type_name = getattr(custom, "name", None)
    if view is not None and type_name is not None:
        if defined_type := view.get_type_by_name(type_name):
            return self._lifter.lift(defined_type, **kwargs)
    return CustomType(str(custom), custom.width * self.BYTE_SIZE)

def lift_struct(self, struct: StructureType, **kwargs) -> PseudoStructureType:
    """Lift a Binary Ninja struct type to a pseudo structure type.

    Every member of the struct is lifted and stored in a dict keyed by the
    offset of the member. The size of the resulting type is currently always 0.

    :param struct: The Binary Ninja struct type to lift.
    :param kwargs: Accepted for handler compatibility; not used.
    :return: The pseudo structure type carrying the tag name and the lifted members.
    """
    # TODO better way to get the name
    # TODO type width?
    # The type string is presumably of the form "struct <name>" (confirm against the
    # Binary Ninja API); fall back to the whole string instead of raising an
    # IndexError when it contains no space at all.
    name_parts = struct.get_string().split(" ")
    struct_name = name_parts[1] if len(name_parts) > 1 else name_parts[0]
    members = {member.offset: self.lift_struct_member(member) for member in struct.members}
    return PseudoStructureType(tag_name=struct_name, members=members, size=0)

def lift_struct_member(self, member: StructureMember, **kwargs) -> PseudoStructureMember:
    """Lift a single struct member together with the type of the member.

    The size of the resulting member type is currently always 0.

    :param member: The Binary Ninja struct member to lift.
    :param kwargs: Accepted and ignored, so this method can be called with keyword
        arguments like the other registered handlers.
    :return: The pseudo member type holding name, offset and lifted type.
    """
    return PseudoStructureMember(name=member.name, offset=member.offset, type=self._lifter.lift(member.type), size=0)

def lift_void(self, _, **kwargs) -> CustomType:
    """Return the void type; the given type object itself is not inspected."""
    void_type = CustomType.void()
    return void_type
Expand Down
30 changes: 14 additions & 16 deletions decompiler/frontend/binaryninja/handlers/unary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Module implementing the UnaryOperationHandler."""
import logging
from functools import partial
from typing import Union

Expand All @@ -14,6 +15,8 @@
Pointer,
UnaryOperation,
)
from decompiler.structures.pseudo.operations import StructMember
from decompiler.structures.pseudo.typing import StructureType


class UnaryOperationHandler(Handler):
Expand Down Expand Up @@ -91,22 +94,17 @@ def _lift_zx_operation(self, instruction: MediumLevelILInstruction, **kwargs) ->
)
return self.lift_cast(instruction, **kwargs)

def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct, **kwargs) -> UnaryOperation:
    """Lift a MLIL_LOAD_STRUCT_SSA instruction as pointer arithmetic.

    The access is expressed as a dereference of `(char *)src + offset`.
    """
    # View the lifted struct operand as a char pointer, so the offset counts bytes.
    byte_pointer = UnaryOperation(OperationType.cast, [self._lifter.lift(instruction.src)], vartype=Pointer(Integer.char()))
    member_address = BinaryOperation(
        OperationType.plus,
        [byte_pointer, Constant(instruction.offset)],
        vartype=self._lifter.lift(instruction.src.expr_type),
    )
    return UnaryOperation(
        OperationType.dereference,
        [member_address],
        vartype=Pointer(self._lifter.lift(instruction.src.expr_type)),
    )
def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct, **kwargs) -> StructMember:
    """Lift a MLIL_LOAD_STRUCT_SSA (struct member access e.g. var#n->x) instruction.

    The member name is looked up by offset in the lifted type of the struct
    operand. When no member is known at that offset (or the lifted type carries
    no member table at all), a placeholder name derived from the offset is used
    instead of failing with an AttributeError.

    :param instruction: The struct load instruction to lift.
    :param kwargs: Accepted for handler compatibility; not used.
    :return: The struct member access operation.
    """
    # TODO type of struct variable should be either ptr on struct or struct
    # TODO type of the member hm actually we want member instance to know the struct type.
    # TODO But it is not the same as vartype
    # TODO check what happens if members values are changed
    struct_variable = self._lifter.lift(instruction.src)
    lifted_type = self._lifter.lift(instruction.src.expr_type)
    # The operand may be lifted as a pointer to the struct or as the struct itself.
    struct_type = lifted_type.type if isinstance(lifted_type, Pointer) else lifted_type
    members = getattr(struct_type, "members", None) or {}
    member = members.get(instruction.offset)
    if member is not None:
        struct_member_name = member.name
    else:
        # Keep the placeholder a valid identifier even for negative offsets.
        struct_member_name = f"__offset_{instruction.offset}".replace("-", "minus_")
    return StructMember(
        src=struct_variable,
        vartype=lifted_type,
        operands=[struct_variable],
        offset=instruction.offset,
        member_name=struct_member_name,
    )

def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation:
"""Lift a MLIL_FTRUNC operation."""
Expand Down
5 changes: 3 additions & 2 deletions decompiler/frontend/binaryninja/lifter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from logging import warning
from typing import Optional, Tuple, Union

from binaryninja import MediumLevelILInstruction, Type
from binaryninja import MediumLevelILInstruction, Type, BinaryView
from decompiler.frontend.lifter import ObserverLifter
from decompiler.structures.pseudo import DataflowObject, Tag, UnknownExpression, UnknownType

Expand All @@ -12,8 +12,9 @@
class BinaryninjaLifter(ObserverLifter):
"""Lifter converting Binaryninja.mediumlevelil expressions to pseudo expressions."""

def __init__(self, no_bit_masks: bool = True):
def __init__(self, no_bit_masks: bool = True, bv: Optional[BinaryView] = None):
    """Create the lifter and register every handler listed in HANDLERS.

    :param no_bit_masks: Stored on the lifter as `no_bit_masks`.
    :param bv: Binary view stored on the lifter as `bv`; may be None when the
        lifter is created without one.
    """
    self.no_bit_masks = no_bit_masks
    self.bv: Optional[BinaryView] = bv
    for handler in HANDLERS:
        handler(self).register()

Expand Down
5 changes: 3 additions & 2 deletions decompiler/pipeline/dataflowanalysis/type_propagation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
from collections import Counter, defaultdict
from enum import Enum
from itertools import chain
from logging import info
from logging import info, debug
from typing import DefaultDict, Iterator, List, Set, Tuple

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.graphs.cfg import ControlFlowGraph
from decompiler.structures.pseudo.expressions import Expression, Variable
from decompiler.structures.pseudo.instructions import BaseAssignment, Instruction
from decompiler.structures.pseudo.typing import CustomType, Float, Integer, Pointer, Type, UnknownType
from decompiler.structures.pseudo.typing import CustomType, Float, Integer, Pointer, Type, UnknownType, StructureType
from decompiler.task import DecompilerTask
from networkx import DiGraph, Graph, connected_components

Expand Down Expand Up @@ -109,6 +109,7 @@ def propagate(self, graph: TypeGraph):
common_type = self._get_common_type(equivalence_group)
types.add(common_type)
self._propagate_type(graph, equivalence_group, common_type)
debug(types)
info(f"[{self.name}]Propagated {len(types)} different types")

@staticmethod
Expand Down
46 changes: 45 additions & 1 deletion decompiler/structures/pseudo/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from decompiler.util.insertion_ordered_set import InsertionOrderedSet

from .expressions import Constant, Expression, FunctionSymbol, ImportedFunctionSymbol, IntrinsicSymbol, Symbol, Tag, Variable
from .typing import CustomType, Type, UnknownType
from .typing import CustomType, Type, UnknownType, Pointer

T = TypeVar("T")

Expand Down Expand Up @@ -73,6 +73,7 @@ class OperationType(Enum):
field = auto()
list_op = auto()
adc = auto()
struct_member = auto()


# For pretty-printing and debug
Expand Down Expand Up @@ -127,6 +128,7 @@ class OperationType(Enum):
OperationType.field: "->",
OperationType.list_op: "list",
OperationType.adc: "adc",
OperationType.struct_member: ".",
}

UNSIGNED_OPERATIONS = {
Expand Down Expand Up @@ -376,6 +378,48 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
return visitor.visit_unary_operation(self)


class StructMember(Operation):
    """Operation accessing a member of a struct variable, e.g. `var->x`.

    The struct variable is kept as `struct_variable` and is also part of the
    operand list handed to the Operation base class; the accessed member is
    identified by its offset and its name.
    """

    def __init__(
        self,
        src: Expression,
        offset: int,
        member_name: str,
        operands: List[Expression],
        vartype: Type = UnknownType(),
        writes_memory: Optional[int] = None,
    ):
        """Create a member access on the struct expression `src`.

        :param src: The expression whose member is accessed.
        :param offset: Offset of the member inside the struct.
        :param member_name: Name of the accessed member.
        :param operands: Operands handed to the Operation base class.
        :param vartype: Type of the operation.
        :param writes_memory: Passed through to the Operation base class.
        """
        super().__init__(OperationType.struct_member, operands, vartype, writes_memory)
        self.struct_variable = src
        self.member_offset = offset
        self.member_name = member_name

    def __str__(self):
        """Return the access in pointer notation, e.g. `var->x`."""
        # TODO emit "." instead of "->" when the struct variable is not a pointer
        return f"{self.struct_variable}->{self.member_name}"

    def substitute(self, replacee: Expression, replacement: Expression) -> None:
        """Replace the struct variable by `replacement`.

        Only the substitution of the struct variable by another Variable is
        handled; every other request leaves the operation untouched.
        """
        if isinstance(replacee, Variable) and replacee == self.struct_variable and isinstance(replacement, Variable):
            self.struct_variable = replacement
            self.operands[:] = [replacement]

    def copy(self) -> StructMember:
        """Copy the current StructMember, copying the struct variable, all operands and the type."""
        copied_operands = [operand.copy() for operand in self._operands]
        # When the struct variable is one of the operands, reuse its copy so the new
        # instance keeps the two in sync; never hand out the original object, which
        # would let changes to the copy leak back into this operation.
        struct_variable = next(
            (copied for original, copied in zip(self._operands, copied_operands) if original is self.struct_variable),
            None,
        )
        if struct_variable is None:
            struct_variable = self.struct_variable.copy()
        # NOTE(review): writes_memory is not carried over to the copy — confirm whether
        # the base class stores it before propagating it here.
        return StructMember(
            struct_variable,
            self.member_offset,
            self.member_name,
            copied_operands,
            self._type.copy(),
        )

    def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
        """Return the string representation of this member access.

        NOTE(review): the visitor is not consulted; the result is always
        `str(self)` — confirm this is intended until a dedicated visit method exists.
        """
        return str(self)


class BinaryOperation(Operation):
"""Class representing operations with two operands."""

Expand Down
41 changes: 39 additions & 2 deletions decompiler/structures/pseudo/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, replace
from typing import Tuple
from dataclasses import dataclass, replace, field
from typing import Tuple, Dict


@dataclass(frozen=True, order=True)
Expand Down Expand Up @@ -229,6 +229,43 @@ def __str__(self) -> str:
return f"{self.return_type}({', '.join(str(x) for x in self.parameters)})"


@dataclass(frozen=True, order=True)
class StructureMemberType(Type):
    """Type describing one member of a struct: its name, its offset and its own type."""

    # TODO check subclassing and this size field
    name: str
    offset: int
    type: Type
    # NOTE(review): unannotated, so this is a plain class attribute and not a dataclass
    # field declared here; presumably the `size` field is inherited from Type — confirm.
    size = 0

    def __str__(self) -> str:
        """Return the name of the member."""
        return str(self.name)


@dataclass(frozen=True, order=True)
class StructureType(Type):
    """Type describing a struct by its tag name and its members keyed by offset."""

    # TODO check subclassing and this size field
    tag_name: str
    # The member table is excluded from the generated comparison methods.
    members: Dict[int, StructureMemberType] = field(compare=False)
    # NOTE(review): unannotated, so this is a plain class attribute and not a dataclass
    # field declared here; presumably the `size` field is inherited from Type — confirm.
    size = 0

    def __str__(self) -> str:
        """Return the tag name of the struct."""
        return str(self.tag_name)


# NOTE(review): a definition ending in the same __str__ body appears directly before the
# struct types in this file — this looks like an accidental duplicate; confirm and remove one.
@dataclass(frozen=True, order=True)
class FunctionTypeDef(Type):
    """Type of a function, given by its return type and its parameters."""

    return_type: Type
    parameters: Tuple[Parameter, ...]

    def __str__(self) -> str:
        """Return an anonymous string representation such as void*(int, int, char*)."""
        parameter_list = ", ".join(map(str, self.parameters))
        return f"{self.return_type}({parameter_list})"


class TypeParser:
"""A type parser in charge of creating types."""

Expand Down

0 comments on commit 3b71f4f

Please sign in to comment.