Skip to content

Commit

Permalink
Merge branch 'main' into remove-type-copy
Browse files Browse the repository at this point in the history
  • Loading branch information
blattm authored Aug 27, 2024
2 parents 51dee91 + c64d75b commit a165d03
Show file tree
Hide file tree
Showing 16 changed files with 425 additions and 237 deletions.
59 changes: 56 additions & 3 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,67 @@
)
from decompiler.structures.pseudo import instructions as instructions
from decompiler.structures.pseudo import operations as operations
from decompiler.structures.pseudo.complextypes import Struct
from decompiler.structures.pseudo.operations import MemberAccess
from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface
from decompiler.util.integer_util import normalize_int

MAX_GLOBAL_INIT_LENGTH = 128
INLINE_STRUCT_STRINGS = True
DETECT_STRUCT_STRINGS = True


def get_struct_string_address_offset(vartype) -> int | None:
"""This function return the offset of its address field if the vartype is a "struct string".
Otherwise it returns None.
struct strings are structs comprising of a length and a pointer to string data.
The code does not assume whether data or length comes first. The loop is for determining the order.
"""
if not isinstance(vartype, Struct):
return None
if len(vartype.members) != 2:
return None
address_offset = None
length_offset = None
for offset, member in vartype.members.items():
match member.type:
case Pointer(type=Integer(size=8)):
address_offset = offset
case Integer():
length_offset = offset
case _:
return None
if address_offset is None or length_offset is None:
return None
return address_offset


def is_struct_string(vartype) -> bool:
    """Tell whether vartype is a "struct string" (a struct made of a length and a pointer to string data).

    Always False while DETECT_STRUCT_STRINGS is switched off.
    """
    return get_struct_string_address_offset(vartype) is not None if DETECT_STRUCT_STRINGS else False


def get_data_of_struct_string(variable) -> GlobalVariable:
    """Fetch the string-data member from the initial value of a "struct string" variable.

    The member is looked up in the variable's initial value by the offset of the struct's pointer member.
    """
    data_offset = get_struct_string_address_offset(variable.type)
    return variable.initial_value.value[data_offset]


def inline_global_variable(var) -> bool:
    """Decide whether a global variable should be inlined at its use sites.

    Only constant globals qualify, and only if they are character arrays (char, wchar16, wchar32)
    or, when INLINE_STRUCT_STRINGS is enabled, struct strings.
    """
    if not var.is_constant:
        return False
    var_type = var.type
    if isinstance(var_type, ArrayType):
        return var_type.type in [Integer.char(), CustomType.wchar16(), CustomType.wchar32()]
    if isinstance(var_type, Struct):
        return bool(INLINE_STRUCT_STRINGS and is_struct_string(var_type))
    return False
Expand Down Expand Up @@ -163,6 +210,10 @@ class CExpressionGenerator(DataflowObjectVisitorInterface):
# OperationType.adc: "adc",
}

# Translation table for str.translate: maps backslash, both quote characters and the listed control
# characters (newline, carriage return, tab, vertical tab, backspace, form feed, NUL) to their
# two-character backslash escape sequences.
ESCAPE_TABLE = str.maketrans(
{"\\": r"\\", '"': r"\"", "'": r"\'", "\n": r"\n", "\r": r"\r", "\t": r"\t", "\v": r"\v", "\b": r"\b", "\f": r"\f", "\0": r"\0"}
)

def visit_unknown_expression(self, expr: expressions.UnknownExpression) -> str:
    """Render an UnknownExpression as the message it carries."""
    message = expr.msg
    return message
Expand Down Expand Up @@ -197,16 +248,16 @@ def visit_constant_composition(self, expr: expressions.ConstantComposition):
"""Visit a Constant Array."""
match expr.type.type:
case CustomType(text="wchar16") | CustomType(text="wchar32"):
val = "".join([x.value for x in expr.value])
val = "".join([x.value for x in expr.value]).translate(self.ESCAPE_TABLE)
return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(size=8, signed=False):
val = "".join([f"\\x{x.value:02X}" for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(8):
val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]).translate(self.ESCAPE_TABLE)
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case _:
return f'{", ".join([self.visit(x) for x in expr.value])}' # Todo: Should we print every member? Could get pretty big
return f'{", ".join([self.visit(x) for x in expr.value]).translate(self.ESCAPE_TABLE)}' # Todo: Should we print every member? Could get pretty big

def visit_variable(self, expr: expressions.Variable) -> str:
"""Return a string representation of the variable."""
Expand All @@ -215,6 +266,8 @@ def visit_variable(self, expr: expressions.Variable) -> str:
def visit_global_variable(self, expr: expressions.GlobalVariable):
    """Render a global variable: its inlined value when inlining applies, otherwise its name.

    For struct strings the string-data member is rendered; for every other inlined global
    the initial value is rendered.
    """
    if not inline_global_variable(expr):
        return expr.name
    if is_struct_string(expr.type):
        inlined = get_data_of_struct_string(expr)
    else:
        inlined = expr.initial_value
    return self.visit(inlined)

Expand Down
6 changes: 5 additions & 1 deletion decompiler/backend/codevisitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,11 @@ def _condition_string(self, condition: ConditionVar) -> str:
def _format_integer_literal(self, type_info: Integer, value: int) -> str:
"""Format the integer based on the codegenerators settings."""

byte_format_handler = {"char": lambda x: f"'{chr(x)}'", "hex": lambda x: f"{hex(x)}", "dec": lambda x: f"{x}"}
byte_format_handler = {
"char": lambda x: f"'{chr(x).translate(self.ESCAPE_TABLE)}'",
"hex": lambda x: f"{hex(x)}",
"dec": lambda x: f"{x}",
}
if self._possibly_char_in_ascii_range(type_info, value):
if value_handler := byte_format_handler.get(self._byte_format, None):
if hint_handler := byte_format_handler.get(self._byte_format_hint, None):
Expand Down
22 changes: 21 additions & 1 deletion decompiler/backend/variabledeclarations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,16 @@
from collections import defaultdict
from typing import Iterable, Iterator, List

from decompiler.backend.cexpressiongenerator import CExpressionGenerator, inline_global_variable
from decompiler.backend.cexpressiongenerator import (
CExpressionGenerator,
get_data_of_struct_string,
inline_global_variable,
is_struct_string,
)
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.pseudo import GlobalVariable, Integer, Variable
from decompiler.structures.pseudo.complextypes import Struct
from decompiler.structures.pseudo.expressions import StructConstant
from decompiler.structures.pseudo.typing import ArrayType, CustomType, Pointer
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask
Expand Down Expand Up @@ -66,6 +73,16 @@ def _generate_definitions(global_variables: set[GlobalVariable]) -> Iterator[str
if not variable.type.type in [Integer.char(), Integer.uint8_t(), CustomType.wchar16(), CustomType.wchar32()]:
br, bl = "{", "}"
yield f"{base}{variable.type.type} {variable.name}[{hex(variable.type.elements)}] = {br}{CExpressionGenerator().visit(variable.initial_value)}{bl};"
case Struct():
if is_struct_string(variable.type):
yield base + f"struct {variable.type.name} {variable.name} = {CExpressionGenerator().visit(get_data_of_struct_string(variable))};"
continue
string = f"struct {variable.type.name} {variable.name}" + "{\n"
for m_type, m_value in zip(variable.type.members.values(), variable.initial_value.value.values()):
value = CExpressionGenerator().visit(m_value)
string += f"\t.{m_type.name} = {value};\n"
string += "}"
yield base + string
case _:
yield f"{base}{variable.type} {variable.name} = {CExpressionGenerator().visit(variable.initial_value)};"

Expand All @@ -88,3 +105,6 @@ def visit_global_variable(self, expr: GlobalVariable):
self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None))
if not expr.is_constant or expr.type == Pointer(CustomType.void()):
self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None))
if isinstance(expr.initial_value, StructConstant):
for member_value in expr.initial_value.value.values():
self.visit(member_value)
2 changes: 1 addition & 1 deletion decompiler/frontend/binaryninja/handlers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def lift_constant(self, constant: mediumlevelil.MediumLevelILConst, **kwargs):
"""Lift the given constant value."""
if constant.constant in [math.inf, -math.inf, math.nan]:
return NotUseableConstant(str(constant.constant))
if addr_in_section(constant.function.view, constant.constant):
if isinstance(constant.constant, int) and addr_in_section(constant.function.view, constant.constant):
return self.lift_constant_pointer(constant)
return Constant(constant.constant, vartype=self._lifter.lift(constant.expr_type))

Expand Down
88 changes: 73 additions & 15 deletions decompiler/frontend/binaryninja/handlers/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Callable, Optional, Tuple, Union

from binaryninja import BinaryView, DataVariable, Endianness, MediumLevelILInstruction, SectionSemantics, Type
from binaryninja.enums import NamedTypeReferenceClass
from binaryninja.types import (
ArrayType,
BoolType,
Expand All @@ -12,13 +13,15 @@
IntegerType,
NamedTypeReferenceType,
PointerType,
StructureType,
Type,
VoidType,
)
from decompiler.frontend.binaryninja.handlers.symbols import GLOBAL_VARIABLE_PREFIX
from decompiler.frontend.lifter import Handler
from decompiler.structures.pseudo import ArrayType as PseudoArrayType
from decompiler.structures.pseudo import (
ComplexTypeMember,
Constant,
ConstantComposition,
CustomType,
Expand All @@ -28,6 +31,8 @@
Integer,
OperationType,
Pointer,
Struct,
StructConstant,
Symbol,
UnaryOperation,
)
Expand Down Expand Up @@ -59,9 +64,12 @@
==> trust bninja lift normally
=> If a void*, then we try determine the value via get_unknown_pointer_value
- NamedTypeReferenceType
- (enum/structs
=> lifts struct members recursively
=> includes special handling of a BNinja bug when accessing certain PDB enum types
- StructType
- enum/structs
=> not supported currently
=> has a BNinja bug when accessing certain PDB enum types
=> implementation *very* similar to NamedTypeReferenceType
MISC:
- ._callers will be empty for each call of lift_global_variable
Expand All @@ -88,8 +96,11 @@ def __init__(self, lifter):
ArrayType: self._lift_array_type,
PointerType: self._lift_pointer_type,
NamedTypeReferenceType: self._lift_named_type_ref,
StructureType: self._lift_structure_type,
}
self._lifted_globals: dict[int, GlobalVariable] = {} # Cache for already lifted global variables, keys are addresses
self._lifted_globals: dict[tuple, GlobalVariable] = (
{}
) # Cache for already lifted global variables, keys are addresses + type (required to distinguish struct from its first member)
self._view: Optional[BinaryView] = None # Will be set in first call to lift_global_variable

def register(self):
Expand All @@ -101,9 +112,18 @@ def _get_gvar_name(self, bninjaName: Optional[str], addr: int) -> str:
lifted_names = [v.name for v in self._lifted_globals.values()]
if bninjaName is None:
return GLOBAL_VARIABLE_PREFIX + f"{addr:x}"
if bninjaName in lifted_names:
return bninjaName + "_" + f"{addr:x}"
return bninjaName
name = bninjaName.translate(
{
ord(" "): "_",
ord("'"): "",
ord("."): "_",
ord("`"): "",
ord('"'): "",
}
).strip()
if name in lifted_names:
return name + "_" + f"{addr:x}"
return name

def _build_global_variable(self, name: Optional[str], type: Type, addr: int, init_value, ssa_label: Optional[int]) -> GlobalVariable:
"""Wrapper for building global variables."""
Expand All @@ -117,10 +137,10 @@ def _build_global_variable(self, name: Optional[str], type: Type, addr: int, ini
case _:
raise TypeError(f"Type violation: '{init_value}'")

self._lifted_globals[addr] = GlobalVariable(
self._lifted_globals[(addr, type)] = GlobalVariable(
name=vname, vartype=type, initial_value=vinit_value, ssa_label=ssa_label, is_constant=addr_in_ro_section(self._view, addr)
)
return self._lifted_globals[addr]
return self._lifted_globals[(addr, type)]

def lift_global_variable(
self,
Expand All @@ -136,11 +156,12 @@ def lift_global_variable(
self._view = view

# If addr was already lifted: Return lifted GlobalVariable with updated SSA
if variable.address in self._lifted_globals.keys():
variable_identifier = (variable.address, self._lifter.lift(variable.type))
if variable_identifier in self._lifted_globals.keys():
return (
self._lifted_globals[variable.address].copy(ssa_label=parent.ssa_memory_version)
self._lifted_globals[variable_identifier].copy(ssa_label=parent.ssa_memory_version)
if parent
else self._lifted_globals[variable.address]
else self._lifted_globals[variable_identifier]
)

# BNinja error cases: nullptr/small numbers (0, -12...)
Expand Down Expand Up @@ -237,9 +258,46 @@ def _lift_pointer_type(

def _lift_named_type_ref(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_):
"""Lift a named custom type (Enum, Structs)"""
return Constant(
"Unknown value", self._lifter.lift(variable.type)
) # BNinja error, need to check with the issue to get the correct value + entry for structs
match variable.type.named_type_class:
case NamedTypeReferenceClass.StructNamedTypeClass:
struct_type = self._view.get_type_by_id(variable.type.type_id)
return self._lift_struct_helper(variable, parent, struct_type)

case NamedTypeReferenceClass.EnumNamedTypeClass:
try:
value = Constant(variable.value, self._lifter.lift(variable.type))
return self._build_global_variable(
variable.name,
value.type,
variable.address,
value,
parent.ssa_memory_version if parent else 0,
)
except Exception:
return Constant("Unknown value", self._lifter.lift(variable.type)) # BNinja error
case _:
raise NotImplementedError(f"No handler for '{variable.type.named_type_class}' in lifter")

def _lift_structure_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_):
    """Lift a data variable of structure type; the variable's own type serves as the struct definition."""
    return self._lift_struct_helper(variable, parent, variable.type)

def _lift_struct_helper(self, variable, parent, struct_type):
    """Build the global variable for a struct from the initial values of its members.

    A struct's initial value is composed of its members' initial values. Each member's memory
    location (struct address plus member offset) is wrapped in a new data variable and lifted
    (recursively for nested types); the lifted initial values are collected by member offset
    into a StructConstant.
    """
    lifted_struct = self._lifter.lift(struct_type)
    member_values = {}
    for member in struct_type.members:
        member_address = variable.address + member.offset
        member_variable = DataVariable(self._view, member_address, member.type, False)
        member_values[member.offset] = self._lifter.lift(member_variable, view=self._view).initial_value
    initial_value = StructConstant(member_values, lifted_struct)
    ssa_label = parent.ssa_memory_version if parent else 0
    return self._build_global_variable(variable.name, lifted_struct, variable.address, initial_value, ssa_label)

def _get_unknown_value(self, variable: DataVariable):
"""Return string or bytes at dv.address(!) (dv.type must be void)"""
Expand Down Expand Up @@ -344,7 +402,7 @@ def _get_string_at(view: BinaryView, addr: int, width: int) -> Optional[str]:
def addr_in_section(view: BinaryView, addr: int) -> bool:
    """Return True if addr lies inside any section of view, False otherwise.

    Each section is treated as the half-open range [start, end): the end address itself is
    the first address past the section and therefore does not count as contained.
    """
    return any(section.start <= addr < section.end for section in view.sections.values())

Expand Down
8 changes: 7 additions & 1 deletion decompiler/frontend/binaryninja/handlers/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,12 @@ def lift_named_type_reference_type(self, custom: NamedTypeReferenceType, **kwarg
return CustomType(str(custom), custom.width * self.BYTE_SIZE)

def lift_enum(self, binja_enum: EnumerationType, name: str = None, **kwargs) -> Enum:
"""Lift enum type."""
"""Lift enum type.
The cache lookup uses the hash of the Binary Ninja object instead of names, as names might collide."""
type_id = hash(binja_enum)
cached_type = self._lifter.complex_types.retrieve_by_id(type_id)
if cached_type is not None:
return cached_type
enum_name = self._get_data_type_name(binja_enum, keyword="enum", provided_name=name)
enum = Enum(binja_enum.width * self.BYTE_SIZE, enum_name, {})
for member in binja_enum.members:
Expand All @@ -90,6 +94,8 @@ def lift_enum_member(self, enum_member: EnumerationMember, **kwargs) -> ComplexT
return ComplexTypeMember(size=0, name=enum_member.name, offset=-1, type=Integer(32), value=int(enum_member.value))

def lift_struct(self, struct: StructureType, name: str = None, **kwargs) -> Union[Struct, Union_, Class, ComplexTypeName]:
"""lift struct/class and union types.
The cache lookup uses the hash of the Binary Ninja object instead of names, as names might collide."""
type_id = hash(struct)
cached_type = self._lifter.complex_types.retrieve_by_id(type_id)
if cached_type is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def _get_dangerous_relations_between_definition_and_target(self, alias_variable:
for basic_block in self._cfg:
for instruction in basic_block:
if isinstance(instruction, Relation) and instruction.destination.name == alias_variable.name:
relations |= {instruction}
relations.add(instruction)

return relations

Expand Down
Loading

0 comments on commit a165d03

Please sign in to comment.