From 43c19d9120ea28a7246b8ec02a02913e7443ce87 Mon Sep 17 00:00:00 2001 From: NeoQuix <83972469+NeoQuix@users.noreply.github.com> Date: Thu, 28 Mar 2024 10:01:59 +0100 Subject: [PATCH] Fix Global Variables BNinja 4.0 (#401) * Fix: Lifting/Printing of global variables * Remove: Testing Code * Change init value to expr. * Fix BNinja4.0 * Fix: Lifter + Format * Fix: FrontEnd+Backend * Fix: Tests + Remove globals tests * Remove: void* & handling * Format: isort+black * Format: Black 24.3 * Fix: Unary Op, Backend + Comments * Fix: name for _builtin_memcpy etc. * Fix: Name collisions for sets * Format * Review changes * Review changes-2 * Add: Comment globals view Co-authored-by: Manuel Blatt <45859907+blattm@users.noreply.github.com> * fix format * Comment for bninja4.0 hack --------- Co-authored-by: Spartak Ehrlich Co-authored-by: Manuel Blatt <45859907+blattm@users.noreply.github.com> Co-authored-by: Manuel Blatt --- decompiler/backend/cexpressiongenerator.py | 69 ++- decompiler/backend/variabledeclarations.py | 94 ++-- .../binaryninja/handlers/constants.py | 55 ++- .../frontend/binaryninja/handlers/globals.py | 414 ++++++++++++------ .../frontend/binaryninja/handlers/symbols.py | 1 + .../frontend/binaryninja/handlers/types.py | 5 +- .../commons/expressionpropagationcommons.py | 6 +- .../dataflowanalysis/expressionpropagation.py | 2 +- .../expressionpropagationfunctioncall.py | 2 +- .../expressionpropagationmemory.py | 2 +- decompiler/structures/pseudo/__init__.py | 6 +- decompiler/structures/pseudo/expressions.py | 104 ++--- decompiler/structures/pseudo/typing.py | 32 ++ .../visitors/ast_dataflowobjectvisitor.py | 15 +- decompiler/structures/visitors/interfaces.py | 8 + .../structures/visitors/substitute_visitor.py | 8 + tests/backend/test_codegenerator.py | 27 +- .../test_condition_aware_refinement.py | 8 +- .../test_expression_propagation.py | 4 +- ...st_expression_propagation_function_call.py | 4 +- .../test_expression_propagation_mem.py | 2 +- 
.../test_insert_missing_definition.py | 4 +- .../preprocessing/test_mem_phi_converter.py | 2 +- tests/structures/pseudo/test_expressions.py | 41 +- tests/structures/pseudo/test_instructions.py | 4 +- tests/test_sample_binaries.py | 215 --------- 26 files changed, 547 insertions(+), 587 deletions(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 5f0775949..77cfbe77e 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -2,13 +2,37 @@ from itertools import chain, repeat from decompiler.structures import pseudo as expressions -from decompiler.structures.pseudo import Float, FunctionTypeDef, Integer, OperationType, Pointer, StringSymbol, Type +from decompiler.structures.pseudo import ( + ArrayType, + CustomType, + Float, + FunctionTypeDef, + GlobalVariable, + Integer, + OperationType, + Pointer, + Type, +) from decompiler.structures.pseudo import instructions as instructions from decompiler.structures.pseudo import operations as operations from decompiler.structures.pseudo.operations import MemberAccess from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface from decompiler.util.integer_util import normalize_int +MAX_GLOBAL_INIT_LENGTH = 128 + + +def inline_global_variable(var) -> bool: + if not var.is_constant: + return False + match var.type: + case ArrayType(): + if var.type.type in [Integer.char(), CustomType.wchar16(), CustomType.wchar32()]: + return True + case _: + return False + return False + class CExpressionGenerator(DataflowObjectVisitorInterface): """Generate C code for Expressions. 
@@ -145,17 +169,52 @@ def visit_unknown_expression(self, expr: expressions.UnknownExpression) -> str: def visit_constant(self, expr: expressions.Constant) -> str: """Return constant in a format that will be parsed correctly by a compiler.""" + if isinstance(expr, expressions.NotUseableConstant): + return expr.value + if isinstance(expr, expressions.Symbol): + return expr.name if isinstance(expr.type, Integer): value = self._get_integer_literal_value(expr) return self._format_integer_literal(expr.type, value) - if isinstance(expr, StringSymbol): - return expr.name + if isinstance(expr.type, Pointer): + match (expr.value): + case ( + str() + ): # Technically every string will be lifted as an ConstantArray. Will still leave this, if someone creates a string as a char* + string = expr.value if len(expr.value) <= MAX_GLOBAL_INIT_LENGTH else expr.value[:MAX_GLOBAL_INIT_LENGTH] + "..." + match expr.type.type: + case CustomType(text="wchar16") | CustomType(text="wchar32"): + return f'L"{string}"' + case _: + return f'"{string}"' + case bytes(): + val = "".join("\\x{:02x}".format(x) for x in expr.value) + return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' + return self._format_string_literal(expr) + def visit_constant_composition(self, expr: expressions.ConstantComposition): + """Visit a Constant Array.""" + match expr.type.type: + case CustomType(text="wchar16") | CustomType(text="wchar32"): + val = "".join([x.value for x in expr.value]) + return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' + case Integer(8): + val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH]) + return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."' + case _: + return f'{", ".join([self.visit(x) for x in expr.value])}' # Todo: Should we print every member? 
Could get pretty big + def visit_variable(self, expr: expressions.Variable) -> str: """Return a string representation of the variable.""" return f"{expr.name}" if (label := expr.ssa_label) is None else f"{expr.name}_{label}" + def visit_global_variable(self, expr: expressions.GlobalVariable): + """Inline a global variable if its initial value is constant and not of void type""" + if inline_global_variable(expr): + return self.visit(expr.initial_value) + return expr.name + def visit_register_pair(self, expr: expressions.Variable) -> str: """Return a string representation of the register pair and log.""" logging.error(f"generated code for register pair {expr}") @@ -168,6 +227,8 @@ def visit_list_operation(self, op: operations.ListOperation) -> str: def visit_unary_operation(self, op: operations.UnaryOperation) -> str: """Return a string representation of the given unary operation (e.g. !a or &a).""" operand = self._visit_bracketed(op.operand) if self._has_lower_precedence(op.operand, op) else self.visit(op.operand) + if op.operation == OperationType.address and isinstance(op.operand, GlobalVariable) and isinstance(op.operand.type, ArrayType): + return operand if isinstance(op, MemberAccess): operator_str = "->" if isinstance(op.struct_variable.type, Pointer) else self.C_SYNTAX[op.operation] return f"{operand}{operator_str}{op.member_name}" @@ -353,5 +414,7 @@ def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: parameter_names = ", ".join(str(parameter) for parameter in fun_type.parameters) declarations_without_return_type = [f"(* {var_name})({parameter_names})" for var_name in var_names] return f"{fun_type.return_type} {', '.join(declarations_without_return_type)}" + case ArrayType(): + return f"{var_type.type}* {', '.join(var_names)}" case _: return f"{var_type} {', '.join(var_names)}" diff --git a/decompiler/backend/variabledeclarations.py b/decompiler/backend/variabledeclarations.py index d4102791b..27e2152cd 100644 --- 
a/decompiler/backend/variabledeclarations.py +++ b/decompiler/backend/variabledeclarations.py @@ -3,22 +3,13 @@ from collections import defaultdict from typing import Iterable, Iterator, List -from decompiler.backend.cexpressiongenerator import CExpressionGenerator +from decompiler.backend.cexpressiongenerator import CExpressionGenerator, inline_global_variable from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree -from decompiler.structures.pseudo import ( - DataflowObject, - Expression, - ExternConstant, - ExternFunctionPointer, - GlobalVariable, - Operation, - Pointer, - Variable, -) +from decompiler.structures.pseudo import GlobalVariable, Integer, Variable +from decompiler.structures.pseudo.typing import ArrayType, CustomType, Pointer from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor from decompiler.task import DecompilerTask from decompiler.util.insertion_ordered_set import InsertionOrderedSet -from decompiler.util.serialization.bytes_serializer import convert_bytes class LocalDeclarationGenerator: @@ -60,57 +51,40 @@ def _chunks(lst: List, n: int) -> Iterator[List]: class GlobalDeclarationGenerator(BaseAstDataflowObjectVisitor): - @staticmethod - def from_asts(asts: Iterable[AbstractSyntaxTree]) -> str: - global_variables, extern_constants = GlobalDeclarationGenerator._get_global_variables_and_constants(asts) - return "\n".join(GlobalDeclarationGenerator.generate(global_variables.__iter__(), extern_constants)) + def __init__(self) -> None: + self._global_vars = InsertionOrderedSet() + super().__init__() @staticmethod - def _get_global_variables_and_constants(asts: Iterable[AbstractSyntaxTree]) -> tuple[set[GlobalVariable], set[ExternConstant]]: - global_variables = InsertionOrderedSet() - extern_constants = InsertionOrderedSet() - - # if this gets more complex, a visitor pattern should perhaps be used instead - def handle_obj(obj: DataflowObject): - match obj: - case GlobalVariable(): - 
global_variables.add(obj) - if isinstance(obj.initial_value, Expression): - for subexpression in obj.initial_value.subexpressions(): - handle_obj(subexpression) - - case ExternConstant(): - extern_constants.add(obj) - - for ast in asts: - for node in ast.nodes: - for obj in node.get_dataflow_objets(ast.condition_map): - for expression in obj.subexpressions(): - handle_obj(expression) - - return global_variables, extern_constants - - @staticmethod - def generate(global_variables: Iterable[GlobalVariable], extern_constants: Iterable[ExternConstant]) -> Iterator[str]: + def _generate_definitions(global_variables: set[GlobalVariable]) -> Iterator[str]: """Generate all definitions""" for variable in global_variables: - yield f"extern {variable.type} {variable.name} = {GlobalDeclarationGenerator.get_initial_value(variable)};" - for constant in sorted(extern_constants, key=lambda x: x.value): - yield f"extern {constant.type} {constant.value};" + base = f"extern {'const ' if variable.is_constant else ''}" + match variable.type: + case ArrayType(): + br, bl = "", "" + if not variable.type.type in [Integer.char(), CustomType.wchar16(), CustomType.wchar32()]: + br, bl = "{", "}" + yield f"{base}{variable.type.type} {variable.name}[{hex(variable.type.elements)}] = {br}{CExpressionGenerator().visit(variable.initial_value)}{bl};" + case _: + yield f"{base}{variable.type} {variable.name} = {CExpressionGenerator().visit(variable.initial_value)};" @staticmethod - def get_initial_value(variable: GlobalVariable) -> str: - """Get a string representation of the initial value of the given variable.""" - if isinstance(variable.initial_value, GlobalVariable): - return variable.initial_value.name - elif isinstance(variable.initial_value, ExternFunctionPointer): - return str(variable.initial_value.value) - if isinstance(variable.initial_value, bytes): - return str(convert_bytes(variable.initial_value, variable.type)) - if isinstance(operation := variable.initial_value, Operation): - for 
requirement in operation.requirements: - if isinstance(requirement, GlobalVariable): - requirement.unsubscript() - if isinstance(variable.type, Pointer) and isinstance(variable.initial_value, int): - return hex(variable.initial_value) - return str(variable.initial_value) + def from_asts(asts: Iterable[AbstractSyntaxTree]) -> str: + """Generate""" + globals = InsertionOrderedSet() + for ast in asts: + globals |= GlobalDeclarationGenerator().visit_ast(ast) + return "\n".join(GlobalDeclarationGenerator._generate_definitions(globals)) + + def visit_ast(self, ast: AbstractSyntaxTree) -> InsertionOrderedSet: + """Visit ast and return all collected global variables""" + super().visit_ast(ast) + return self._global_vars + + def visit_global_variable(self, expr: GlobalVariable): + """Visit global variables. Only collect ones which will not be inlined by CExprGenerator. Strip SSA label to remove duplicates""" + if not inline_global_variable(expr): + self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None)) + if not expr.is_constant or expr.type == Pointer(CustomType.void()): + self._global_vars.add(expr.copy(ssa_label=0, ssa_name=None)) diff --git a/decompiler/frontend/binaryninja/handlers/constants.py b/decompiler/frontend/binaryninja/handlers/constants.py index ce434e673..20b4bfbb9 100644 --- a/decompiler/frontend/binaryninja/handlers/constants.py +++ b/decompiler/frontend/binaryninja/handlers/constants.py @@ -1,10 +1,21 @@ """Module implementing the ConstantHandler for the binaryninja frontend.""" import math +from typing import Union -from binaryninja import BinaryView, DataVariable, SectionSemantics, SymbolType, Type, mediumlevelil +from binaryninja import DataVariable, SymbolType, Type, mediumlevelil from decompiler.frontend.lifter import Handler -from decompiler.structures.pseudo import Constant, GlobalVariable, Integer, NotUseableConstant, Pointer, StringSymbol +from decompiler.structures.pseudo import ( + Constant, + CustomType, + GlobalVariable, + Integer, + 
NotUseableConstant, + OperationType, + Pointer, + Symbol, + UnaryOperation, +) BYTE_SIZE = 8 @@ -36,36 +47,36 @@ def lift_integer_literal(value: int, **kwargs) -> Constant: return Constant(value, vartype=Integer.int32_t()) def lift_constant_data(self, pointer: mediumlevelil.MediumLevelILConstData, **kwargs) -> Constant: - """Lift const data as a non mute able constant string""" - return StringSymbol(str(pointer), pointer.address) + """Lift data as a non mute able constant string (register string)""" + return NotUseableConstant(str(pointer)) - def lift_constant_pointer(self, pointer: mediumlevelil.MediumLevelILConstPtr, **kwargs): + def lift_constant_pointer(self, pointer: mediumlevelil.MediumLevelILConstPtr, **kwargs) -> Union[GlobalVariable, Symbol]: """Lift the given constant pointer, e.g. &0x80000.""" view = pointer.function.view if variable := view.get_data_var_at(pointer.constant): - return self._lifter.lift(variable, view=view, parent=pointer) + res = self._lifter.lift(variable, view=view, parent=pointer) - if (symbol := view.get_symbol_at(pointer.constant)) and symbol.type != SymbolType.DataSymbol: + elif (symbol := view.get_symbol_at(pointer.constant)) and symbol.type != SymbolType.DataSymbol: return self._lifter.lift(symbol) - if function := view.get_function_at(pointer.constant): + elif function := view.get_function_at(pointer.constant): return self._lifter.lift(function.symbol) - variable = DataVariable(view, pointer.constant, Type.void(), False) - global_variable = self._lifter.lift(variable, view=view, parent=pointer) + else: + res = self._lifter.lift(DataVariable(view, pointer.constant, Type.void(), False), view=view, parent=pointer) - return self._replace_global_variable_with_value(global_variable, variable, view) + if isinstance(res, Constant): # BNinja Error case handling + return res - def _replace_global_variable_with_value(self, globalVariable: GlobalVariable, variable: DataVariable, view: BinaryView) -> StringSymbol: - """Replace global 
variable with it's value, if it's a char/wchar16/wchar32* and in a read only section""" - if not self._in_read_only_section(variable.address, view) or str(globalVariable.type) == "void *": - return globalVariable - return StringSymbol(globalVariable.initial_value, variable.address, vartype=Pointer(Integer.char(), view.address_size * BYTE_SIZE)) + if isinstance(res.type, Pointer) and res.type.type == CustomType.void(): + return res - def _in_read_only_section(self, addr: int, view: BinaryView) -> bool: - """Returns True if address is contained in a read only section, False otherwise""" - for _, section in view.sections.items(): - if addr >= section.start and addr <= section.end and section.semantics == SectionSemantics.ReadOnlyDataSectionSemantics: - return True - return False + if isinstance(pointer, mediumlevelil.MediumLevelILImport): # Temp fix for '&' + return res + + return UnaryOperation( + OperationType.address, + [res], + vartype=res.type, + ) diff --git a/decompiler/frontend/binaryninja/handlers/globals.py b/decompiler/frontend/binaryninja/handlers/globals.py index 175f3b5a0..b37b2d07b 100644 --- a/decompiler/frontend/binaryninja/handlers/globals.py +++ b/decompiler/frontend/binaryninja/handlers/globals.py @@ -2,7 +2,7 @@ from typing import Callable, Optional, Tuple, Union -from binaryninja import BinaryView, DataVariable, Endianness, MediumLevelILInstruction, Type +from binaryninja import BinaryView, DataVariable, Endianness, MediumLevelILInstruction, SectionSemantics, Type from binaryninja.types import ( ArrayType, BoolType, @@ -18,23 +18,60 @@ from decompiler.frontend.binaryninja.handlers.constants import BYTE_SIZE from decompiler.frontend.binaryninja.handlers.symbols import GLOBAL_VARIABLE_PREFIX from decompiler.frontend.lifter import Handler +from decompiler.structures.pseudo import ArrayType as PseudoArrayType from decompiler.structures.pseudo import ( Constant, + ConstantComposition, + CustomType, + Expression, GlobalVariable, ImportedFunctionSymbol, 
Integer, OperationType, Pointer, - StringSymbol, Symbol, UnaryOperation, ) -MAX_GLOBAL_STRINGBYTES_LENGTH = 129 +""" + Lift a given address inside of a binary by BNinjas DataVariable type. + If some code references a address, bninja stores the information about the address inside of a DataVariable (dv). + A dv has a type (which may be wrong/or set by a user) and a value (which can be anything). + + We lift according to the type (bninjas) of the dv: + - CharType, FloatType, IntegerType, BoolType + - basic C types (char, int, float, ...) + => just lift as the given type + ==> Addition since Version 4.0: Check if variable references something, if yes, try to lift as pointer + - VoidType + - when bninja does not know the size of a variable (e.g. int array) it represents it as a void dv + => C does not have a concept of void + => lift as a void* with raw escaped bytes as value (still not C conformant, but better) + ==> if we create a pointer, the caller (instruction) must remove the '&' operator + - ArrayType + - Strings (char [], wchar_16[], ...) + => Lift as given type (array) + => BNinja changes the .value field frequently and is not consistent (any; mostly bytes, list or string) + - PointerType + - pointer to something (basic type, void*, function pointer) + => If the pointer points to some basic type, there _should_ be a dv at the value address + ==> trust bninja lift normally + => If a void*, then we try determine the value via get_unknown_pointer_value + - NamedTypeReferenceType + - enum/structs + => not supported currently + => has a BNinja bug when accessing certain PDB enum types + + MISC: + - ._callers will be empty for each call of lift_global_variable + except when an caller calls the lifter with kwargs = {callers = [..]} + => get_unknown_value does exactly this to keep track of all callers for a chain of global variables + (The call stack will be lifter.lift, lift_global_variable, lifter.lift, lift_global_variable, ...) 
+""" class GlobalHandler(Handler): - """Handle for global variables.""" + """Handler for global variables.""" # Dict translating endianness between the binaryninja enum and pythons literals Endian = {Endianness.LittleEndian: "little", Endianness.BigEndian: "big"} @@ -47,178 +84,271 @@ def __init__(self, lifter): FloatType: self._lift_basic_type, BoolType: self._lift_basic_type, VoidType: self._lift_void_type, - ArrayType: self._lift_constant_type, + ArrayType: self._lift_array_type, PointerType: self._lift_pointer_type, - NamedTypeReferenceType: self._lift_named_type_ref, # Lift DataVariable with type NamedTypeRef + NamedTypeReferenceType: self._lift_named_type_ref, } + self._lifted_globals: dict[int, GlobalVariable] = {} # Cache for already lifted global variables, keys are addresses + self._view: Optional[BinaryView] = None # Will be set in first call to lift_global_variable def register(self): """Register the handler at its parent lifter.""" self._lifter.HANDLERS.update({DataVariable: self.lift_global_variable}) + def _get_gvar_name(self, bninjaName: Optional[str], addr: int) -> str: + """Return a unique name for a global variable.""" + lifted_names = [v.name for v in self._lifted_globals.values()] + if bninjaName is None: + return GLOBAL_VARIABLE_PREFIX + f"{addr:x}" + if bninjaName in lifted_names: + return bninjaName + "_" + f"{addr:x}" + return bninjaName + + def _build_global_variable(self, name: Optional[str], type: Type, addr: int, init_value, ssa_label: Optional[int]) -> GlobalVariable: + """Wrapper for building global variables.""" + vname = self._get_gvar_name(name, addr) + + match init_value: + case Expression(): + vinit_value = init_value + case int() | float() | bytes(): + vinit_value = Constant(value=init_value, vartype=type) + case _: + raise TypeError(f"Type violation: '{init_value}'") + + self._lifted_globals[addr] = GlobalVariable( + name=vname, vartype=type, initial_value=vinit_value, ssa_label=ssa_label, 
is_constant=addr_in_ro_section(self._view, addr) + ) + return self._lifted_globals[addr] + def lift_global_variable( - self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None, caller_addr: int = None, **kwargs - ) -> Union[Symbol, UnaryOperation, GlobalVariable, StringSymbol]: - """Lift global variables via datavariable type. Check bninja error case + recursive datavariable first""" - if not self._addr_in_section(view, variable.address): + self, + variable: DataVariable, + view: BinaryView, + parent: Optional[MediumLevelILInstruction] = None, + callers: list[int] = None, + **kwargs, + ) -> Union[Constant, Symbol, GlobalVariable]: + """Lift global variables via datavariable type""" + # Save view for all internal used functions + if not self._view: + self._view = view + + # If addr was already lifted: Return lifted GlobalVariable with updated SSA + if variable.address in self._lifted_globals.keys(): + return ( + self._lifted_globals[variable.address].copy(ssa_label=parent.ssa_memory_version) + if parent + else self._lifted_globals[variable.address] + ) + + # BNinja error cases: nullptr/small numbers (0, -12...) 
+ if not addr_in_section(view, variable.address): return Constant(variable.address, vartype=Integer(view.address_size * BYTE_SIZE, False)) - if caller_addr == variable.address: + # Check if there is a cycle between GlobalVariables initial_value + if callers and variable.address in callers: return ( self._lifter.lift(variable.symbol) if variable.symbol else Symbol(GLOBAL_VARIABLE_PREFIX + f"{variable.address:x}", variable.address, vartype=Integer.uint32_t()) ) - return self._lift_datavariable_by_type[type(variable.type)](variable, view, parent) - - def _lift_constant_type( - self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None - ) -> StringSymbol: - """Lift constant data type (bninja only uses strings) into code""" # jump table ist auch constant - if str(variable).find("char const") != -1: - string = str(variable.value.rstrip(b"\x00"))[ - 2:-1 - ] # we want to keep escaped control chars (\n), therefore we take the raw string representation of bytes and purge b"" - return StringSymbol(f'"{string}"', variable.address, vartype=Pointer(Integer.char(), view.address_size * BYTE_SIZE)) - return StringSymbol( - f"&{variable.name}" if variable.name else GLOBAL_VARIABLE_PREFIX + f"{variable.address:x}", variable.address - ) # Else - - def _lift_pointer_type(self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None): + return self._lift_datavariable_by_type[type(variable.type)](variable, parent, callers=callers) + + def _lift_array_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_) -> GlobalVariable: + """Lift constant data type (strings and jump tables) into code""" + type = self._lifter.lift(variable.type) + match variable.value: + case bytes(): # BNinja corner case: C-Strings (8Bit) are represented as python Bytes + value = [Constant(x, type.type) for x in str(variable.value.rstrip(b"\x00"))[2:-1]] + case _: + value = [Constant(x, type.type) for x in 
variable.value] + + return self._build_global_variable( + name=variable.name, + type=type, + addr=variable.address, + init_value=ConstantComposition(value, type), + ssa_label=parent.ssa_memory_version if parent else 0, + ) + + def _lift_basic_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_) -> GlobalVariable: + """Lift basic C type found by BNinja (int, float, char, ...)""" + # If variable references something in address space, then lift it as a pointer (BNinja 4.0 "Error") + if [x for x in variable.data_refs_from]: + return self._lifter.lift( + DataVariable(self._view, variable.address, Type.pointer(self._view, Type.void()), False), view=self._view, parent=parent + ) # Don't need to supply `callers`, will be handled by `_lift_pointer_type` automatically + + type = self._lifter.lift(variable.type) + return self._build_global_variable( + name=self._lifter.lift(variable.symbol).name if variable.symbol else None, + type=type, + addr=variable.address, + init_value=Constant(variable.value, type), + ssa_label=parent.ssa_memory_version if parent else 0, + ) + + def _lift_void_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_) -> GlobalVariable: + "Lift unknown type, by checking the value at the given address. Will always be lifted as a pointer. Try to extract datavariable, string or bytes as value" + value, type = self._get_unknown_value(variable) + return self._build_global_variable( + name=self._lifter.lift(variable.symbol).name if variable.symbol else None, + type=type, + addr=variable.address, + init_value=value, + ssa_label=parent.ssa_memory_version if parent else 0, + ) + + def _lift_pointer_type( + self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, callers: list[int] = None, **_ + ) -> Union[GlobalVariable, Symbol]: """Lift pointer as: - 1. Function pointer: If bninja already knows it's a function pointer. + 1. 
Function pointer: If Bninja already knows it's a function pointer. 2. Type pointer: As normal type pointer (there _should_ be a datavariable at the pointers dest.) 3. Void pointer: Try to extract a datavariable (recover type of void* directly), string (char*) or raw bytes (void*) at the given address """ - if isinstance(variable.type.target, FunctionType): - return ImportedFunctionSymbol(variable.name, variable.address, vartype=Pointer(Integer.char(), view.address_size * BYTE_SIZE)) - if isinstance(variable.type.target, VoidType): - init_value, type = self._get_unknown_value(variable.value, view, variable.address) - if not isinstance(type, PointerType): # Fix type to be a pointer (happens when a datavariable is at the dest.) - type = Type.pointer(view.arch, type) - else: - init_value, type = ( - self._lifter.lift(view.get_data_var_at(variable.value), view=view, caller_addr=variable.address), - variable.type, - ) - return UnaryOperation( - OperationType.address, - [ - GlobalVariable( - name=self._lifter.lift(variable.symbol).name if variable.symbol else GLOBAL_VARIABLE_PREFIX + f"{variable.address:x}", - vartype=self._lifter.lift(type), - ssa_label=parent.ssa_memory_version if parent else 0, - initial_value=init_value, + match variable.type.target: + case FunctionType(): # BNinja knows it's a imported function pointer + return ImportedFunctionSymbol( + variable.name, variable.address, vartype=Pointer(Integer.char(), self._view.address_size * BYTE_SIZE) ) - ], - ) - - def _lift_basic_type( - self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None - ) -> UnaryOperation: - """Lift basic known type""" - return UnaryOperation( - OperationType.address, - [ - GlobalVariable( - name=self._lifter.lift(variable.symbol).name if variable.symbol else GLOBAL_VARIABLE_PREFIX + f"{variable.address:x}", - vartype=self._lifter.lift(variable.type), - ssa_label=parent.ssa_memory_version if parent else 0, - 
initial_value=Constant(variable.value), + case VoidType(): # BNinja knows it's a pointer pointing at something + # Extract the initial_value and type from the location where the pointer is pointing to + init_value, type = self._get_unknown_pointer_value(variable, callers) + case _: + if callers: + callers.append(variable.address) + else: + callers = [variable.address] + init_value, type = ( + self._lifter.lift(self._view.get_data_var_at(variable.value), view=self._view, callers=callers), + self._lifter.lift(variable.type), ) - ], - ) - - def _lift_void_type( - self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None - ) -> GlobalVariable: - "Lift unknown type, by checking the value at the given address. Will always be lifted as a pointer. Try to extract datavariable, string or bytes as value" - value, type = self._get_unknown_value(variable.address, view, variable.address) - return GlobalVariable( - name=self._lifter.lift(variable.symbol).name if variable.symbol else GLOBAL_VARIABLE_PREFIX + f"{variable.address:x}", - vartype=self._lifter.lift(type), + return self._build_global_variable( + name=self._lifter.lift(variable.symbol).name if variable.symbol else None, + type=type, + addr=variable.address, + init_value=init_value, ssa_label=parent.ssa_memory_version if parent else 0, - initial_value=value, ) - def _lift_named_type_ref( - self, variable: DataVariable, view: BinaryView, parent: Optional[MediumLevelILInstruction] = None - ) -> GlobalVariable: + def _lift_named_type_ref(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None, **_): """Lift a named custom type (Enum, Structs)""" return Constant( "Unknown value", self._lifter.lift(variable.type) - ) # BNinja error, need to check with the issue to get the correct value - - def _get_unknown_value(self, addr: int, view: BinaryView, caller_addr: int = 0): - """Return symbol, datavariable, address, string or raw bytes at given address.""" - if 
datavariable := view.get_data_var_at(addr): - return self._lifter.lift(datavariable, view=view, caller_addr=caller_addr), datavariable.type - if not self._addr_in_section(view, addr): - return addr, Type.pointer(view.arch, Type.void()) - if (data := self._get_different_string_types_at(addr, view)) and data[0] is not None: - data, type = data[0], Type.pointer(view.arch, data[1]) - else: - data, type = self._get_raw_bytes(addr, view), Type.pointer(view.arch, Type.void()) + ) # BNinja error, need to check with the issue to get the correct value + entry for structs - if len(data) > MAX_GLOBAL_STRINGBYTES_LENGTH: - data = data[:MAX_GLOBAL_STRINGBYTES_LENGTH] + '..."' + def _get_unknown_value(self, variable: DataVariable): + """Return string or bytes at dv.address(!) (dv.type must be void)""" + if (data := get_different_string_types_at(variable.address, self._view)) and data[0] is not None: + type = PseudoArrayType(self._lifter.lift(data[1]), len(data[0])) + data = ConstantComposition([Constant(x, type.type) for x in data[0]], type) + else: + data, type = get_raw_bytes(variable.address, self._view), Pointer(CustomType.void()) return data, type - def _get_raw_bytes(self, addr: int, view: BinaryView) -> str: - """Returns raw bytes as hex string after a given address to the next data structure or section""" - if (next_data_var := view.get_next_data_var_after(addr)) is not None: - data = view.read(addr, next_data_var.address - addr) + def _get_unknown_pointer_value(self, variable: DataVariable, callers: list[int] = None): + """Return symbol, datavariable, address, string or raw bytes for a value of a datavariable(!) 
(dv should be a pointer).""" + if not addr_in_section(self._view, variable.value): + type = Pointer(CustomType.void()) + return Constant(variable.value, type), type + + if datavariable := self._view.get_data_var_at(variable.value): + if callers: + callers.append(variable.address) + else: + callers = [variable.address] + type = self._lifter.lift(datavariable.type) + value = self._lifter.lift(datavariable, view=self._view, callers=callers) + if not isinstance(type, (Pointer, PseudoArrayType)): + type = Pointer(type, self._view.address_size * BYTE_SIZE) + value = UnaryOperation( + OperationType.address, + [value], + vartype=value.type, + ) + return value, type + + if (data := get_different_string_types_at(variable.value, self._view)) and data[ + 0 + ] is not None: # Implicit pointer removal if called from a pointer value, does NOT need to be a UnaryOperation + vtype = PseudoArrayType(self._lifter.lift(data[1]), len(data[0])) + vdata = ConstantComposition([Constant(x, vtype.type) for x in data[0]], vtype) + data = self._build_global_variable(None, vtype, variable.value, vdata, None) + type = Pointer(vtype, self._view.address_size * BYTE_SIZE) + return ( + UnaryOperation( + OperationType.address, + [data], + vartype=data.type, + ), + type, + ) else: - data = view.read(addr, view.get_sections_at(addr)[0].end) - - string = "".join("\\x{:02x}".format(x) for x in data) - return f'"{string}"' - - def _get_different_string_types_at(self, addr: int, view: BinaryView) -> Tuple[Optional[str], Type]: - """Extract string with char/wchar16/wchar32 type if there is one""" - types: list[Type] = [Type.char(), Type.wide_char(2), Type.wide_char(4)] - for type in types: - string = self._get_string_at(view, addr, type.width) - if string != None: - break - return string, type - - def _get_string_at(self, view: BinaryView, addr: int, width: int) -> Optional[str]: - """Read string with specified width from location. 
Explanation for the magic parsing: - - we read 1, 2 or 4 long integers which should be interpreted as a byte in ASCII range (while Loop; can't use chr() for checking) - - afterwards we convert bytes array manually to a string by removing the "bytearray(...)" parts from the string - - this string now consists of readable chars (A, b), escaped hex values (\\x17) and control chars (\n, \t) - - we consider a it a string, if it only consists of readable chars + control chars - """ - raw_bytes = bytearray() - match width: - case 1: - read = view.reader(addr).read8 - identifier = "" - case 2: - read = view.reader(addr).read16 - identifier = "L" - case 4: - read = view.reader(addr).read32 - identifier = "L" - case _: - raise ValueError("Width not supported for reading bytes") + data, type = get_raw_bytes(variable.value, self._view), Pointer(CustomType.void()) + return data, type + + +def get_raw_bytes(addr: int, view: BinaryView) -> bytes: + """Returns raw bytes as hex string after a given address to the next data structure or section""" + if (next_data_var := view.get_next_data_var_after(addr)) is not None: + return view.read(addr, next_data_var.address - addr) + return view.read(addr, view.get_sections_at(addr)[0].end) + - while (byte := read()) is not None and byte != 0x00: - if byte > 127: - return None - raw_bytes.append(byte) +def get_different_string_types_at(addr: int, view: BinaryView) -> Tuple[Optional[str], Type]: + """Extract string with char/wchar16/wchar32 type if there is one""" + types: list[Type] = [Type.char(), Type.wide_char(2), Type.wide_char(4)] + for type in types: + string = _get_string_at(view, addr, type.width) + if string != None: + break + return string, type - string = str(raw_bytes)[12:-2] - if len(string) < 2 or string.find("\\x") != -1: # escaped + +def _get_string_at(view: BinaryView, addr: int, width: int) -> Optional[str]: + """Read string with specified width from location. 
Explanation for the magic parsing: + - we read 1, 2 or 4 long integers which should be interpreted as a byte in ASCII range (while Loop; can't use chr() for checking) + - afterwards we convert bytes array manually to a string by removing the "bytearray(...)" parts from the string + - this string now consists of readable chars (A, b), escaped hex values (\\x17) and control chars (\n, \t) + - we consider a it a string, if it only consists of readable chars + control chars + """ + raw_bytes = bytearray() + match width: + case 1: + read = view.reader(addr).read8 + case 2: + read = view.reader(addr).read16 + case 4: + read = view.reader(addr).read32 + case _: + raise ValueError("Width not supported for reading bytes") + + while (byte := read()) is not None and byte != 0x00: + if byte > 127: return None + raw_bytes.append(byte) + + string = str(raw_bytes)[12:-2] + if len(string) < 2 or string.find("\\x") != -1: + return None + + return string + + +def addr_in_section(view: BinaryView, addr: int) -> bool: + """Returns True if address is contained in a section, False otherwise""" + for _, section in view.sections.items(): + if addr >= section.start and addr <= section.end: + return True + return False - return identifier + f'"{string}"' - def _addr_in_section(self, view: BinaryView, addr: int) -> bool: - """Returns True if address is contained in a section, False otherwise""" - for _, section in view.sections.items(): - if addr >= section.start and addr <= section.end: - return True - return False +def addr_in_ro_section(view: BinaryView, addr: int) -> bool: + """Returns True if address is contained in a read only section, False otherwise""" + for _, section in view.sections.items(): + if addr >= section.start and addr <= section.end and section.semantics == SectionSemantics.ReadOnlyDataSectionSemantics: + return True + return False diff --git a/decompiler/frontend/binaryninja/handlers/symbols.py b/decompiler/frontend/binaryninja/handlers/symbols.py index 
29825c0e5..5d04bf579 100644 --- a/decompiler/frontend/binaryninja/handlers/symbols.py +++ b/decompiler/frontend/binaryninja/handlers/symbols.py @@ -26,6 +26,7 @@ def __init__(self, lifter: ObserverLifter): SymbolType.ImportedDataSymbol: Symbol, SymbolType.ExternalSymbol: ImportedFunctionSymbol, SymbolType.LibraryFunctionSymbol: Symbol, + SymbolType.SymbolicFunctionSymbol: FunctionSymbol, } def register(self): diff --git a/decompiler/frontend/binaryninja/handlers/types.py b/decompiler/frontend/binaryninja/handlers/types.py index 4caf8a6e6..daf2902fd 100644 --- a/decompiler/frontend/binaryninja/handlers/types.py +++ b/decompiler/frontend/binaryninja/handlers/types.py @@ -21,6 +21,7 @@ WideCharType, ) from decompiler.frontend.lifter import Handler +from decompiler.structures.pseudo import ArrayType as PseudoArrayType from decompiler.structures.pseudo import CustomType, Float, FunctionTypeDef, Integer, Pointer, UnknownType, Variable from decompiler.structures.pseudo.complextypes import Class, ComplexTypeMember, ComplexTypeName, Enum, Struct from decompiler.structures.pseudo.complextypes import Union as Union_ @@ -171,9 +172,9 @@ def lift_pointer(self, pointer: PointerType, **kwargs) -> Pointer: """Lift the given pointer type as a pointer on the nested type.""" return Pointer(self._lifter.lift(pointer.target, parent=pointer), pointer.width * self.BYTE_SIZE) - def lift_array(self, array: ArrayType, **kwargs) -> Pointer: + def lift_array(self, array: ArrayType, **kwargs) -> PseudoArrayType: """Lift an array as a pointer of the given type, omitting the size information.""" - return Pointer(self._lifter.lift(array.element_type)) + return PseudoArrayType(self._lifter.lift(array.element_type), array.count) def lift_function_type(self, function_type: FunctionType, **kwargs) -> FunctionTypeDef: """Lift an anonymous function signature such as void*(int, long).""" diff --git a/decompiler/pipeline/commons/expressionpropagationcommons.py 
b/decompiler/pipeline/commons/expressionpropagationcommons.py index 5bc493952..12a7f2459 100644 --- a/decompiler/pipeline/commons/expressionpropagationcommons.py +++ b/decompiler/pipeline/commons/expressionpropagationcommons.py @@ -435,16 +435,16 @@ def _is_aliased_variable(expression: Expression) -> bool: return isinstance(expression, Variable) and expression.is_aliased @staticmethod - def _contains_global_variable(expression: Assignment) -> bool: + def _contains_writeable_global_variable(expression: Assignment) -> bool: """ :param expression: Assignment expression to be tested :return: true if any requirement of expression is a GlobalVariable """ for expr in expression.destination.requirements: - if isinstance(expr, GlobalVariable): + if isinstance(expr, GlobalVariable) and not expr.is_constant: return True for expr in expression.value.requirements: - if isinstance(expr, GlobalVariable): + if isinstance(expr, GlobalVariable) and not expr.is_constant: return True return False diff --git a/decompiler/pipeline/dataflowanalysis/expressionpropagation.py b/decompiler/pipeline/dataflowanalysis/expressionpropagation.py index 5ff82bb46..b32d15fb0 100644 --- a/decompiler/pipeline/dataflowanalysis/expressionpropagation.py +++ b/decompiler/pipeline/dataflowanalysis/expressionpropagation.py @@ -31,7 +31,7 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ or self._defines_unknown_expression(definition) or self._contains_aliased_variables(definition) or self._is_address_assignment(definition) - or self._contains_global_variable(definition) + or self._contains_writeable_global_variable(definition) or self._operation_is_propagated_in_phi(target, definition) or self._is_invalid_propagation_into_address_operation(target, definition) or self._is_dereference_assignment(definition) diff --git a/decompiler/pipeline/dataflowanalysis/expressionpropagationfunctioncall.py b/decompiler/pipeline/dataflowanalysis/expressionpropagationfunctioncall.py index 
159b55c6d..a733fc7df 100644 --- a/decompiler/pipeline/dataflowanalysis/expressionpropagationfunctioncall.py +++ b/decompiler/pipeline/dataflowanalysis/expressionpropagationfunctioncall.py @@ -105,7 +105,7 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ or self._defines_unknown_expression(definition) or self._contains_aliased_variables(definition) or self._is_address_assignment(definition) - or self._contains_global_variable(definition) + or self._contains_writeable_global_variable(definition) or self._operation_is_propagated_in_phi(target, definition) or self._is_invalid_propagation_into_address_operation(target, definition) or self._is_dereference_assignment(definition) diff --git a/decompiler/pipeline/dataflowanalysis/expressionpropagationmemory.py b/decompiler/pipeline/dataflowanalysis/expressionpropagationmemory.py index b19ca3172..6d89b6d5b 100644 --- a/decompiler/pipeline/dataflowanalysis/expressionpropagationmemory.py +++ b/decompiler/pipeline/dataflowanalysis/expressionpropagationmemory.py @@ -51,7 +51,7 @@ def _definition_can_be_propagated_into_target(self, definition: Assignment, targ or self._is_call_assignment(definition) or self._is_address_into_dereference(definition, target) or self._defines_unknown_expression(definition) - or self._contains_global_variable(definition) + or self._contains_writeable_global_variable(definition) or self._operation_is_propagated_in_phi(target, definition) or self._is_invalid_propagation_into_address_operation(target, definition) or self._is_aliased_postponed_for_propagation(target, definition) diff --git a/decompiler/structures/pseudo/__init__.py b/decompiler/structures/pseudo/__init__.py index 0df23d585..2024faef5 100644 --- a/decompiler/structures/pseudo/__init__.py +++ b/decompiler/structures/pseudo/__init__.py @@ -2,17 +2,15 @@ from .delogic_logic import DelogicConverter from .expressions import ( Constant, + ConstantComposition, DataflowObject, Expression, - ExternConstant, - 
ExternFunctionPointer, FunctionSymbol, GlobalVariable, ImportedFunctionSymbol, IntrinsicSymbol, NotUseableConstant, RegisterPair, - StringSymbol, Symbol, Tag, UnknownExpression, @@ -34,5 +32,5 @@ Return, ) from .operations import BinaryOperation, Call, Condition, ListOperation, Operation, OperationType, TernaryExpression, UnaryOperation -from .typing import CustomType, Float, FunctionTypeDef, Integer, Pointer, Type, TypeParser, UnknownType +from .typing import ArrayType, CustomType, Float, FunctionTypeDef, Integer, Pointer, Type, TypeParser, UnknownType from .z3_logic import Z3Converter diff --git a/decompiler/structures/pseudo/expressions.py b/decompiler/structures/pseudo/expressions.py index 0458ee34f..5539dfc14 100644 --- a/decompiler/structures/pseudo/expressions.py +++ b/decompiler/structures/pseudo/expressions.py @@ -35,7 +35,7 @@ from ...util.insertion_ordered_set import InsertionOrderedSet from .complextypes import Enum -from .typing import CustomType, Type, UnknownType +from .typing import ArrayType, CustomType, Type, UnknownType T = TypeVar("T") DecompiledType = TypeVar("DecompiledType", bound=Type) @@ -172,7 +172,7 @@ class Constant(Expression[DecompiledType]): def __init__( self, - value: Union[int, float, str], + value: Union[int, float, str, bytes], vartype: DecompiledType = UnknownType(), pointee: Optional[Constant] = None, tags: Optional[Tuple[Tag, ...]] = None, @@ -229,40 +229,8 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T: return visitor.visit_constant(self) -class ExternConstant(Constant): - """Represents an external Constant. eg. 
stdout/stderr/stdin""" - - def __init__(self, value: Union[str, int, float], vartype: Type = UnknownType(), tags: Optional[Tuple[Tag, ...]] = None): - """Init a new constant expression""" - super().__init__(value, vartype, tags=tags) - - def __str__(self) -> str: - """Return a string as external symbols are already strings""" - return self.value - - def copy(self) -> ExternConstant: - """Generate an ExternConstant with the same value and type.""" - return ExternConstant(self.value, self._type.copy(), self.tags) - - -class ExternFunctionPointer(Constant): - """Represents an extern fixed function pointer.""" - - def __init__(self, value: Union[str, int, float], vartype: Type = UnknownType(), tags: Optional[Tuple[Tag, ...]] = None): - """Init a new function pointer expression""" - super().__init__(value, vartype, tags=tags) - - def __str__(self) -> str: - """Return a string as external symbols are already strings""" - return f"{self.value}" - - def copy(self) -> ExternFunctionPointer: - """Generate an ExternConstant with the same value and type.""" - return ExternFunctionPointer(self.value, self._type.copy(), self.tags) - - class NotUseableConstant(Constant): - """Represents a non useable constant like 'inf' or 'NaN' as a string""" + """Represents a non useable constant like 'inf', 'NaN', or the value of builtin functions as a string""" def __init__(self, value: str, tags: Optional[Tuple[Tag, ...]] = None): super().__init__(value, CustomType("double", 0), tags=tags) @@ -307,22 +275,6 @@ def copy(self) -> Symbol: return Symbol(self.name, self.value, self._type.copy(), self.tags) -class StringSymbol(Symbol): - """Represents a global string constant (const char[size]). 
Special chars should be escaped!""" - - def __str__(self): - """Return raw string.""" - return self._name - - def __repr__(self): - """Return the global string with its address.""" - return f"string {self.name} at {self.value}" - - def copy(self) -> StringSymbol: - """Generate an StringSymbol with the same name, value and type.""" - return StringSymbol(self.name, self.value, self._type.copy(), self.tags) - - class FunctionSymbol(Symbol): """Represents a function name""" @@ -427,31 +379,35 @@ class GlobalVariable(Variable): """Represents a global variable that comes from MLIL_CONST_PTR. MLIL_CONST_PTR represents the following types of pointers: - Pointers in .text/.bss/.rodata/.data/symbol table. - - Function call, and thereby function pointers.""" + - Function call, and thereby function pointers. + """ def __init__( self, name: str, - vartype: Type = UnknownType(), + vartype: Type, + initial_value: Expression, ssa_label: int = None, is_aliased: bool = True, ssa_name: Optional[Variable] = None, - initial_value: Union[float, int, str, GlobalVariable] = None, + is_constant: bool = False, tags: Optional[Tuple[Tag, ...]] = None, ): """Init a new global variable. Compared to Variable, it has an additional field initial_value. 
:param initial_value: Can be a number, string or GlobalVariable.""" super().__init__(name, vartype, ssa_label, is_aliased, ssa_name, tags=tags) self.initial_value = initial_value + self.is_constant = is_constant def copy( self, name: str = None, vartype: Type = None, + initial_value: Expression = None, ssa_label: int = None, is_aliased: bool = None, ssa_name: Optional[Variable] = None, - initial_value: Union[float, int, str, GlobalVariable] = None, + is_constant: bool = None, tags: Optional[Tuple[Tag, ...]] = None, ) -> GlobalVariable: """Provide a copy of the current Variable.""" @@ -459,24 +415,24 @@ def copy( return self.__class__( self._name[:] if name is None else name, self._type.copy() if vartype is None else vartype, + self.initial_value.copy() if initial_value is None else initial_value.copy(), self.ssa_label if ssa_label is None else ssa_label, self.is_aliased if is_aliased is None else is_aliased, self.ssa_name if ssa_name is None else ssa_name, - self._get_init_value_copy(initial_value), + self.is_constant if is_constant is None else is_constant, self.tags if tags is None else tags, ) + def __iter__(self) -> Iterator[Expression]: + yield self.initial_value + def __str__(self) -> str: """Return a string representation of the global variable.""" return f"{self._name}" if (label := self.ssa_label) is None else f"{self._name}#{label}" - def _get_init_value_copy(self, initial_value: Union[float, int, str, GlobalVariable] = None) -> Union[float, int, str, GlobalVariable]: - """When copying, the original global variable object is responsible for copying initial value properly, not the caller of copy.""" - if initial_value: - initial_value_copy = initial_value if not isinstance(initial_value, DataflowObject) else initial_value.copy() - else: - initial_value_copy = self.initial_value if not isinstance(self.initial_value, DataflowObject) else self.initial_value.copy() - return initial_value_copy + def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T: 
+ """Invoke the appropriate visitor for this Expression.""" + return visitor.visit_global_variable(self) class RegisterPair(Variable): @@ -540,3 +496,25 @@ def copy(self) -> RegisterPair: def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T: """Invoke the appropriate visitor for this Expression.""" return visitor.visit_register_pair(self) + + +class ConstantComposition(Constant): + def __init__(self, value: list[Constant], vartype: DecompiledType = UnknownType(), tags: Optional[Tuple[Tag, ...]] = None): + super().__init__( + value, + vartype, + None, + tags, + ) + + def __str__(self) -> str: + """Return a string representation of the ConstantComposition""" + return "{" + ",".join([str(x) for x in self.value]) + "}" + + def copy(self) -> ConstantComposition: + """Generate a copy of the UnknownExpression with the same message.""" + return ConstantComposition([x.copy() for x in self.value], self._type.copy()) + + def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T: + """Invoke the appropriate visitor for this Expression.""" + return visitor.visit_constant_composition(self) diff --git a/decompiler/structures/pseudo/typing.py b/decompiler/structures/pseudo/typing.py index f27086bcc..cb2d2404e 100644 --- a/decompiler/structures/pseudo/typing.py +++ b/decompiler/structures/pseudo/typing.py @@ -178,6 +178,28 @@ def copy(self, **kwargs) -> Pointer: return Pointer(self.type.copy(), self.size) +@dataclass(frozen=True, order=True) +class ArrayType(Type): + """Class representing arrays.""" + + type: Type + elements: int + + def __init__(self, basetype: Type, elements: int): + """Custom constructor to change the order of the parameters.""" + object.__setattr__(self, "type", basetype) + object.__setattr__(self, "size", basetype.size * elements) + object.__setattr__(self, "elements", elements) + + def __str__(self) -> str: + """Return a nice string representation.""" + return f"{self.type} [{self.elements}]" + + def copy(self, **kwargs) -> Pointer: + 
"""Generate a copy of the current pointer.""" + return ArrayType(self.type.copy(), self.elements) + + @dataclass(frozen=True, order=True) class CustomType(Type): """Class representing a non-basic type.""" @@ -199,6 +221,16 @@ def void(cls) -> CustomType: """Return a void type representing a nil value.""" return cls("void", 0) + @classmethod + def wchar16(cls) -> CustomType: + """Return wchar type representing a 16-bit Unicode character""" + return cls("wchar16", 16) + + @classmethod + def wchar32(cls) -> CustomType: + """Return wchar type representing a 32-bit Unicode character""" + return cls("wchar32", 32) + def __str__(self) -> str: """Return the given string representation.""" return self.text diff --git a/decompiler/structures/visitors/ast_dataflowobjectvisitor.py b/decompiler/structures/visitors/ast_dataflowobjectvisitor.py index b4b1b9a20..e1d95b0bf 100644 --- a/decompiler/structures/visitors/ast_dataflowobjectvisitor.py +++ b/decompiler/structures/visitors/ast_dataflowobjectvisitor.py @@ -14,7 +14,14 @@ VirtualRootNode, ) from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree -from decompiler.structures.pseudo.expressions import Constant, DataflowObject, RegisterPair, UnknownExpression, Variable +from decompiler.structures.pseudo.expressions import ( + Constant, + ConstantComposition, + DataflowObject, + RegisterPair, + UnknownExpression, + Variable, +) from decompiler.structures.pseudo.instructions import Assignment, Break, Comment, Continue, GenericBranch, MemPhi, Phi, Return from decompiler.structures.pseudo.operations import BinaryOperation, Call, Condition, ListOperation, TernaryExpression, UnaryOperation from decompiler.structures.visitors.interfaces import ASTVisitorInterface, DataflowObjectVisitorInterface @@ -86,9 +93,15 @@ def visit_unknown_expression(self, expression: UnknownExpression): def visit_constant(self, expression: Constant): pass + def visit_constant_composition(self, expression: ConstantComposition): + pass + def 
visit_variable(self, expression: Variable): pass + def visit_global_variable(self, expression: Variable): + pass + def visit_register_pair(self, expression: RegisterPair): pass diff --git a/decompiler/structures/visitors/interfaces.py b/decompiler/structures/visitors/interfaces.py index dea82fe2f..6870c0135 100644 --- a/decompiler/structures/visitors/interfaces.py +++ b/decompiler/structures/visitors/interfaces.py @@ -70,10 +70,18 @@ def visit_unknown_expression(self, expr: expressions.UnknownExpression): def visit_constant(self, expr: expressions.Constant): """Visit a Constant.""" + @abstractmethod + def visit_constant_composition(self, expr: expressions.ConstantComposition): + """Visit a Constant.""" + @abstractmethod def visit_variable(self, expr: expressions.Variable): """Visit a Variable.""" + @abstractmethod + def visit_global_variable(self, expr: expressions.GlobalVariable): + """Visit a GlobalVariable.""" + @abstractmethod def visit_register_pair(self, expr: expressions.RegisterPair): """Visit a RegisterPair.""" diff --git a/decompiler/structures/visitors/substitute_visitor.py b/decompiler/structures/visitors/substitute_visitor.py index 1cd71fec5..2be82ec0a 100644 --- a/decompiler/structures/visitors/substitute_visitor.py +++ b/decompiler/structures/visitors/substitute_visitor.py @@ -8,10 +8,12 @@ Comment, Condition, Constant, + ConstantComposition, Continue, DataflowObject, Expression, GenericBranch, + GlobalVariable, ListOperation, MemPhi, Operation, @@ -106,9 +108,15 @@ def visit_unknown_expression(self, expr: UnknownExpression) -> Optional[Dataflow def visit_constant(self, expr: Constant) -> Optional[DataflowObject]: return self._mapper(expr) + def visit_constant_composition(self, expr: ConstantComposition): + pass + def visit_variable(self, expr: Variable) -> Optional[DataflowObject]: return self._mapper(expr) + def visit_global_variable(self, expr: GlobalVariable) -> Optional[DataflowObject]: + return self._mapper(expr) + def visit_register_pair(self, 
expr: RegisterPair) -> Optional[DataflowObject]: if (low_replacement := expr.low.accept(self)) is not None: expr._low = _assert_type(low_replacement, Variable) diff --git a/tests/backend/test_codegenerator.py b/tests/backend/test_codegenerator.py index 021160101..1002902d7 100644 --- a/tests/backend/test_codegenerator.py +++ b/tests/backend/test_codegenerator.py @@ -15,8 +15,6 @@ from decompiler.structures.pseudo.expressions import ( Constant, DataflowObject, - ExternConstant, - ExternFunctionPointer, FunctionSymbol, GlobalVariable, ImportedFunctionSymbol, @@ -33,7 +31,7 @@ OperationType, UnaryOperation, ) -from decompiler.structures.pseudo.typing import CustomType, Float, Integer, Pointer, Type +from decompiler.structures.pseudo.typing import CustomType, Float, Integer, Pointer, Type, UnknownType from decompiler.task import DecompilerTask from decompiler.util.options import Options @@ -1305,9 +1303,7 @@ class TestGlobalVisitor: @pytest.mark.parametrize( "op", [ - ListOperation([Variable("var_5"), GlobalVariable("test")]), - Call(ExternFunctionPointer("function_pointer_name"), [Constant(15), ExternConstant("boo")]), - BinaryOperation(OperationType.plus, [GlobalVariable("var_global"), ExternConstant("var_extern")]), + ListOperation([Variable("var_5"), GlobalVariable("test", UnknownType(), Constant(0))]), ], ) def test_operation(self, op): @@ -1321,22 +1317,3 @@ def test_operation(self, op): ) assert len(GlobalDeclarationGenerator.from_asts([ast])) != 0 - - def test_nested_global_variable(self): - """Ensure that GlobalVariableVisitor can visit global variables nested within a global variable""" - - var1 = ExternFunctionPointer("ExternFunction") - var2 = GlobalVariable("var_glob1", initial_value=var1) - var3 = GlobalVariable("var_glob2", initial_value=var2) - var4 = GlobalVariable("var_glob3", initial_value=var3) - - ast = AbstractSyntaxTree( - CodeNode( - [Assignment(var_a, var4)], - LogicCondition.initialize_true(LogicCondition.generate_new_context()), - ), - {}, - 
) - - global_variables, _ = GlobalDeclarationGenerator._get_global_variables_and_constants([ast]) - assert len(global_variables) == 3 diff --git a/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py b/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py index 05fa37dd9..4fd26b615 100644 --- a/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py +++ b/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py @@ -7,10 +7,10 @@ from decompiler.pipeline.controlflowanalysis.restructuring import PatternIndependentRestructuring from decompiler.structures.ast.ast_nodes import CaseNode, CodeNode, ConditionNode, SeqNode, SwitchNode, WhileLoopNode from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, FalseCase, SwitchCase, TrueCase, UnconditionalEdge -from decompiler.structures.pseudo.expressions import Constant, Expression, FunctionSymbol, ImportedFunctionSymbol, StringSymbol, Variable +from decompiler.structures.pseudo.expressions import Constant, Expression, FunctionSymbol, GlobalVariable, ImportedFunctionSymbol, Variable from decompiler.structures.pseudo.instructions import Assignment, Branch, Break, Continue, IndirectBranch, Return from decompiler.structures.pseudo.operations import BinaryOperation, Call, Condition, ListOperation, OperationType, UnaryOperation -from decompiler.structures.pseudo.typing import CustomType, Integer, Pointer, Type, UnknownType +from decompiler.structures.pseudo.typing import ArrayType, CustomType, Integer, Pointer, Type, UnknownType from decompiler.task import DecompilerTask from decompiler.util.options import Options @@ -5530,7 +5530,7 @@ def test_nested_cases_unnecessary_condition_not_all_irrelevant_2(task): Assignment(var_1_15, var_2_1), Assignment(arg1_15, Constant(1, Integer.int32_t())), Assignment( - var_3_12, StringSymbol("The Input is 7 and you choose week number 
%d", 8949, Pointer(Integer.int32_t(), 32)) + var_3_12, GlobalVariable("g_str", ArrayType(Integer.char(), 44), "The Input is 7 and you choose week number %d", 0) ), ], ), @@ -5550,7 +5550,7 @@ def test_nested_cases_unnecessary_condition_not_all_irrelevant_2(task): [ Assignment(var_1_15, var_2_13), Assignment(arg1_15, var_2_13), - Assignment(var_3_12, StringSymbol("Monday", 8521, Pointer(Integer.int32_t(), 32))), + Assignment(var_3_12, GlobalVariable("g_str", ArrayType(Integer.char(), 6), "Monday", 0)), ], ), BasicBlock(19, [Assignment(ListOperation([]), print_call("common case", 13))]), diff --git a/tests/pipeline/dataflowanalysis/test_expression_propagation.py b/tests/pipeline/dataflowanalysis/test_expression_propagation.py index 4e4c08b12..c544d606c 100644 --- a/tests/pipeline/dataflowanalysis/test_expression_propagation.py +++ b/tests/pipeline/dataflowanalysis/test_expression_propagation.py @@ -402,7 +402,7 @@ def test_globals_not_propagated_1(): | y#0 = global_x#0 + 0x5 | +------------------------+ """ - global_var = GlobalVariable("global_x", ssa_label=0) + global_var = GlobalVariable("global_x", UnknownType(), Constant(0), ssa_label=0) y = Variable("y", ssa_label=0) instructions = [_assign(global_var, Constant(5)), _assign(y, _add(global_var, Constant(5)))] original = _assign(y, _add(global_var, Constant(5))) @@ -427,7 +427,7 @@ def test_globals_not_propagated_2(): | z#0 = y#0 | +------------------------+ """ - global_var = GlobalVariable("global_x", ssa_label=0) + global_var = GlobalVariable("global_x", UnknownType(), Constant(0), ssa_label=0) y = Variable("y", ssa_label=0) z = Variable("z", ssa_label=0) instructions = [_assign(y, _add(global_var, Constant(5))), _assign(z, y)] diff --git a/tests/pipeline/dataflowanalysis/test_expression_propagation_function_call.py b/tests/pipeline/dataflowanalysis/test_expression_propagation_function_call.py index c68f6c4be..fd19c9752 100644 --- a/tests/pipeline/dataflowanalysis/test_expression_propagation_function_call.py 
+++ b/tests/pipeline/dataflowanalysis/test_expression_propagation_function_call.py @@ -6,13 +6,13 @@ from decompiler.structures.pseudo.expressions import Constant, Expression, FunctionSymbol, GlobalVariable, Variable from decompiler.structures.pseudo.instructions import Assignment, Return from decompiler.structures.pseudo.operations import Call, ListOperation, OperationType, UnaryOperation -from decompiler.structures.pseudo.typing import Integer +from decompiler.structures.pseudo.typing import Integer, UnknownType from decompiler.task import DecompilerTask from decompiler.util.options import Options int32 = Integer.int32_t() int64 = Integer.int64_t() -global_x = GlobalVariable("global_x") +global_x = GlobalVariable("global_x", UnknownType(), Constant(0)) x = Variable("x") y = Variable("y") z = Variable("z") diff --git a/tests/pipeline/dataflowanalysis/test_expression_propagation_mem.py b/tests/pipeline/dataflowanalysis/test_expression_propagation_mem.py index f3cb69d44..f1afc6fe8 100644 --- a/tests/pipeline/dataflowanalysis/test_expression_propagation_mem.py +++ b/tests/pipeline/dataflowanalysis/test_expression_propagation_mem.py @@ -437,7 +437,7 @@ def test_not_propagating_globals(): def graphs_with_globals_dereference(): y = Variable("y", ssa_label=0) x = Variable("x", ssa_label=0) - ptr = GlobalVariable("ptr", vartype=Pointer(int32)) + ptr = GlobalVariable("ptr", vartype=Pointer(int32), initial_value=Constant(0x42)) in_n0 = BasicBlock(0, [_assign(x, _deref(_add(ptr, Constant(4)))), _assign(y, _add(x, Constant(4))), _ret(x)]) in_cfg = ControlFlowGraph() diff --git a/tests/pipeline/preprocessing/test_insert_missing_definition.py b/tests/pipeline/preprocessing/test_insert_missing_definition.py index 75452c7c3..f292f2985 100644 --- a/tests/pipeline/preprocessing/test_insert_missing_definition.py +++ b/tests/pipeline/preprocessing/test_insert_missing_definition.py @@ -1039,7 +1039,7 @@ def test_missing_definitions_for_global_variables_are_correct(): """ vars = 
[Variable("var", Integer.int32_t(), i) for i in range(10)] - globals = [GlobalVariable("g", Integer.int32_t(), ssa_label=i, initial_value=42) for i in range(10)] + globals = [GlobalVariable("g", Integer.int32_t(), ssa_label=i, initial_value=Constant(42)) for i in range(10)] instructions_0 = [ Assignment(ListOperation([]), Call(function_symbol("rand"), [], writes_memory=1)), Assignment(vars[0], globals[1]), @@ -1060,7 +1060,7 @@ def test_missing_definitions_for_global_variables_are_correct(): assert inserted_definition == expected_inserted_definition assert inserted_definition.writes_memory == 1 assert isinstance(inserted_definition.value, GlobalVariable) and isinstance(inserted_definition.destination, GlobalVariable) - assert inserted_definition.value.initial_value == inserted_definition.destination.initial_value == 42 + assert inserted_definition.value.initial_value == inserted_definition.destination.initial_value == Constant(42) def test_relation_and_assignment_insertion_after_memory_changing_instructions(): diff --git a/tests/pipeline/preprocessing/test_mem_phi_converter.py b/tests/pipeline/preprocessing/test_mem_phi_converter.py index a19a0ba83..41c953b6f 100644 --- a/tests/pipeline/preprocessing/test_mem_phi_converter.py +++ b/tests/pipeline/preprocessing/test_mem_phi_converter.py @@ -1163,7 +1163,7 @@ def cfg_with_single_global_variable(x) -> Tuple[ControlFlowGraph, ControlFlowGra """ cfg = ControlFlowGraph() mem0, mem1, mem2, mem3 = generate_mem_phi_variables(4) - g = [GlobalVariable("g", Integer.char(), i, initial_value=42) for i in range(4)] + g = [GlobalVariable("g", Integer.char(), initial_value=Constant(42), ssa_label=i) for i in range(4)] n1 = BasicBlock(1, [Assignment(x[0], x[1])]) n2 = BasicBlock(2, [MemPhi(mem1, [mem0, mem3]), Assignment(x[2], g[1])]) n3 = BasicBlock(3, []) diff --git a/tests/structures/pseudo/test_expressions.py b/tests/structures/pseudo/test_expressions.py index c86401cd6..506c2b7a9 100644 --- 
a/tests/structures/pseudo/test_expressions.py +++ b/tests/structures/pseudo/test_expressions.py @@ -5,8 +5,6 @@ from decompiler.structures.pseudo.complextypes import ComplexTypeMember, Enum from decompiler.structures.pseudo.expressions import ( Constant, - ExternConstant, - ExternFunctionPointer, FunctionSymbol, GlobalVariable, ImportedFunctionSymbol, @@ -86,41 +84,40 @@ def test_iter(self): class TestGlobalVariable: def test_initial_value(self): - assert GlobalVariable("var_1", initial_value=42).initial_value == 42 + assert GlobalVariable("var_1", UnknownType(), initial_value=Constant(42)).initial_value == Constant(42) def test_defaults(self): - global_var = GlobalVariable("var_1", Integer.char()) - assert global_var.initial_value is None + global_var = GlobalVariable("var_1", Integer.char(), Constant(42)) assert global_var.ssa_label is None assert global_var.is_aliased is True def test_copy(self): - original = GlobalVariable("var_1", Integer.char(), ssa_label=3, initial_value=42) + original = GlobalVariable("var_1", Integer.char(), ssa_label=3, initial_value=Constant(42)) copy = original.copy() assert isinstance(copy, GlobalVariable) assert id(original) != id(copy) and original == copy assert copy.type == Integer.char() assert copy.ssa_label == original.ssa_label == 3 - assert copy.initial_value == original.initial_value == 42 + assert copy.initial_value == original.initial_value == Constant(42) assert copy.is_aliased and original.is_aliased def test_copy_with_replacement(self): - original = GlobalVariable("var_1", Integer.char(), ssa_label=3, initial_value=42) + original = GlobalVariable("var_1", Integer.char(), ssa_label=3, initial_value=Constant(42)) copy = original.copy(ssa_label=4) assert isinstance(copy, GlobalVariable) assert id(original) != id(copy) and original != copy assert copy.type == Integer.char() assert copy.ssa_label == 4 - assert copy.initial_value == original.initial_value == 42 + assert copy.initial_value == original.initial_value == 
Constant(42) assert copy.is_aliased and original.is_aliased def test_initial_value_is_copied_correctly(self): - g1 = GlobalVariable("g1", Integer.char(), ssa_label=3, initial_value=42) + g1 = GlobalVariable("g1", Integer.char(), ssa_label=3, initial_value=Constant(42)) g1_copy = g1.copy() - assert g1_copy.initial_value == g1.initial_value == 42 - g1_copy_with_replacement = g1.copy(initial_value=84) - assert g1_copy_with_replacement.initial_value == 84 - some_glob = GlobalVariable("g", Integer.char()) + assert g1_copy.initial_value == g1.initial_value == Constant(42) + g1_copy_with_replacement = g1.copy(initial_value=Constant(84)) + assert g1_copy_with_replacement.initial_value == Constant(84) + some_glob = GlobalVariable("g", Integer.char(), Constant(32)) g2 = GlobalVariable("g2", Integer.char(), ssa_label=3, initial_value=some_glob) g2_copy = g2.copy() assert g2.initial_value == g2_copy.initial_value == some_glob @@ -226,22 +223,6 @@ def test_copy(self): assert id(original) != id(copy) and original == copy -class TestExternConstant: - def test_copy(self): - original = ExternConstant("v1") - copy = original.copy() - assert isinstance(copy, ExternConstant) - assert id(original) != id(copy) and original == copy - - -class TestExternFunctionPointer: - def test_copy(self): - original = ExternFunctionPointer("MyFunction") - copy = original.copy() - assert isinstance(copy, ExternFunctionPointer) - assert id(original) != id(copy) and original == copy - - class TestRegisterPair: reg_pair = RegisterPair(Variable("v1", i32, 3), Variable("v2", i32, 4), i64) diff --git a/tests/structures/pseudo/test_instructions.py b/tests/structures/pseudo/test_instructions.py index 236522ee4..b7562deae 100644 --- a/tests/structures/pseudo/test_instructions.py +++ b/tests/structures/pseudo/test_instructions.py @@ -471,12 +471,12 @@ def test_create_phi_for_variables(self): a6, a3, a4, a5 = (Variable(a.name, a.type, i, is_aliased=True) for i in ssa_labels) b6, b3, b4, b5 = (Variable(b.name, 
b.type, i, is_aliased=True) for i in ssa_labels) c6, c3, c4, c5 = (Variable(c.name, c.type, i, is_aliased=True) for i in ssa_labels) - g6, g3, g4, g5 = (GlobalVariable("g", Integer.char(), i, initial_value=42) for i in ssa_labels) + g6, g3, g4, g5 = (GlobalVariable("g", Integer.char(), initial_value=Constant(42), ssa_label=i) for i in ssa_labels) g6_loc, g3_loc, g4_loc, g5_loc = (Variable("g", Integer.char(), is_aliased=True) for i in ssa_labels) phi_a = Phi(a6, [a3, a4, a5]) phi_b = Phi(b6, [b3, b4, b5]) phi_c = Phi(c6, [c3, c4, c5]) - g = GlobalVariable("g", Integer.char(), initial_value=42) + g = GlobalVariable("g", Integer.char(), initial_value=Constant(42)) phi_g = Phi(g6, [g3, g4, g5]) phi_g_loc = Phi(g6_loc, [g3_loc, g4_loc, g5_loc]) assert set(phis) == {phi_a, phi_b, phi_c} diff --git a/tests/test_sample_binaries.py b/tests/test_sample_binaries.py index 3b6c84e7b..8e64e6611 100644 --- a/tests/test_sample_binaries.py +++ b/tests/test_sample_binaries.py @@ -37,221 +37,6 @@ def test_var_decls(): assert output.count("int arg1") == 1 -def test_global_table(): - """Test that global tables appear as bytes.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_table"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Make sure the global variable table.xxx is generated - assert output.count("extern void * table") == 1 - # Make sure the contents of this table variable are bytes - assert output.count("\\\\x20\\\\x14\\\\x13\\\\x63\\\\x63") == 1 - # Make sure that table is referenced by its name, not by address - assert output.count("&table") == 0 - assert output.count("table") > 1 - - -def test_global_indirect_ptrs(): - """Test that indirect pointers in globals are dereferenced correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_indirect_ptrs"] - output = str(subprocess.run(args1, 
check=True, capture_output=True).stdout) - - assert output.count("g_3 = ") == 1 - assert output.count("g_2 = &(g_3)") == 1 - - -def test_global_addr(): - """Test that global variables are lifted correctly + address operator working""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_addr_add"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global variables correct - assert output.count("a = 0x0") == 1 - assert output.count("b = 0x0") == 1 - # Asssert call correct; function signatur: int _add(int*, int*) - assert output.count("_add(&a, &b") == 1 - - -def test_global_ptr(): - """Test that global pointers are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_ptr_add"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global pointer correct - assert output.count("c = 0x0") == 1 - assert output.count("d = 0x0") == 1 - # Assert call correct - len(re.findall("var_[0-9]+= d", output)) == 1 - len(re.findall("var_[0-9]+= c", output)) == 1 - len(re.findall("_add(var_[0-9]+, var_[0-9]+)", output)) == 1 - - -def test_global_ptr_addr(): - """Test that global pointer and variables are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_addr_ptr_add"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global pointer correct - assert output.count("e = 0x17") == 1 - assert output.count("f = 0x42") == 1 - assert output.count("h = 0x0") == 1 - assert output.count("unsigned int * g = &(e)") == 1 - # Assert call correct - len(re.findall("h = &f", output)) == 1 - len(re.findall("var_[0-9]+= h", output)) == 1 - len(re.findall("var_[0-9]+= g", output)) == 1 - len(re.findall("_add(var_[0-9]+, var_[0-9]+)", output)) == 1 - 
- -def test_global_struct(): - """Test that global structs are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_add_struct"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global pointer correct - assert output.count("void * i") == 1 - # Assert call correct - len(re.findall("add_struct(i)", output)) == 1 - - -def test_global_strings(): - """Test that global strings are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_strings"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global pointer correct - assert output.count('char * j = "Hello Decompiler!"') == 1 - assert output.count('char * k = "Hello Void*!"') == 1 - # Assert call correct - assert output.count("Hello World!") == 1 - - -def test_global_fkt_ptr(): - """Test that global function pointers are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_fkt_ptr"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global variables correct - assert output.count("a = 0x0") == 1 - assert output.count("b = 0x0") == 1 - assert output.count("l = 0x0") == 1 - # Assert call correct - len(re.findall("var_[0-9]+(&a, &b, &a)", output)) == 1 - - -def test_global_indirect_ptr2(): - """Test that global indirect pointers are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_indirect_ptrs2"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global variables correct - assert output.count("p = 0xffffffbe") == 2 # should be one, still one lifter issue - assert output.count("o = &(p)") == 1 - assert output.count("n 
= &(o)") == 1 - assert output.count("m = &(n)") == 1 - # Assert call correct - len(re.findall("var_[0-9]+ = m", output)) == 1 - len(re.findall("_add(\*\*var_[0-9]+, &p)", output)) == 1 - - -def test_global_recursive_ptr(): - """Test that global recursiv pointers are lifted correctly.""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_recursive_ptr"] - - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # Assert global variables correct - assert output.count("void * q = q") == 1 - # Assert call correct - len(re.findall("var_[0-9]+ = q", output)) == 2 - len(re.findall("_add(var_[0-9]+, var_[0-9]+)", output)) == 1 - - -def test_global_import_address_symbol(): - """Test that ImportAddressSymbols from Binja gets displayed correctly.""" - base_args = ["python", "decompile.py", "tests/samples/others/app1.so"] - args1 = base_args + ["test_case"] - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - # TODO add tests for " = &g_x" after solving issue with deleting stack strings/arrays - # since for the moment we delete all variables storing stack string components, - # e.g. 
var_e0#1 = &g_22 - - # test occurences of global variables in decompiled code - # test ptr versions (_got to _data) - assert output.count("g_22 = &(g_22)") == 1 - assert output.count("g_26 = &(g_26)") == 1 - assert output.count("g_29 = &(g_29)") == 1 - assert output.count("g_30 = &(g_30)") == 1 - assert output.count("g_32 = &(g_32)") == 1 - assert output.count("g_35 = &(g_35)") == 1 - assert output.count("g_38 = &(g_38)") == 1 - - # test _data values - assert output.count("g_22 = 0xd3e9") == 1 - assert output.count("g_26 = 0x9d") == 1 - assert output.count("g_29 = 0x10001") == 1 - assert output.count("g_30 = 0xec") == 1 - assert output.count("g_32 = 0x5e13cd4f") == 1 - assert output.count("g_35 = 0xff") == 1 - assert output.count("g_38 = 0x7cb0be9") == 1 - - # test types and initial values (dec or hex) are correct in declarations - assert re.search(r"unsigned short\s*g_22\s*=\s*54249", output) or re.search(r"unsigned short\s*g_22\s*=\s*0xd3e9", output) - assert re.search(r"unsigned char\s*g_26\s*=\s*157", output) or re.search(r"unsigned char\s*g_26\s*=\s*0x9d", output) - assert re.search(r"unsigned int\s*g_29\s*=\s*65537", output) or re.search(r"unsigned int\s*g_29\s*=\s*0x10001", output) - assert re.search(r"unsigned char\s*g_30\s*=\s*236", output) or re.search(r"unsigned char\s*g_30\s*=\s*0xec", output) - assert re.search(r"unsigned int\s*g_32\s*=\s*1578356047", output) or re.search(r"unsigned int\s*g_32\s*=\s*0x5e13cd4f", output) - assert re.search(r"unsigned char\s*g_35\s*=\s*255", output) or re.search(r"unsigned char\s*g_35\s*=\s*0xff", output) - assert re.search(r"unsigned int\s*g_38\s*=\s*130747369", output) or re.search(r"unsigned int\s*g_38\s*=\s*0x7cb0be9", output) - - -def test_string_with_pointer_compare(): - """Test that if(ptr == "String") works with logic engine""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_string_compare"] - output = str(subprocess.run(args1, check=True, 
capture_output=True).stdout) - - assert output.count("Hello Decompiler") == 1 # it's enough to test if the output has the string. Would crash if not possible in if - - -def test_w_char(): - """Test that w_char strings are correctly found and propagated""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_w_char"] - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - assert output.count('L"Hello wchar32_t string"') == 1 - assert output.count('printf(/* format */ "%ls\\\\n", L"Inlined wchar32_t string")') == 1 - - -def test_string_length(): - """Test that long strings/bytes are cut after a certain number of chars""" - base_args = ["python", "decompile.py", "tests/samples/bin/systemtests/64/0/globals"] - args1 = base_args + ["global_string_length"] - output = str(subprocess.run(args1, check=True, capture_output=True).stdout) - - assert output.count("...") == 2 - - def test_tailcall_display(): """Test that we display tailcalls correctly.""" args = ["python", "decompile.py", "tests/coreutils/binaries/sha224sum", "rpl_fseeko"]