From b2726933b81e0b77b10c2ac81ed6e761dcf9b01c Mon Sep 17 00:00:00 2001 From: Eva-Maria Behner Date: Fri, 8 Nov 2024 11:28:47 +0100 Subject: [PATCH] more updates go rust --- .../binaryninja/handlers/constants.py | 5 +- .../binaryninja/rust_string_detection.py | 2 +- .../bitfieldcomparisonunrolling.py | 8 +- .../preprocessing/remove_go_prologue.py | 66 ++++----------- .../remove_noreturn_boilerplate.py | 49 ++--------- .../preprocessing/remove_stack_canary.py | 6 +- decompiler/pipeline/preprocessing/util.py | 83 +++++++++++++------ decompiler/structures/pseudo/expressions.py | 4 - decompiler/structures/pseudo/operations.py | 4 - decompiler/util/default.json | 4 +- 10 files changed, 92 insertions(+), 139 deletions(-) diff --git a/decompiler/frontend/binaryninja/handlers/constants.py b/decompiler/frontend/binaryninja/handlers/constants.py index e2a77309a..fc270a0c9 100644 --- a/decompiler/frontend/binaryninja/handlers/constants.py +++ b/decompiler/frontend/binaryninja/handlers/constants.py @@ -8,14 +8,13 @@ from decompiler.frontend.lifter import Handler from decompiler.structures.pseudo import ( Constant, - CustomType, + FunctionSymbol, GlobalVariable, Integer, NotUseableConstant, OperationType, - Pointer, Symbol, - UnaryOperation, FunctionSymbol, + UnaryOperation, ) BYTE_SIZE = 8 diff --git a/decompiler/frontend/binaryninja/rust_string_detection.py b/decompiler/frontend/binaryninja/rust_string_detection.py index 57b54813f..ea2bf99f1 100644 --- a/decompiler/frontend/binaryninja/rust_string_detection.py +++ b/decompiler/frontend/binaryninja/rust_string_detection.py @@ -22,7 +22,7 @@ def __init__(self, binary_view: BinaryView, options: Options): self._string_slicer_path = options.getstring("rust-string-detection.string_slicer_path", fallback="") self._debug_submodules = options.getboolean("logging.debug-submodules", fallback=False) - def is_rust_binary(self): + def is_rust_binary(self) -> bool: """ Simple heurstic to determine, whether the binary is a Rust binary. 
diff --git a/decompiler/pipeline/expressions/bitfieldcomparisonunrolling.py b/decompiler/pipeline/expressions/bitfieldcomparisonunrolling.py index bb7326d45..d311e0184 100644 --- a/decompiler/pipeline/expressions/bitfieldcomparisonunrolling.py +++ b/decompiler/pipeline/expressions/bitfieldcomparisonunrolling.py @@ -103,13 +103,13 @@ def _get_folded_case(self, block: BasicBlock) -> Optional[FoldedCase]: if not isinstance(branch_instruction := block[-1], Branch): return None match branch_instruction.condition: - case Condition(OperationType.equal, subexpr, Constant(value=0x0)): + case Condition(operation=OperationType.equal, left=subexpr, right=Constant(value=0x0)): edge_type_to_case_node = FalseCase - case Condition(OperationType.not_equal, subexpr, Constant(value=0x0)): + case Condition(operation=OperationType.not_equal, left=subexpr, right=Constant(value=0x0)): edge_type_to_case_node = TrueCase - case Condition(OperationType.equal, Constant(value=0x0), subexpr): + case Condition(operation=OperationType.equal, left=Constant(value=0x0), right=subexpr): edge_type_to_case_node = FalseCase - case Condition(OperationType.not_equal, Constant(value=0x0), subexpr): + case Condition(operation=OperationType.not_equal, left=Constant(value=0x0), right=subexpr): edge_type_to_case_node = TrueCase case _: return None diff --git a/decompiler/pipeline/preprocessing/remove_go_prologue.py b/decompiler/pipeline/preprocessing/remove_go_prologue.py index 83e09289f..c0227324c 100644 --- a/decompiler/pipeline/preprocessing/remove_go_prologue.py +++ b/decompiler/pipeline/preprocessing/remove_go_prologue.py @@ -1,16 +1,15 @@ """Module for removing go idioms""" import logging -from typing import Optional, Tuple +from typing import Optional, Set, Tuple -from decompiler.pipeline.preprocessing.util import _unused_addresses, match_expression +from decompiler.pipeline.preprocessing.util import get_constant_condition, is_noreturn_node, match_expression from decompiler.pipeline.stage import 
PipelineStage from decompiler.structures.graphs.basicblock import BasicBlock from decompiler.structures.graphs.branches import ConditionalEdge, FalseCase, TrueCase, UnconditionalEdge -from decompiler.structures.pseudo.expressions import Constant, Variable +from decompiler.structures.pseudo.expressions import Variable from decompiler.structures.pseudo.instructions import Assignment, Branch, Comment, Phi -from decompiler.structures.pseudo.operations import Call, Condition, OperationType, UnaryOperation -from decompiler.structures.pseudo.typing import Integer +from decompiler.structures.pseudo.operations import Call, OperationType, UnaryOperation from decompiler.task import DecompilerTask @@ -32,7 +31,7 @@ def run(self, task: DecompilerTask): else: logging.info("No Go function prologue found") - def _get_r14_name(self, task: DecompilerTask): + def _get_r14_name(self, task: DecompilerTask) -> str | None: """ Returns the variable name of the parameter stored in r14, e.g. 'arg1'. If no such parameter exists, None is returned. @@ -46,7 +45,7 @@ def _get_r14_name(self, task: DecompilerTask): return None return task.function_parameters[r14_parameter_index].name - def _is_root_single_indirect_successor(self, node: BasicBlock): + def _is_root_single_indirect_successor(self, node: BasicBlock) -> bool: """ Helper function used to verify the graph structure. @@ -67,7 +66,7 @@ def _is_root_single_indirect_successor(self, node: BasicBlock): return False - def _find_morestack_node_in_loop(self, node: BasicBlock): + def _find_morestack_node_in_loop(self, node: BasicBlock) -> BasicBlock: """ Helper function used to verify the graph structure. 
@@ -139,7 +138,7 @@ def _verify_graph_structure_loopless(self) -> Optional[Tuple[BasicBlock, BasicBl return start_node, morestack_node - def _find_morestack_node_loopless(self, node, visited): + def _find_morestack_node_loopless(self, node: BasicBlock, visited: Set[BasicBlock]) -> BasicBlock | None: """ Helper function used to verify the graph structure. @@ -162,28 +161,11 @@ def _find_morestack_node_loopless(self, node, visited): return None # zero successors, check for no return - if self._is_noreturn_node(node): + if is_noreturn_node(node): return node return None - def _get_called_functions(self, instructions): - """ - Helper method to iterate over all called functions in a list of instructions. - """ - for instruction in instructions: - if isinstance(instruction, Assignment) and isinstance(instruction.value, Call): - yield instruction.value.function - - def _is_noreturn_node(self, node: BasicBlock) -> bool: - """ - Helper method to check if `node` contains just one call to a non-returning function. - """ - called_functions = list(self._get_called_functions(node.instructions)) - if len(called_functions) != 1: - return False - return called_functions[0].can_return == False - def _verify_graph_structure_loop(self) -> Optional[Tuple[BasicBlock, BasicBlock]]: """ Verify the graph structure. This method returns morestack_node and start_node if graph structure matches go prologue, otherwise None. 
@@ -214,7 +196,6 @@ def _verify_graph_structure_loop(self) -> Optional[Tuple[BasicBlock, BasicBlock] morestack_node = None start_node = None for successor in successors: - # if root in self._cfg.get_successors(successor): if self._is_root_single_indirect_successor(successor): morestack_node = self._find_morestack_node_in_loop(successor) else: @@ -231,7 +212,7 @@ def _verify_graph_structure_loop(self) -> Optional[Tuple[BasicBlock, BasicBlock] return start_node, morestack_node - def _match_r14(self, variable: Variable): + def _match_r14(self, variable: Variable) -> bool: """ This method is used to check if `variable` corresponds to r14 which has a special meaning in Go prologues. @@ -266,7 +247,7 @@ def _check_root_node(self) -> bool: # check if rhs of condition compares an address (e.g. of __return_addr) left_expression = root_node_if.condition.left match left_expression: - case UnaryOperation(OperationType.address): + case UnaryOperation(operation=OperationType.address): pass case _: return False @@ -352,24 +333,22 @@ def _remove_go_prologue(self, start_node: BasicBlock, morestack_node: BasicBlock # This should never happen raise ValueError("If condition with broken out edges") - root_node_if.substitute(root_node_if.condition, self._get_constant_condition(new_condition)) + root_node_if.substitute(root_node_if.condition, get_constant_condition(new_condition)) # Handle incoming edges to morestack_node from non-returning functions # We can't simply delete edges without causing problems to Phi functions. - # Therefore we replace the unconditional edge with a conditional one. + # Therefore, we replace the unconditional edge with a conditional one. # The added condition at the end of the block makes sure the edge is never taken. # A conditional edge to a newly created "return_node" is added as well. # The return_node does nothing. # After dead code elmination, this will just have the effect of deleting the edge. 
- return_node = BasicBlock(_unused_addresses(cfg=self._cfg, amount=1)[0], [], self._cfg) - self._cfg.add_node(return_node) + return_node = self._cfg.create_block() unconditional_in_edges = [edge for edge in self._cfg.get_in_edges(morestack_node) if isinstance(edge, UnconditionalEdge)] for edge in unconditional_in_edges: - # edge.source.add_instruction(Return([])) self._cfg.remove_edge(edge) self._cfg.add_edge(FalseCase(edge.source, edge.sink)) self._cfg.add_edge(TrueCase(edge.source, return_node)) - condition = self._get_constant_condition(True) + condition = get_constant_condition(True) edge.source.add_instruction(Branch(condition)) if unconditional_in_edges: @@ -383,20 +362,7 @@ def _remove_go_prologue(self, start_node: BasicBlock, morestack_node: BasicBlock logging.info(comment_string) - def _get_constant_condition(self, value: bool): - """ - Helper method creating a Pseudo condition that always evaluates to `True` or `False`, depending on `value`. - """ - int_value = 1 if value else 0 - return Condition( - OperationType.equal, - [ - Constant(1, Integer.int32_t()), - Constant(int_value, Integer.int32_t()), - ], - ) - - def _check_and_remove_go_prologue(self): + def _check_and_remove_go_prologue(self) -> bool: """ Detect and remove the typical go function prologue diff --git a/decompiler/pipeline/preprocessing/remove_noreturn_boilerplate.py b/decompiler/pipeline/preprocessing/remove_noreturn_boilerplate.py index 0e2e087bf..1d92eaaea 100644 --- a/decompiler/pipeline/preprocessing/remove_noreturn_boilerplate.py +++ b/decompiler/pipeline/preprocessing/remove_noreturn_boilerplate.py @@ -2,15 +2,12 @@ from typing import Iterator, List -from decompiler.pipeline.preprocessing.util import _unused_addresses +from decompiler.pipeline.preprocessing.util import get_constant_condition, is_noreturn_node from decompiler.pipeline.stage import PipelineStage from decompiler.structures.graphs.basicblock import BasicBlock from decompiler.structures.graphs.branches import 
ConditionalEdge, FalseCase, TrueCase from decompiler.structures.graphs.rootedgraph import RootedGraph -from decompiler.structures.pseudo.expressions import Constant -from decompiler.structures.pseudo.instructions import Assignment, Branch, Comment -from decompiler.structures.pseudo.operations import Call, Condition, OperationType -from decompiler.structures.pseudo.typing import Integer +from decompiler.structures.pseudo.instructions import Branch, Comment from decompiler.task import DecompilerTask from networkx import MultiDiGraph, dominance_frontiers @@ -28,32 +25,15 @@ def run(self, task: DecompilerTask): self._cfg = task.graph self._aggressive_removal_postdominators_merged_sinks() - def _get_called_functions(self, instructions): - """ - Helper method to iterate over all called functions in a list of instructions. - """ - for instruction in instructions: - if isinstance(instruction, Assignment) and isinstance(instruction.value, Call): - yield instruction.value.function - def _get_noreturn_nodes(self) -> Iterator[BasicBlock]: """ Iterate leaf nodes of cfg, yield nodes containing a call to a non-returning funtion. """ leaf_nodes = [x for x in self._cfg.nodes if self._cfg.out_degree(x) == 0] for node in leaf_nodes: - if self._is_noreturn_node(node): + if is_noreturn_node(node): yield node - def _is_noreturn_node(self, node: BasicBlock) -> bool: - """ - Check if node contains call to a non-returning function. - """ - called_functions = list(self._get_called_functions(node.instructions)) - if len(called_functions) != 1: - return False - return called_functions[0].can_return == False - def _patch_condition_edges(self, edges: List[ConditionalEdge]) -> None: """ This method removes whatever was detected to be boilerplate. 
@@ -63,9 +43,9 @@ def _patch_condition_edges(self, edges: List[ConditionalEdge]) -> None: for edge in edges: match edge: case TrueCase(): - condition = self._get_constant_condition(False) + condition = get_constant_condition(False) case FalseCase(): - condition = self._get_constant_condition(True) + condition = get_constant_condition(True) case _: continue instructions = edge.source.instructions @@ -74,19 +54,6 @@ def _patch_condition_edges(self, edges: List[ConditionalEdge]) -> None: instructions.append(Comment("Removed potential boilerplate code")) instructions.append(Branch(condition)) - def _get_constant_condition(self, value: bool): - """ - Helper method creating a Pseudo condition that always evaluates to `True` or `False`, depending on `value`. - """ - int_value = 1 if value else 0 - return Condition( - OperationType.equal, - [ - Constant(1, Integer.int32_t()), - Constant(int_value, Integer.int32_t()), - ], - ) - def _aggressive_removal_postdominators_merged_sinks(self): """ Finds and removes boilerplate code, using the heuristic described below @@ -107,9 +74,9 @@ def _aggressive_removal_postdominators_merged_sinks(self): returning_leaf_nodes = [node for node in leaf_nodes if node not in noreturn_nodes] # create a virtual merged end node (for post dominance calculation) # additionally we create another virtual node so that different non-returning nodes are 'merged' - unused_addresses = _unused_addresses(cfg=self._cfg, amount=2) - virtual_end_node = BasicBlock(address=unused_addresses[0]) - virtual_merged_noreturn_node = BasicBlock(address=unused_addresses[1]) + min_used_address = min(block.address for block in self._cfg) + virtual_end_node = BasicBlock(min_used_address - 1) + virtual_merged_noreturn_node = BasicBlock(min_used_address - 2) # reverse CFG and add virtual nodes to it reversed_cfg_view: MultiDiGraph = self._cfg._graph.reverse(copy=False) reversed_cfg_shallow_copy = MultiDiGraph(reversed_cfg_view) diff --git 
a/decompiler/pipeline/preprocessing/remove_stack_canary.py b/decompiler/pipeline/preprocessing/remove_stack_canary.py index 037257146..700295f76 100644 --- a/decompiler/pipeline/preprocessing/remove_stack_canary.py +++ b/decompiler/pipeline/preprocessing/remove_stack_canary.py @@ -5,9 +5,9 @@ from decompiler.pipeline.preprocessing.util import match_expression from decompiler.pipeline.stage import PipelineStage from decompiler.structures.graphs.branches import BasicBlockEdgeCondition -from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph, UnconditionalEdge -from decompiler.structures.pseudo.instructions import Assignment, Branch -from decompiler.structures.pseudo.operations import Call, OperationType +from decompiler.structures.graphs.cfg import BasicBlock, UnconditionalEdge +from decompiler.structures.pseudo.instructions import Branch +from decompiler.structures.pseudo.operations import OperationType from decompiler.task import DecompilerTask diff --git a/decompiler/pipeline/preprocessing/util.py b/decompiler/pipeline/preprocessing/util.py index a068f5928..4ef4bd84d 100644 --- a/decompiler/pipeline/preprocessing/util.py +++ b/decompiler/pipeline/preprocessing/util.py @@ -1,13 +1,14 @@ """Helper functions for modules in the preprocessing pipeline.""" from collections import defaultdict -from typing import Callable, DefaultDict, Dict, List, Optional, Set, Tuple +from typing import Callable, DefaultDict, Dict, Iterator, List, Optional, Set, Tuple from decompiler.structures.graphs.cfg import BasicBlock, ControlFlowGraph from decompiler.structures.maps import DefMap, UseMap +from decompiler.structures.pseudo import Integer from decompiler.structures.pseudo.expressions import Constant, Expression, Variable -from decompiler.structures.pseudo.instructions import Assignment -from decompiler.structures.pseudo.operations import BinaryOperation, OperationType, UnaryOperation +from decompiler.structures.pseudo.instructions import Assignment, Instruction 
+from decompiler.structures.pseudo.operations import BinaryOperation, Call, Condition, OperationType, UnaryOperation def _init_maps(cfg: ControlFlowGraph) -> Tuple[DefMap, UseMap]: @@ -26,22 +27,6 @@ def _init_maps(cfg: ControlFlowGraph) -> Tuple[DefMap, UseMap]: return def_map, use_map -def _unused_addresses(cfg: ControlFlowGraph, amount: int = 1) -> List[int]: - """Returns a list with the specified amount of addresses, which are not used by any block of the given CFG.""" - used_addresses = {c.address for c in cfg.nodes} - address = -1 - - addresses = list() - - for _ in range(amount): - while address in used_addresses: - address -= 1 - used_addresses.add(address) - addresses.append(address) - - return addresses - - def _init_basicblocks_of_definition(cfg: ControlFlowGraph) -> Dict[Variable, BasicBlock]: """ We compute for each variable the basic blocks where it is defined. This must be unique, since we are in SSA-Form @@ -78,8 +63,10 @@ def _init_basicblocks_usages_variable(cfg: ControlFlowGraph) -> DefaultDict[Vari def _get_last_definition(node: BasicBlock, var: Variable, max_instr_num: int) -> Optional[Tuple[int, Expression]]: - """This helper method finds a variable's last definition within a Block. Only instructions up to `max_instr_num` are considered. - It returns the instructions position and the assigned value if a definition exists and none otherwise.""" + """ + This helper method finds a variable's last definition within a Block. Only instructions up to `max_instr_num` are considered. + It returns the instructions position and the assigned value if a definition exists and none otherwise. 
+ """ for index in reversed(range(max_instr_num + 1)): instruction = node.instructions[index] if isinstance(instruction, Assignment) and instruction.destination == var: @@ -87,8 +74,11 @@ def _get_last_definition(node: BasicBlock, var: Variable, max_instr_num: int) -> return None -def match_expression(node: BasicBlock, expression: Expression, pattern, instr_num=None): - """This function checks whether the given `expression` matches the specified `pattern`. +def match_expression( + node: BasicBlock, expression: Expression, pattern: Tuple | Callable[[Variable], bool] | str, instr_num: int | None = None +) -> bool: + """ + This function checks whether the given `expression` matches the specified `pattern`. The function uses recursion to check whether the provided `expression` matches the given `pattern`. It also considers the instructions defined earlier in the provided `node` (a `BasicBlock`) to resolve variable definitions. @@ -122,10 +112,12 @@ def match_expression(node: BasicBlock, expression: Expression, pattern, instr_nu - It also handles simple dereferences when the offset is zero. - The function returns `False` if no match is found according to the above rules. """ - if not isinstance(pattern, tuple): + if not isinstance(pattern, Tuple): if isinstance(pattern, Callable): + assert isinstance(expression, Variable), "The callable must get a Variable as Input." 
return pattern(expression) else: + # pattern is a string in this case return isinstance(expression, Variable) and expression.name == pattern if instr_num is None: @@ -139,13 +131,50 @@ def match_expression(node: BasicBlock, expression: Expression, pattern, instr_nu definition_instruction_num, defined_value = last_def # important: dont use inner_pattern here return match_expression(node, defined_value, pattern, definition_instruction_num) - case UnaryOperation(OperationType.dereference, BinaryOperation(OperationType.plus, inner_expression, Constant(value=deref_offset))): + case UnaryOperation( + operation=OperationType.dereference, + operand=BinaryOperation(operation=OperationType.plus, left=inner_expression, right=Constant(value=deref_offset)), + ): return match_expression(node, inner_expression, inner_pattern, instr_num) case UnaryOperation( - OperationType.dereference, BinaryOperation(OperationType.minus, inner_expression, Constant(value=neg_deref_offset)) + operation=OperationType.dereference, + operand=BinaryOperation(operation=OperationType.minus, left=inner_expression, right=Constant(value=neg_deref_offset)), ): return match_expression(node, inner_expression, inner_pattern, instr_num) - case UnaryOperation(OperationType.dereference, inner_expression) if deref_offset == 0: + case UnaryOperation(operation=OperationType.dereference, operand=inner_expression) if deref_offset == 0: return match_expression(node, inner_expression, inner_pattern, instr_num) return False + + +def _get_called_functions(instructions: List[Instruction]) -> Iterator[Expression]: + """ + Helper method to iterate over all called functions in a list of instructions. + """ + for instruction in instructions: + if isinstance(instruction, Assignment) and isinstance(instruction.value, Call): + yield instruction.value.function + + +def is_noreturn_node(node: BasicBlock) -> bool: + """ + Helper method to check if `node` contains just one call to a non-returning function. 
+ """ + called_functions = list(_get_called_functions(node.instructions)) + if len(called_functions) != 1: + return False + return called_functions[0].can_return == False + + +def get_constant_condition(value: bool) -> Condition: + """ + Helper method creating a Pseudo condition that always evaluates to `True` or `False`, depending on `value`. + """ + int_value = 1 if value else 0 + return Condition( + OperationType.equal, + [ + Constant(1, Integer.int32_t()), + Constant(int_value, Integer.int32_t()), + ], + ) diff --git a/decompiler/structures/pseudo/expressions.py b/decompiler/structures/pseudo/expressions.py index ffc14a1b6..4db1389df 100644 --- a/decompiler/structures/pseudo/expressions.py +++ b/decompiler/structures/pseudo/expressions.py @@ -164,8 +164,6 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T: class Constant(Expression[DecompiledType]): """Represents a constant expression type.""" - __match_args__ = ("value", "vartype") - def __init__( self, value: Union[int, float, str, bytes], @@ -348,8 +346,6 @@ def copy(self) -> IntrinsicSymbol: class Variable(Expression[DecompiledType]): """Represents a variable based expression.""" - __match_args__ = ("name", "vartype") - def __init__( self, name: str, diff --git a/decompiler/structures/pseudo/operations.py b/decompiler/structures/pseudo/operations.py index e6c66c0b1..fd6c6c4ff 100644 --- a/decompiler/structures/pseudo/operations.py +++ b/decompiler/structures/pseudo/operations.py @@ -329,8 +329,6 @@ def substitute(self, replacee: Variable, replacement: Variable): class UnaryOperation(Operation): """Represents an expression with a single operand.""" - __match_args__ = ("operation", "operand") - def __init__( self, operation: OperationType, @@ -475,8 +473,6 @@ def is_write_access(self) -> bool: class BinaryOperation(Operation): """Class representing operations with two operands.""" - __match_args__ = ("operation", "left", "right") - def __eq__(self, __value): return isinstance(__value, 
BinaryOperation) and super().__eq__(__value) diff --git a/decompiler/util/default.json b/decompiler/util/default.json index d33048622..5329c87de 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -111,7 +111,7 @@ }, { "dest": "remove-go-prologue.remove_prologue", - "default": true, + "default": false, "title": "Remove Go function prologues", "type": "boolean", "description": "remove go funcion prologues", @@ -121,7 +121,7 @@ }, { "dest": "remove-noreturn-boilerplate.remove_noreturn_boilerplate", - "default": true, + "default": false, "title": "Generic no-return boilerplate removal", "type": "boolean", "description": "remove boilerplate leading to non-returning functions",