From 1b328e4e2c294037343c5f4a811c16d68579a6b6 Mon Sep 17 00:00:00 2001 From: Eva-Maria Behner Date: Wed, 24 Apr 2024 15:55:42 +0200 Subject: [PATCH] modify to always pair two --- .../condition_based_refinement.py | 147 +++++++++++------- decompiler/structures/ast/syntaxforest.py | 35 ++--- decompiler/structures/ast/syntaxgraph.py | 3 +- .../test_condition_aware_refinement.py | 8 +- 4 files changed, 115 insertions(+), 78 deletions(-) diff --git a/decompiler/pipeline/controlflowanalysis/restructuring_commons/condition_based_refinement.py b/decompiler/pipeline/controlflowanalysis/restructuring_commons/condition_based_refinement.py index e5e645bf6..b5a0b016e 100644 --- a/decompiler/pipeline/controlflowanalysis/restructuring_commons/condition_based_refinement.py +++ b/decompiler/pipeline/controlflowanalysis/restructuring_commons/condition_based_refinement.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from itertools import chain, combinations -from typing import Dict, Iterator, List, Optional, Set, Tuple +from typing import Dict, Iterator, List, Optional, Set, Tuple, Literal from decompiler.structures.ast.ast_nodes import AbstractSyntaxTreeNode, ConditionNode, SeqNode from decompiler.structures.ast.reachability_graph import SiblingReachability @@ -27,7 +27,10 @@ class Formula: @property def is_if_else_formula(self) -> bool: - """Check whether condition of formula belongs to an if-else condition.""" + """ + Check whether condition of formula belongs to an if-else condition. + The condition-node can only be grouped if it has no reaching-condition. + """ return self.ast_node.reaching_condition.is_true and not self.ast_node.is_single_branch @property @@ -83,7 +86,7 @@ class Symbol: class ConditionCandidates: """A graph implementation handling conditions for the condition-based refinement algorithm.""" - def __init__(self, candidates: List[AbstractSyntaxTreeNode]) -> None: + def __init__(self, sequence_node: SeqNode) -> None: """ Init for the condition-candidates. 
@@ -93,7 +96,12 @@ def __init__(self, candidates: List[AbstractSyntaxTreeNode]) -> None: - unconsidered_nodes: a set of all nodes that we still have to consider for grouping into conditions. - logic_graph: representation of all logic-formulas relevant """ - self._candidates: Dict[AbstractSyntaxTreeNode, Formula] = {c: Formula(c) for c in candidates} + self.sequence_node: SeqNode = sequence_node + self._candidates: Dict[AbstractSyntaxTreeNode, Formula] = { + child: Formula(child) + for child in sequence_node.children + if not child.reaching_condition.is_true or isinstance(child, ConditionNode) + } self._unconsidered_nodes: InsertionOrderedSet[AbstractSyntaxTreeNode] = InsertionOrderedSet() self._logic_graph: DiGraph = DiGraph() self._initialize_logic_graph() @@ -139,6 +147,8 @@ def maximum_subexpression_size(self) -> int: def get_symbol_names_of(self, node: AbstractSyntaxTreeNode) -> Set[str]: """Return all symbols that are used in the formula of the given ast-node.""" + if node not in self._candidates: + return set() return {symbol.name for symbol in self._auxiliary_graph.successors(self._candidates[node])} def get_next_subexpression(self) -> Iterator[Tuple[AbstractSyntaxTreeNode, LogicCondition]]: @@ -171,6 +181,17 @@ def add_ast_node(self, condition_node: ConditionNode): self._logic_graph.add_edge(clause, symbol := Symbol(symbol_name)) self._logic_graph.add_edge(formula, symbol, auxiliary=True) + def update(self, ast_node: ConditionNode): + """Update the logic-graph for the given condition-node.""" + assert ast_node in self._candidates, "The condition node must be a candidate." 
+ formula = self._candidates[ast_node] + if not ast_node.is_single_branch and not all(isinstance(clause, ClauseFormula) for clause in self._formula_graph.successors(formula)): + assert len(clauses := formula.clauses()) == 1, "A non-single condition node should have one formula clause" + self._logic_graph.remove_nodes_from(list(self._formula_graph.successors(formula))) + self._logic_graph.add_edge(ast_node, clauses[0]) + for symbol in self._auxiliary_graph.successors(formula): + self._logic_graph.add_edge(clauses[0], symbol) + @property def _auxiliary_graph(self) -> DiGraph: """Return a read-only view of the logic-graph containing only the auxiliary-edges, i.e., the edges between formulas and symbols.""" @@ -341,79 +362,97 @@ def _structure_sequence_node(self, sequence_node: SeqNode) -> Set[SeqNode]: """ newly_created_sequence_nodes: Set[SeqNode] = set() sibling_reachability: SiblingReachability = self.asforest.get_sibling_reachability_of_children_of(sequence_node) - self._condition_candidates = ConditionCandidates( - [child for child in sequence_node.children if not child.reaching_condition.is_true or isinstance(child, ConditionNode)] - ) + self._condition_candidates = ConditionCandidates(sequence_node) for child, subexpression in self._condition_candidates.get_next_subexpression(): - true_cluster, false_cluster, existing_if_else_conditions = self._cluster_by_condition(subexpression, child) - all_cluster_nodes = true_cluster + false_cluster - - if len(all_cluster_nodes) < 2: - continue - if self._can_place_condition_node_with_branches(all_cluster_nodes, sibling_reachability): - for existing_if_else_cond in existing_if_else_conditions: - if existing_if_else_cond in true_cluster: - true_cluster.remove(existing_if_else_cond) - true_cluster.append(existing_if_else_cond.true_branch_child) - false_cluster.append(existing_if_else_cond.false_branch_child) - else: - false_cluster.remove(existing_if_else_cond) - true_cluster.append(existing_if_else_cond.false_branch_child) - 
false_cluster.append(existing_if_else_cond.true_branch_child) - self.asforest.transform_branch_to_reaching_conditions(existing_if_else_cond) - condition_node = self.asforest.create_condition_node_with(subexpression, true_cluster, false_cluster) - if len(true_cluster) > 1: - newly_created_sequence_nodes.add(condition_node.true_branch_child) - if len(false_cluster) > 1: - newly_created_sequence_nodes.add(condition_node.false_branch_child) - sibling_reachability.merge_siblings_to(condition_node, all_cluster_nodes) - sequence_node._sorted_children = sibling_reachability.sorted_nodes() - for true_condition_branch in (b for b in true_cluster + false_cluster if b.is_single_branch and b.condition.is_true): - self.asforest.replace_condition_node_by_single_branch(true_condition_branch) - self._condition_candidates.add_ast_node(condition_node) - self._condition_candidates.remove_ast_nodes(all_cluster_nodes) + newly_created_sequence_nodes.update(self._cluster_by_condition(subexpression, child, sibling_reachability)) return newly_created_sequence_nodes def _cluster_by_condition( - self, sub_expression: LogicCondition, node_with_subexpression: AbstractSyntaxTreeNode - ) -> Tuple[List[AbstractSyntaxTreeNode], List[AbstractSyntaxTreeNode], List[ConditionNode]]: + self, + sub_expression: LogicCondition, + current_node: AbstractSyntaxTreeNode, + sibling_reachability: SiblingReachability, + ) -> List[SeqNode]: """ Cluster the nodes in sequence_nodes according to the input condition. :param sub_expression: The condition for which we check whether it or its negation is a subexpression of the list of input nodes. - :param node_with_subexpression: The node of which the given sub_expression is a sub-expression + :param current_node: The node of which the given sub_expression is a sub-expression :param condition_candidates: class-object handling all condition candidates. 
:return: A 2-tuple, where the first list is the set of nodes that have condition as subexpression, the second list is the set of nodes that have the negated condition as subexpression. """ - true_children = [] - false_children = [] symbols_of_condition = set(sub_expression.get_symbols_as_string()) negated_condition: Optional[LogicCondition] = None - existing_if_else_condition: List[ConditionNode] = [] - for ast_node in self._condition_candidates.candidates: + for ast_node in [candidate for candidate in self._condition_candidates.candidates if candidate != current_node]: if symbols_of_condition - self._condition_candidates.get_symbol_names_of(ast_node): continue condition, is_if_else_node = self._condition_candidates.get_condition(ast_node) - if ( - ast_node == node_with_subexpression - or (not is_if_else_node and self._is_subexpression_of_cnf_formula(sub_expression, condition)) - or (is_if_else_node and sub_expression.is_equivalent_to(condition)) + if self._is_possible_branch(condition, is_if_else_node, sub_expression) and self._can_place_condition_node_with_branches( + [current_node, ast_node], sibling_reachability ): - true_children.append(ast_node) - if is_if_else_node: - existing_if_else_condition.append(ast_node) + current_node = self._add_condition_node_if_needed(current_node, sub_expression, sibling_reachability) + self._cluster_to_condition(current_node, ast_node, "true", sibling_reachability) else: negated_condition = self._get_negated_condition_of(sub_expression, negated_condition) - if (not is_if_else_node and self._is_subexpression_of_cnf_formula(negated_condition, condition)) or ( - is_if_else_node and negated_condition.is_equivalent_to(condition) + if self._is_possible_branch(condition, is_if_else_node, negated_condition) and self._can_place_condition_node_with_branches( + [current_node, ast_node], sibling_reachability ): - false_children.append(ast_node) - if is_if_else_node: - existing_if_else_condition.append(ast_node) + current_node = 
self._add_condition_node_if_needed(current_node, sub_expression, sibling_reachability) + self._cluster_to_condition(current_node, ast_node, "false", sibling_reachability) + + if isinstance(current_node, ConditionNode): + return [branch.child for branch in current_node.children if isinstance(branch.child, SeqNode)] + return [] + + def _add_condition_node_if_needed(self, node_with_subexpression, sub_expression, sibling_reachability): + if self._need_to_add_condition_node(node_with_subexpression, sub_expression): + tmp = node_with_subexpression + node_with_subexpression = self.asforest.create_condition_node_with(sub_expression, [node_with_subexpression], []) + sibling_reachability.merge_siblings_to(node_with_subexpression, [tmp]) + self._condition_candidates.add_ast_node(node_with_subexpression) + self._condition_candidates.remove_ast_nodes([tmp]) + return node_with_subexpression + + def _need_to_add_condition_node(self, node_with_subexpression, sub_expression): + return not isinstance(node_with_subexpression, ConditionNode) or not sub_expression.is_equal_to(node_with_subexpression.condition) + + def _is_possible_branch(self, condition, is_if_else_node, sub_expression) -> bool: + return (not is_if_else_node and self._is_subexpression_of_cnf_formula(sub_expression, condition)) or ( + is_if_else_node and sub_expression.is_equivalent_to(condition) + ) - return true_children, false_children, existing_if_else_condition + def _cluster_to_condition( + self, + clustering_node: ConditionNode, + new_node: AbstractSyntaxTreeNode, + branch: Literal["true", "false"], + sibling_reachability: SiblingReachability, + ): + true_cluster, false_cluster = None, None + if isinstance(new_node, ConditionNode) and new_node.reaching_condition.is_true: + if new_node.false_branch: + true_cluster = new_node.true_branch_child + false_cluster = new_node.false_branch_child + else: + true_cluster = new_node.true_branch_child + assert true_cluster.reaching_condition.is_true, "single-branch Condition 
nodes should not have a RC at this point." + true_cluster.reaching_condition = new_node.true_branch.branch_condition.copy() + true_expression = clustering_node.condition if branch == "true" else ~clustering_node.condition + true_cluster.reaching_condition.substitute_by_true(true_expression) + else: + true_cluster = new_node + true_expression = clustering_node.condition if branch == "true" else ~clustering_node.condition + true_cluster.reaching_condition.substitute_by_true(true_expression) + if branch == "false": + true_cluster, false_cluster = false_cluster, true_cluster + self.asforest.add_branches_to_condition_node(clustering_node, true_cluster, false_cluster) + + sibling_reachability.merge_siblings_to(clustering_node, [new_node]) + self._condition_candidates.update(clustering_node) + self._condition_candidates.remove_ast_nodes([new_node]) + self._condition_candidates.sequence_node._sorted_children = sibling_reachability.sorted_nodes() @staticmethod def _get_negated_condition_of(condition: LogicCondition, negated_condition: Optional[LogicCondition]) -> LogicCondition: diff --git a/decompiler/structures/ast/syntaxforest.py b/decompiler/structures/ast/syntaxforest.py index ed70ab964..3c245bdb8 100644 --- a/decompiler/structures/ast/syntaxforest.py +++ b/decompiler/structures/ast/syntaxforest.py @@ -331,24 +331,23 @@ def __create_branch_for(self, branch_nodes: List[AbstractSyntaxTreeNode], condit self._remove_edge(branch.parent, branch) return branch - def transform_branch_to_reaching_conditions(self, condition_node: ConditionNode): - """Transform a branch into a sequence-node having the branch-children as children with the according reaching-condition.""" - condition_node.clean() - parent = condition_node.parent - new_seq_node = self._add_sequence_node_before(condition_node) - - self._add_edge(new_seq_node, condition_node.true_branch_child) - condition_node.true_branch_child.reaching_condition = condition_node.condition - nodes = [condition_node.true_branch_child] 
- if condition_node.false_branch: - self._add_edge(new_seq_node, condition_node.false_branch_child) - condition_node.false_branch_child.reaching_condition = ~condition_node.condition - nodes.append(condition_node.false_branch_child) - self._remove_nodes_from([condition_node, condition_node.true_branch, condition_node.false_branch]) - - new_seq_node._sorted_children = tuple(nodes) - new_seq_node.clean() - parent.clean() + def add_branches_to_condition_node(self, condition_node: ConditionNode, true_branch: AbstractSyntaxTreeNode = None, false_branch: Optional[AbstractSyntaxTreeNode] = None): + """Attach the given nodes to the true and false branch of the condition node; a missing false branch is created.""" + if true_branch: + self._remove_edge(true_branch.parent, true_branch) + new_seq_node = self._add_sequence_node_before(condition_node.true_branch_child) + self._add_edge(new_seq_node, true_branch) + new_seq_node.clean() + if false_branch: + self._remove_edge(false_branch.parent, false_branch) + if condition_node.false_branch is None: + false_node = self.factory.create_false_node() + self._add_node(false_node) + self._add_edges_from(((condition_node, false_node), (false_node, false_branch))) + else: + new_seq_node = self._add_sequence_node_before(condition_node.false_branch_child) + self._add_edge(new_seq_node, false_branch) + new_seq_node.clean() def create_switch_node_with(self, expression: Expression, cases: List[Tuple[CaseNode, AbstractSyntaxTreeNode]]) -> SwitchNode: """Create a switch node with the given expression and the given list of case nodes.""" diff --git a/decompiler/structures/ast/syntaxgraph.py b/decompiler/structures/ast/syntaxgraph.py index c76c83213..6f6a42fa4 100644 --- a/decompiler/structures/ast/syntaxgraph.py +++ b/decompiler/structures/ast/syntaxgraph.py @@ -333,8 +333,7 @@ def replace_condition_node_by_single_branch(self, node: ConditionNode): """This function replaces the given AST- condition node by its single child in the AST.""" assert isinstance(node, ConditionNode), f"This transformation works only for condition nodes!" 
assert len(node.children) == 1, f"This works only if the Condition node has only one child!" - if (len(node.children) == 1 and node.true_branch is None) or node.condition.is_false: - node.switch_branches() + node.clean() self._replace_subtree(node, node.true_branch_child) def replace_variable_in_subtree(self, head: AbstractSyntaxTreeNode, replacee: Variable, replacement: Variable): diff --git a/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py b/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py index 5eb73d15b..62dc48bf8 100644 --- a/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py +++ b/tests/pipeline/controlflowanalysis/restructuring_commons/test_condition_aware_refinement.py @@ -5719,10 +5719,10 @@ def test_intersecting_cases(task): PatternIndependentRestructuring().run(task) assert len(list(task.ast.get_switch_nodes_post_order())) == 2 - assert isinstance(seq_node := task.ast.root, SeqNode) and len(children := seq_node.children) == 6 - assert isinstance(children[0], CodeNode) and isinstance(children[5], CodeNode) - assert all(isinstance(child, ConditionNode) for child in children[1:4]) - assert isinstance(children[4], SwitchNode) and isinstance(children[3].true_branch_child, SwitchNode) + assert isinstance(seq_node := task.ast.root, SeqNode) and len(children := seq_node.children) == 5 + assert isinstance(children[0], CodeNode) and isinstance(children[4], CodeNode) + assert all(isinstance(child, ConditionNode) for child in children[1:3]) + assert isinstance(children[3], SwitchNode) and isinstance(children[2].true_branch_child, SwitchNode) def test_missing_cases_switch_in_sequence(task):