From f850af03438ba90a4e8f3a6025149942952efb2c Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 18 Apr 2024 18:37:11 +0200 Subject: [PATCH 01/25] Implement conditional out of ssa --- decompiler/pipeline/ssa/dependency_graph.py | 110 ++++++++---------- .../pipeline/ssa/outofssatranslation.py | 24 ++-- decompiler/pipeline/ssa/variable_renaming.py | 96 ++++++++++++++- decompiler/util/to_dot_converter.py | 8 +- 4 files changed, 162 insertions(+), 76 deletions(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index 928d84fa3..ccbf90c33 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -1,74 +1,64 @@ -from typing import Iterable, List, Optional, Set +from functools import reduce +from typing import Iterator from decompiler.structures.graphs.cfg import ControlFlowGraph -from decompiler.structures.interferencegraph import InterferenceGraph +from decompiler.structures.pseudo import Expression, Operation, OperationType from decompiler.structures.pseudo.expressions import Variable from decompiler.structures.pseudo.instructions import Assignment -from decompiler.structures.pseudo.operations import Call -from networkx import DiGraph, weakly_connected_components +from networkx import MultiDiGraph -def _non_call_assignments(cfg: ControlFlowGraph) -> Iterable[Assignment]: - """Yield all interesting assignments for the dependency graph.""" - for instr in cfg.instructions: - if isinstance(instr, Assignment) and isinstance(instr.destination, Variable) and not isinstance(instr.value, Call): - yield instr - - -class DependencyGraph(DiGraph): - def __init__(self, interference_graph: Optional[InterferenceGraph] = None): - super().__init__() - self.add_nodes_from(interference_graph.nodes) - self.interference_graph = interference_graph - - @classmethod - def from_cfg(cls, cfg: ControlFlowGraph, interference_graph: InterferenceGraph): - """ +def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph: + """ Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other. - Add an edge the definition to at most one requirement for each instruction. - - All variables that where not defined via Phi-functions before have out-degree at most 1, because they are defined at most once + - All variables that where not defined via Phi-functions before have out-degree of at most 1, because they are defined at most once. - Variables that are defined via Phi-functions can have one successor for each required variable of the Phi-function. """ - dependency_graph = cls(interference_graph) - for instruction in _non_call_assignments(cfg): - defined_variable = instruction.destination - if isinstance(instruction.value, Variable): - if dependency_graph._variables_can_have_same_name(defined_variable, instruction.value): - dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="high") - elif len(instruction.requirements) == 1: - if dependency_graph._variables_can_have_same_name(defined_variable, instruction.requirements[0]): - dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="medium") - else: - if non_interfering_variable := dependency_graph._non_interfering_requirements(instruction.requirements, defined_variable): - dependency_graph.add_edge(defined_variable, non_interfering_variable, strength="low") - return dependency_graph + dependency_graph = MultiDiGraph() - def _non_interfering_requirements(self, requirements: List[Variable], defined_variable: Variable) -> Optional[Variable]: - """Get the unique non-interfering requirement if it exists, otherwise we return None.""" - non_interfering_requirement = None - for required_variable in requirements: - if self._variables_can_have_same_name(defined_variable, required_variable): - if non_interfering_requirement: - return None - non_interfering_requirement = required_variable - return non_interfering_requirement + for variable in _collect_variables(cfg): + dependency_graph.add_node((variable,)) + for instruction in _assignments_in_cfg(cfg): + defined_variables = instruction.definitions + for used_variable, score in _expression_dependencies(instruction.value).items(): + if score > 0: + dependency_graph.add_edges_from((((dvar,), (used_variable,)) for dvar in defined_variables), score=score) - def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool: - """ - Two variable can have the same name, if they have the same type, are both aliased or both non-aliased variables, and if they - do not interfere. + return dependency_graph - :param source: The potential source vertex. - :param sink: The potential sink vertex - :return: True, if the given variables can have the same name, and false otherwise. - """ - if self.interference_graph.are_interfering(source, sink) or source.type != sink.type or source.is_aliased != sink.is_aliased: - return False - if source.is_aliased and sink.is_aliased and source.name != sink.name: - return False - return True - def get_components(self) -> Iterable[Set[Variable]]: - """Returns the weakly connected components of the dependency graph.""" - for component in weakly_connected_components(self): - yield set(component) +def _collect_variables(cfg: ControlFlowGraph) -> Iterator[Variable]: + for instruction in cfg.instructions: + for subexpression in instruction.subexpressions(): + if isinstance(subexpression, Variable): + yield subexpression + + +def _assignments_in_cfg(cfg: ControlFlowGraph) -> Iterator[Assignment]: + """Yield all interesting assignments for the dependency graph.""" + for instr in cfg.instructions: + if isinstance(instr, Assignment): + yield instr + + +def _expression_dependencies(expression: Expression) -> dict[Variable, float]: + match expression: + case Variable(): + return {expression: 1.0} + case Operation(): + operation_type_penalty = { + OperationType.call: 0, + OperationType.address: 0, + OperationType.dereference: 0, + OperationType.member_access: 0, + }.get(expression.operation, 0.5) + + operands_dependencies = (_expression_dependencies(operand) for operand in expression.operands) + dependencies: dict[Variable, float] = reduce(dict.__or__, operands_dependencies, {}) + for var in dependencies: + dependencies[var] /= len(dependencies) + dependencies[var] *= operation_type_penalty + return dependencies + case _: + return {} diff --git a/decompiler/pipeline/ssa/outofssatranslation.py b/decompiler/pipeline/ssa/outofssatranslation.py index cd76fe4c5..2a5d071bd 100644 --- a/decompiler/pipeline/ssa/outofssatranslation.py +++ b/decompiler/pipeline/ssa/outofssatranslation.py @@ -4,12 +4,12 @@ from collections import defaultdict from configparser import NoOptionError from enum import Enum -from typing import DefaultDict, List +from typing import Callable, DefaultDict, List from decompiler.pipeline.ssa.phi_cleaner import PhiFunctionCleaner from decompiler.pipeline.ssa.phi_dependency_resolver import PhiDependencyResolver from decompiler.pipeline.ssa.phi_lifting import PhiFunctionLifter -from decompiler.pipeline.ssa.variable_renaming import MinimalVariableRenamer, SimpleVariableRenamer +from decompiler.pipeline.ssa.variable_renaming import ConditionalVariableRenamer, MinimalVariableRenamer, SimpleVariableRenamer from decompiler.pipeline.stage import PipelineStage from decompiler.structures.graphs.cfg import BasicBlock from decompiler.structures.interferencegraph import InterferenceGraph @@ -98,12 +98,11 @@ def _out_of_ssa(self) -> None: -> There are different optimization levels """ - try: - self.out_of_ssa_strategy[self._optimization](self) - except KeyError: - error_message = f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far." - logging.error(error_message) - raise NotImplementedError(error_message) + strategy = self.out_of_ssa_strategy.get(self._optimization, None) + if strategy is None: + raise NotImplementedError(f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far.") + + strategy(self) def _simple_out_of_ssa(self) -> None: """ @@ -158,12 +157,15 @@ def _conditional_out_of_ssa(self) -> None: This is a more advanced algorithm for out of SSA: - We first remove the circular dependency of the Phi-functions - Then, we remove the Phi-functions by lifting them to their predecessor basic blocks. - - Afterwards, we rename the variables, by considering their dependency on each other. + - Afterwards, we rename the variables by considering their dependency on each other. """ - pass + PhiDependencyResolver(self._phi_functions_of).resolve() + self.interference_graph = InterferenceGraph(self.task.graph) + PhiFunctionLifter(self.task.graph, self.interference_graph, self._phi_functions_of).lift() + ConditionalVariableRenamer(self.task, self.interference_graph).rename() # This translator maps the optimization levels to the functions. - out_of_ssa_strategy = { + out_of_ssa_strategy: dict[SSAOptions, Callable[["OutOfSsaTranslation"], None]] = { SSAOptions.simple: _simple_out_of_ssa, SSAOptions.minimization: _minimization_out_of_ssa, SSAOptions.lift_minimal: _lift_minimal_out_of_ssa, diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 910e0d6cf..dc11b01e2 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -1,5 +1,6 @@ """Module for renaming variables in Out of SSA.""" +import itertools import logging from collections import defaultdict from dataclasses import dataclass, field @@ -7,14 +8,17 @@ from operator import attrgetter from typing import DefaultDict, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union +import networkx +from decompiler.pipeline.ssa.dependency_graph import dependency_graph_from_cfg from decompiler.structures.interferencegraph import InterferenceGraph from decompiler.structures.pseudo.expressions import GlobalVariable, Variable from decompiler.structures.pseudo.instructions import BaseAssignment, Instruction, Relation from decompiler.structures.pseudo.typing import Type from decompiler.task import DecompilerTask +from decompiler.util.decoration import DecoratedGraph from decompiler.util.insertion_ordered_set import InsertionOrderedSet from decompiler.util.lexicographical_bfs import LexicographicalBFS -from networkx import Graph, connected_components +from networkx import Graph, MultiDiGraph, connected_components @dataclass @@ -334,3 +338,93 @@ def _classes_of(self, neighborhood: Iterable[Variable]) -> Iterable[Variable]: for neighbor in neighborhood: if neighbor in self._variable_classes_handler.color_class_of: yield self._variable_classes_handler.color_class_of[neighbor] + + +class ConditionalVariableRenamer(VariableRenamer): + """ + A minimal renaming strategy, that renames the SSA-variables such that the total number of non SSA-variables is (almost) minimal. + Therefore, we construct color-classes by using lexicographical BFS on the interference graph. When the interference graph is chordal + this leads to a minimum number of possible variables. + """ + + def __init__(self, task, interference_graph: InterferenceGraph): + """ + self._color_classes is a dictionary where the set of keys is the set of colors + and to each color we assign the set of variables of this color. + """ + super().__init__(task, interference_graph.copy()) + + dependency_graph = dependency_graph_from_cfg(task.graph) + + mapping = {} + for variable in self.interference_graph.nodes(): + contracted = tuple(self._variables_contracted_to[variable]) + for var in contracted: + mapping[(var,)] = contracted + + # Merge nodes which need to be contracted from self._variables_contracted_to + dependency_graph = networkx.relabel_nodes(dependency_graph, mapping) + + # counter = 0 + # self._decorate_graph(dependency_graph, f"dep{counter}.svg") + + dependency_graph.edge = dependency_graph.edges(data=True) + while True: + for u, v, _ in sorted(dependency_graph.edges(data=True), key=lambda edge: edge[2]["score"], reverse=True): + if u == v: # self loop + continue + + variables = u + v + if interference_graph.are_interfering(*variables): + continue + if u[0].type != v[0].type: + continue + if u[0].is_aliased != v[0].is_aliased: + continue + + break + else: + # We didn't find any remaining nodes to contract, break outer loop + break + + networkx.relabel_nodes(dependency_graph, {u: (*u, *v), v: (*u, *v)}, copy=False) + # counter += 1 + # self._decorate_graph(dependency_graph, f"dep{counter}.svg") + + # counter += 1 + # self._decorate_graph(dependency_graph, f"dep{counter}.svg") + + self._variable_classes_handler = VariableClassesHandler(defaultdict(set)) + for (i, vars) in enumerate(dependency_graph.nodes): + for var in vars: + self._variable_classes_handler.add_variable_to_class(var, i) + + self.compute_new_name_for_each_variable() + + def _decorate_graph(self, dependency_graph: MultiDiGraph, path: str): + decorated_graph = MultiDiGraph() + for node in dependency_graph.nodes: + decorated_graph.add_node(node, label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) + for u, v, data in dependency_graph.edges.data(): + decorated_graph.add_edge(u, v, label=f"{data['score']}") + for nodes in networkx.weakly_connected_components(dependency_graph): + for node_1, node_2 in combinations(nodes, 2): + if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): + decorated_graph.add_edge(node_1, node_2, color="red", dir="none") + + DecoratedGraph(decorated_graph).export_plot(path, type="svg") + + def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool: + """ + Two variable can have the same name, if they have the same type, are both aliased or both non-aliased variables, and if they + do not interfere. + + :param source: The potential source vertex. + :param sink: The potential sink vertex + :return: True, if the given variables can have the same name, and false otherwise. + """ + if self.interference_graph.are_interfering(source, sink) or source.type != sink.type or source.is_aliased != sink.is_aliased: + return False + if source.is_aliased and sink.is_aliased and source.name != sink.name: + return False + return True diff --git a/decompiler/util/to_dot_converter.py b/decompiler/util/to_dot_converter.py index b1d459d71..9229aca1a 100644 --- a/decompiler/util/to_dot_converter.py +++ b/decompiler/util/to_dot_converter.py @@ -2,14 +2,14 @@ from networkx import DiGraph -HEADER = "strict digraph {" +HEADER = "digraph {" FOOTER = "}" class ToDotConverter: """Class in charge of writing a networkx DiGraph into dot-format""" - ATTRIBUTES = {"color", "fillcolor", "label", "shape", "style"} + ATTRIBUTES = {"color", "fillcolor", "label", "shape", "style", "dir"} def __init__(self, graph: DiGraph): self._graph = graph @@ -24,9 +24,9 @@ def _create_dot(self) -> str: """Create dot-file content.""" content = HEADER + "\n" for node, data in self._graph.nodes(data=True): - content += f"{node} [{self._get_attributes(data)}]; \n" + content += f"{hash(node)} [{self._get_attributes(data)}]; \n" for source, sink, data in self._graph.edges(data=True): - content += f"{source} -> {sink} [{self._get_attributes(data)}]; \n" + content += f"{hash(source)} -> {hash(sink)} [{self._get_attributes(data)}]; \n" content += FOOTER return content From dd3a54314a4f0712fe741680d380d41892a11e99 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 18 Apr 2024 18:44:56 +0200 Subject: [PATCH 02/25] black --- decompiler/pipeline/ssa/dependency_graph.py | 10 +++++----- decompiler/pipeline/ssa/outofssatranslation.py | 4 +++- decompiler/pipeline/ssa/variable_renaming.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index ccbf90c33..d73917eda 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -10,11 +10,11 @@ def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph: """ - Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other. - - Add an edge the definition to at most one requirement for each instruction. - - All variables that where not defined via Phi-functions before have out-degree of at most 1, because they are defined at most once. - - Variables that are defined via Phi-functions can have one successor for each required variable of the Phi-function. - """ + Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other. + - Add an edge the definition to at most one requirement for each instruction. + - All variables that where not defined via Phi-functions before have out-degree of at most 1, because they are defined at most once. + - Variables that are defined via Phi-functions can have one successor for each required variable of the Phi-function. + """ dependency_graph = MultiDiGraph() for variable in _collect_variables(cfg): diff --git a/decompiler/pipeline/ssa/outofssatranslation.py b/decompiler/pipeline/ssa/outofssatranslation.py index 2a5d071bd..e6007fd58 100644 --- a/decompiler/pipeline/ssa/outofssatranslation.py +++ b/decompiler/pipeline/ssa/outofssatranslation.py @@ -100,7 +100,9 @@ def _out_of_ssa(self) -> None: """ strategy = self.out_of_ssa_strategy.get(self._optimization, None) if strategy is None: - raise NotImplementedError(f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far.") + raise NotImplementedError( + f"The Out of SSA according to the optimization level {self._optimization.value} is not implemented so far." + ) strategy(self) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index dc11b01e2..1b4caa2e0 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -395,7 +395,7 @@ def __init__(self, task, interference_graph: InterferenceGraph): # self._decorate_graph(dependency_graph, f"dep{counter}.svg") self._variable_classes_handler = VariableClassesHandler(defaultdict(set)) - for (i, vars) in enumerate(dependency_graph.nodes): + for i, vars in enumerate(dependency_graph.nodes): for var in vars: self._variable_classes_handler.add_variable_to_class(var, i) From 14c0397d1229d37b7e1c78ad730d1f49325e9236 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:00:42 +0200 Subject: [PATCH 03/25] Change dot stuff --- decompiler/pipeline/ssa/variable_renaming.py | 4 ++-- decompiler/util/to_dot_converter.py | 4 ++-- tests/util/test_decoration.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 1b4caa2e0..813745087 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -404,13 +404,13 @@ def __init__(self, task, interference_graph: InterferenceGraph): def _decorate_graph(self, dependency_graph: MultiDiGraph, path: str): decorated_graph = MultiDiGraph() for node in dependency_graph.nodes: - decorated_graph.add_node(node, label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) + decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) for u, v, data in dependency_graph.edges.data(): decorated_graph.add_edge(u, v, label=f"{data['score']}") for nodes in networkx.weakly_connected_components(dependency_graph): for node_1, node_2 in combinations(nodes, 2): if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): - decorated_graph.add_edge(node_1, node_2, color="red", dir="none") + decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none") DecoratedGraph(decorated_graph).export_plot(path, type="svg") diff --git a/decompiler/util/to_dot_converter.py b/decompiler/util/to_dot_converter.py index 9229aca1a..8275e70e1 100644 --- a/decompiler/util/to_dot_converter.py +++ b/decompiler/util/to_dot_converter.py @@ -24,9 +24,9 @@ def _create_dot(self) -> str: """Create dot-file content.""" content = HEADER + "\n" for node, data in self._graph.nodes(data=True): - content += f"{hash(node)} [{self._get_attributes(data)}]; \n" + content += f"{node} [{self._get_attributes(data)}]; \n" for source, sink, data in self._graph.edges(data=True): - content += f"{hash(source)} -> {hash(sink)} [{self._get_attributes(data)}]; \n" + content += f"{source} -> {sink} [{self._get_attributes(data)}]; \n" content += FOOTER return content diff --git a/tests/util/test_decoration.py b/tests/util/test_decoration.py index e055eeeb3..ed2761e4d 100644 --- a/tests/util/test_decoration.py +++ b/tests/util/test_decoration.py @@ -191,7 +191,7 @@ def test_convert_to_dot(self, simple_graph): content = dot_converter._create_dot() assert ( content - == """strict digraph { + == """digraph { 0 [shape="box", color="blue", label="0.\\na#0 = 0x2\\nb#0 = foo(a#0)\\nif(a#0 < b#0)"]; 1 [shape="box", color="blue", label="1.\\nb#2 = a#0 - b#0"]; 2 [shape="box", color="blue", label="2.\\nb#1 = ϕ(b#0,b#2)\\nreturn b#1"]; @@ -207,7 +207,7 @@ def test_convert_to_dot_with_string(self, graph_with_string): content = dot_converter._create_dot() assert ( content - == """strict digraph { + == """digraph { 0 [shape="box", color="blue", label="0.\\na#0 = 0x2\\nb#0 = foo(a#0)\\nif(a#0 < b#0)"]; 1 [shape="box", color="blue", label="1.\\nb#2 = a#0 - b#0"]; 2 [shape="box", color="blue", label="2.\\nb#1 = ϕ(b#0,b#2)\\nprintf(\\"The result is : %i\\", b#1)\\nreturn b#1"]; @@ -470,7 +470,7 @@ def test_dotviz_output(self, ast_condition): [ x in data for x in [ - r"strict digraph {", + r"digraph {", r'[style="filled", fillcolor="#e6f5c9", label="0. SeqNode\n\nSequence"];', r'[style="filled", fillcolor="#e6f5c9", label="1. ConditionNode\n\nif (true)"]', r'[style="filled", fillcolor="#e6f5c9", label="2. SeqNode\n\nSequence"];', @@ -584,7 +584,7 @@ def test_convert_to_dot_if(self, ast_condition): content = dot_converter._create_dot() assert ( content - == """strict digraph { + == """digraph { 0 [style="filled", fillcolor="#e6f5c9", label="0. SeqNode\\n\\nSequence"]; 1 [style="filled", fillcolor="#e6f5c9", label="1. ConditionNode\\n\\nif (true)"]; 2 [style="filled", fillcolor="#e6f5c9", label="2. SeqNode\\n\\nSequence"]; @@ -606,7 +606,7 @@ def test_convert_to_dot_switch(self, ast_switch): content = dot_converter._create_dot() assert ( content - == """strict digraph { + == """digraph { 0 [style="filled", fillcolor="#e6f5c9", label="0. SeqNode\\n\\nSequence"]; 1 [style="filled", fillcolor="#fdcdac", label="1. SwitchNode\\n\\nswitch (0x29)"]; 2 [style="filled", fillcolor="#e6f5c9", label="2. CaseNode\\n\\ncase 0x0:"]; From 31b8dfedb3780acc67b0cdccb94b8576390e5c83 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 2 May 2024 12:08:46 +0200 Subject: [PATCH 04/25] Add some test --- .../pipeline/SSA/test_out_of_ssa_renaming.py | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/tests/pipeline/SSA/test_out_of_ssa_renaming.py b/tests/pipeline/SSA/test_out_of_ssa_renaming.py index 4c7dab76d..17de33147 100644 --- a/tests/pipeline/SSA/test_out_of_ssa_renaming.py +++ b/tests/pipeline/SSA/test_out_of_ssa_renaming.py @@ -1,7 +1,13 @@ """Pytest for renaming SSA-variables to non-SSA-variables.""" +import string from decompiler.pipeline.ssa.phi_lifting import PhiFunctionLifter -from decompiler.pipeline.ssa.variable_renaming import MinimalVariableRenamer, SimpleVariableRenamer, VariableRenamer +from decompiler.pipeline.ssa.variable_renaming import ( + ConditionalVariableRenamer, + MinimalVariableRenamer, + SimpleVariableRenamer, + VariableRenamer, +) from decompiler.structures.interferencegraph import InterferenceGraph from tests.pipeline.SSA.utils_out_of_ssa_tests import * @@ -492,6 +498,23 @@ def test_minimal_renaming_basic_relation(graph_with_relations_easy, variable): } +def test_conditional_renaming_basic_relation(graph_with_relations_easy, variable): + """Checks that conditional renaming can handle relations.""" + task, interference_graph = graph_with_relations_easy + minimal_variable_renamer = MinimalVariableRenamer(task, interference_graph) + + var_18 = [Variable("var_18", Integer(32, True), i, True, None) for i in range(4)] + var_10_1 = Variable("var_10", Pointer(Integer(32, True), 32), 1, False, None) + variable[0].is_aliased = True + variable[1]._type = Pointer(Integer(32, True), 32) + + assert minimal_variable_renamer.renaming_map == { + var_10_1: variable[1], + var_18[2]: variable[0], + var_18[3]: variable[0], + } + + @pytest.fixture() def graph_with_relation() -> Tuple[DecompilerTask, InterferenceGraph]: """ @@ -772,3 +795,61 @@ def test_minimal_renaming_relation(graph_with_relation, variable): var_1c[3]: variable[1], var_1c[4]: variable[1], } + + +def test_conditional_renaming_relation(graph_with_relation, variable): + """Test for relations with simple renaming""" + task, interference_graph = graph_with_relation + conditional_variable_renamer = ConditionalVariableRenamer(task, interference_graph) + + var_28 = Variable("var_28", Pointer(Integer(32, True), 32), 1, False, None) + var_1c = [Variable("var_1c", Integer(32, True), i, True, None) for i in range(5)] + edx_3 = Variable("edx_3", Integer(32, True), 4, False, None) + eax_7 = Variable("eax_7", Integer(32, True), 8, False, None) + variable[0].is_aliased = True + variable[1]._type = Pointer(Integer(32, True), 32) + variable[2].is_aliased = True + + assert conditional_variable_renamer.renaming_map == { + var_28: variable[1], + edx_3: variable[3], + eax_7: variable[3], + var_1c[0]: variable[0], + var_1c[2]: variable[0], + var_1c[3]: variable[2], + var_1c[4]: variable[2], + } + + +def test_conditional_renaming(): + """Test that conditional renaming only combines related variables""" + orig_variables = [Variable(letter, Integer.int32_t()) for letter in string.ascii_lowercase] + new_variables = [Variable(f"var_{index}", Integer.int32_t()) for index in range(10)] + + cfg = ControlFlowGraph() + cfg.add_node( + BasicBlock( + 0, + [ + Assignment(orig_variables[0], Constant(0, Integer.int32_t())), + Assignment(ListOperation([]), Call(FunctionSymbol("fun", 0), [orig_variables[0]])), + Assignment(orig_variables[1], Constant(1, Integer.int32_t())), + Assignment(ListOperation([]), Call(FunctionSymbol("fun", 0), [orig_variables[1]])), + Assignment(orig_variables[2], orig_variables[1]), + Assignment(ListOperation([]), Call(FunctionSymbol("fun", 0), [orig_variables[2]])), + Assignment(orig_variables[3], Constant(3, Integer.int32_t())), + Assignment(ListOperation([]), Call(FunctionSymbol("fun", 0), [orig_variables[3]])), + ], + ) + ) + + task = decompiler_task(cfg, SSAOptions.conditional) + interference_graph = InterferenceGraph(cfg) + renamer = ConditionalVariableRenamer(task, interference_graph) + + assert renamer.renaming_map == { + orig_variables[0]: new_variables[0], + orig_variables[1]: new_variables[2], + orig_variables[2]: new_variables[2], + orig_variables[3]: new_variables[1], + } From 5aaa2fa0778df0d66a6f1fa7718e1a4936f1c537 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 9 May 2024 15:02:13 +0200 Subject: [PATCH 05/25] black --- tests/pipeline/SSA/test_out_of_ssa_renaming.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pipeline/SSA/test_out_of_ssa_renaming.py b/tests/pipeline/SSA/test_out_of_ssa_renaming.py index 17de33147..499a1d9d8 100644 --- a/tests/pipeline/SSA/test_out_of_ssa_renaming.py +++ b/tests/pipeline/SSA/test_out_of_ssa_renaming.py @@ -1,4 +1,5 @@ """Pytest for renaming SSA-variables to non-SSA-variables.""" + import string from decompiler.pipeline.ssa.phi_lifting import PhiFunctionLifter From f02b7a7b19ed57a85e2c8c8df8921347b210934e Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 14:45:19 +0200 Subject: [PATCH 06/25] Adjust _expression_dependencies() --- decompiler/pipeline/ssa/dependency_graph.py | 32 +++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index d73917eda..51576b603 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -1,4 +1,3 @@ -from functools import reduce from typing import Iterator from decompiler.structures.graphs.cfg import ControlFlowGraph @@ -47,18 +46,27 @@ def _expression_dependencies(expression: Expression) -> dict[Variable, float]: case Variable(): return {expression: 1.0} case Operation(): - operation_type_penalty = { - OperationType.call: 0, - OperationType.address: 0, - OperationType.dereference: 0, - OperationType.member_access: 0, - }.get(expression.operation, 0.5) + if expression.operation in { + OperationType.call, + OperationType.address, + OperationType.dereference, + OperationType.member_access, + }: + return {} + + operands_dependencies = list(filter(lambda d: d, (_expression_dependencies(operand) for operand in expression.operands))) + dependencies: dict[Variable, float] = {} + for deps in operands_dependencies: + for var in deps: + score = deps[var] + score /= len(operands_dependencies) + score *= 0.5 # penalize operations, so that expressions like (a + (a + (a + (a + a)))) gets a lower score than just (a) + + if var not in dependencies: + dependencies[var] = score + else: + dependencies[var] += score - operands_dependencies = (_expression_dependencies(operand) for operand in expression.operands) - dependencies: dict[Variable, float] = reduce(dict.__or__, operands_dependencies, {}) - for var in dependencies: - dependencies[var] /= len(dependencies) - dependencies[var] *= operation_type_penalty return dependencies case _: return {} From 91e5db23e36b069011070e1d2c94e760cfefc7e0 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 14:47:16 +0200 Subject: [PATCH 07/25] Fix docstring of ConditionalVariableRenamer --- decompiler/pipeline/ssa/variable_renaming.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 813745087..bec38fc62 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -342,9 +342,9 @@ def _classes_of(self, neighborhood: Iterable[Variable]) -> Iterable[Variable]: class ConditionalVariableRenamer(VariableRenamer): """ - A minimal renaming strategy, that renames the SSA-variables such that the total number of non SSA-variables is (almost) minimal. - Therefore, we construct color-classes by using lexicographical BFS on the interference graph. When the interference graph is chordal - this leads to a minimum number of possible variables. + A renaming strategy that renames the SSA-variables, such that only variables that have a relation with each other can get the same name. + Therefore, we construct a dependency-graph with weights, telling us how likely these two variables are the same variable, i.e., + copy-assignments are more likely to be identically than complicated computations. """ def __init__(self, task, interference_graph: InterferenceGraph): From a46bae44a0f2cd0e840869c0c8c7005fdbd3537f Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 14:49:12 +0200 Subject: [PATCH 08/25] Add a type hint --- decompiler/pipeline/ssa/variable_renaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index bec38fc62..a0cca569f 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -356,7 +356,7 @@ def __init__(self, task, interference_graph: InterferenceGraph): dependency_graph = dependency_graph_from_cfg(task.graph) - mapping = {} + mapping: dict[tuple[Variable], tuple[Variable, ...]] = {} for variable in self.interference_graph.nodes(): contracted = tuple(self._variables_contracted_to[variable]) for var in contracted: From aeeaa9cab4c16cb81d56bd45bf0c7364aa916e3d Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 14:51:06 +0200 Subject: [PATCH 09/25] Remove some debug stuff --- decompiler/pipeline/ssa/variable_renaming.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index a0cca569f..bc79fdd59 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -365,9 +365,6 @@ def __init__(self, task, interference_graph: InterferenceGraph): # Merge nodes which need to be contracted from self._variables_contracted_to dependency_graph = networkx.relabel_nodes(dependency_graph, mapping) - # counter = 0 - # self._decorate_graph(dependency_graph, f"dep{counter}.svg") - dependency_graph.edge = dependency_graph.edges(data=True) while True: for u, v, _ in sorted(dependency_graph.edges(data=True), key=lambda edge: edge[2]["score"], reverse=True): @@ -388,11 +385,6 @@ def __init__(self, task, interference_graph: InterferenceGraph): break networkx.relabel_nodes(dependency_graph, {u: (*u, *v), v: (*u, *v)}, copy=False) - # counter += 1 - # self._decorate_graph(dependency_graph, f"dep{counter}.svg") - - # counter += 1 - # self._decorate_graph(dependency_graph, f"dep{counter}.svg") self._variable_classes_handler = VariableClassesHandler(defaultdict(set)) for i, vars in enumerate(dependency_graph.nodes): From 1641a5ff95ac5c213f42136b325c92b6a921d099 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 14:57:10 +0200 Subject: [PATCH 10/25] Use _variables_can_have_same_name --- decompiler/pipeline/ssa/variable_renaming.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index bc79fdd59..3d5ce3fd4 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -370,13 +370,7 @@ def __init__(self, task, interference_graph: InterferenceGraph): for u, v, _ in sorted(dependency_graph.edges(data=True), key=lambda edge: edge[2]["score"], reverse=True): if u == v: # self loop continue - - variables = u + v - if interference_graph.are_interfering(*variables): - continue - if u[0].type != v[0].type: - continue - if u[0].is_aliased != v[0].is_aliased: + if not self._variables_can_have_same_name(u, v): continue break @@ -406,7 +400,7 @@ def _decorate_graph(self, dependency_graph: MultiDiGraph, path: str): DecoratedGraph(decorated_graph).export_plot(path, type="svg") - def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool: + def _variables_can_have_same_name(self, source: tuple[Variable, ...], sink: tuple[Variable, ...]) -> bool: """ Two variable can have the same name, if they have the same type, are both aliased or both non-aliased variables, and if they do not interfere. @@ -415,8 +409,12 @@ def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> boo :param sink: The potential sink vertex :return: True, if the given variables can have the same name, and false otherwise. """ - if self.interference_graph.are_interfering(source, sink) or source.type != sink.type or source.is_aliased != sink.is_aliased: + if ( + self.interference_graph.are_interfering(*(source + sink)) + or source[0].type != sink[0].type + or source[0].is_aliased != sink[0].is_aliased + ): return False - if source.is_aliased and sink.is_aliased and source.name != sink.name: + if source[0].is_aliased and sink[0].is_aliased and source[0].name != sink[0].name: return False return True From 780bf8b0b881809895aac75f85968f5d5c4b632f Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 15:08:26 +0200 Subject: [PATCH 11/25] Simply syntax and remove an unused statement --- decompiler/pipeline/ssa/variable_renaming.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 3d5ce3fd4..358151e35 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -365,9 +365,8 @@ def __init__(self, task, interference_graph: InterferenceGraph): # Merge nodes which need to be contracted from self._variables_contracted_to dependency_graph = networkx.relabel_nodes(dependency_graph, mapping) - dependency_graph.edge = dependency_graph.edges(data=True) while True: - for u, v, _ in sorted(dependency_graph.edges(data=True), key=lambda edge: edge[2]["score"], reverse=True): + for u, v, _ in sorted(dependency_graph.edges(data="score"), key=lambda edge: edge[2], reverse=True): if u == v: # self loop continue if not self._variables_can_have_same_name(u, v): From 055404ae1c234b2d9fbe71c8f2b0ce0b52427280 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 15:17:43 +0200 Subject: [PATCH 12/25] Restructure ConditionalVariableRenamer --- decompiler/pipeline/ssa/variable_renaming.py | 48 ++++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 358151e35..55631c037 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -10,6 +10,7 @@ import networkx from decompiler.pipeline.ssa.dependency_graph import dependency_graph_from_cfg +from decompiler.structures.graphs.cfg import ControlFlowGraph from decompiler.structures.interferencegraph import InterferenceGraph from decompiler.structures.pseudo.expressions import GlobalVariable, Variable from decompiler.structures.pseudo.instructions import BaseAssignment, Instruction, Relation @@ -353,18 +354,27 @@ def __init__(self, task, interference_graph: InterferenceGraph): and to each color we assign the set of variables of this color. """ super().__init__(task, interference_graph.copy()) + self._generate_renaming_map(task.graph) - dependency_graph = dependency_graph_from_cfg(task.graph) + def _generate_renaming_map(self, cfg: ControlFlowGraph): + dependency_graph = dependency_graph_from_cfg(cfg) + dependency_graph = self.merge_contracted_variables(dependency_graph) + self.create_variable_classes(dependency_graph) + self.compute_new_name_for_each_variable() + + def merge_contracted_variables(self, dependency_graph): + """Merge nodes which need to be contracted from self._variables_contracted_to""" mapping: dict[tuple[Variable], tuple[Variable, ...]] = {} for variable in self.interference_graph.nodes(): contracted = tuple(self._variables_contracted_to[variable]) for var in contracted: mapping[(var,)] = contracted - # Merge nodes which need to be contracted from self._variables_contracted_to - dependency_graph = networkx.relabel_nodes(dependency_graph, mapping) + return networkx.relabel_nodes(dependency_graph, mapping) + def create_variable_classes(self, dependency_graph): + """Create the variable classes based on the given dependency graph.""" while True: for u, v, _ in sorted(dependency_graph.edges(data="score"), key=lambda edge: edge[2], reverse=True): if u == v: # self loop @@ -384,29 +394,14 @@ def __init__(self, task, interference_graph: InterferenceGraph): for var in vars: self._variable_classes_handler.add_variable_to_class(var, i) - self.compute_new_name_for_each_variable() - - def _decorate_graph(self, dependency_graph: MultiDiGraph, path: str): - decorated_graph = MultiDiGraph() - for node in dependency_graph.nodes: - decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) - for u, v, data in dependency_graph.edges.data(): - decorated_graph.add_edge(u, v, label=f"{data['score']}") - for nodes in networkx.weakly_connected_components(dependency_graph): - for node_1, node_2 in combinations(nodes, 2): - if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): - decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none") - - DecoratedGraph(decorated_graph).export_plot(path, type="svg") - def _variables_can_have_same_name(self, source: tuple[Variable, ...], sink: tuple[Variable, ...]) -> bool: """ - Two variable can have the same name, if they have the same type, are both aliased or both non-aliased variables, and if they + Two sets of variables can have the same name, if they have the same type, are both aliased or both non-aliased variables, and if they do not interfere. :param source: The potential source vertex. :param sink: The potential sink vertex - :return: True, if the given variables can have the same name, and false otherwise. + :return: True, if the given sets of variables can have the same name, and false otherwise. """ if ( self.interference_graph.are_interfering(*(source + sink)) @@ -417,3 +412,16 @@ def _variables_can_have_same_name(self, source: tuple[Variable, ...], sink: tupl if source[0].is_aliased and sink[0].is_aliased and source[0].name != sink[0].name: return False return True + + def _decorate_graph(self, dependency_graph: MultiDiGraph) -> DecoratedGraph: + decorated_graph = MultiDiGraph() + for node in dependency_graph.nodes: + decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) + for u, v, data in dependency_graph.edges.data(): + decorated_graph.add_edge(u, v, label=f"{data['score']}") + for nodes in networkx.weakly_connected_components(dependency_graph): + for node_1, node_2 in combinations(nodes, 2): + if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): + decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none") + + return DecoratedGraph(decorated_graph) From bb16b0b96dd17c42d07021efab5fdbb9a63833d6 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 22 May 2024 15:22:40 +0200 Subject: [PATCH 13/25] Move dependency graph decoration function --- decompiler/pipeline/ssa/dependency_graph.py | 19 +++++++++++++++++++ decompiler/pipeline/ssa/variable_renaming.py | 13 ------------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index 51576b603..622776219 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -1,12 +1,31 @@ +import itertools +from itertools import combinations from typing import Iterator +import networkx from decompiler.structures.graphs.cfg import ControlFlowGraph +from decompiler.structures.interferencegraph import InterferenceGraph from decompiler.structures.pseudo import Expression, Operation, OperationType from decompiler.structures.pseudo.expressions import Variable from decompiler.structures.pseudo.instructions import Assignment +from decompiler.util.decoration import DecoratedGraph from networkx import MultiDiGraph +def _decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: + decorated_graph = MultiDiGraph() + for node in dependency_graph.nodes: + decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) + for u, v, data in dependency_graph.edges.data(): + decorated_graph.add_edge(u, v, label=f"{data['score']}") + for nodes in networkx.weakly_connected_components(dependency_graph): + for node_1, node_2 in combinations(nodes, 2): + if any(interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): + decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none") + + return DecoratedGraph(decorated_graph) + + def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph: """ Construct the dependency graph of the given CFG, i.e. adds an edge between two variables if they depend on each other. diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 55631c037..c20582107 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -412,16 +412,3 @@ def _variables_can_have_same_name(self, source: tuple[Variable, ...], sink: tupl if source[0].is_aliased and sink[0].is_aliased and source[0].name != sink[0].name: return False return True - - def _decorate_graph(self, dependency_graph: MultiDiGraph) -> DecoratedGraph: - decorated_graph = MultiDiGraph() - for node in dependency_graph.nodes: - decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) - for u, v, data in dependency_graph.edges.data(): - decorated_graph.add_edge(u, v, label=f"{data['score']}") - for nodes in networkx.weakly_connected_components(dependency_graph): - for node_1, node_2 in combinations(nodes, 2): - if any(self.interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): - decorated_graph.add_edge(hash(node_1), hash(node_2), color="red", dir="none") - - return DecoratedGraph(decorated_graph) From 39f36bfdf9608a3de029d5232ca6686a372e6139 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 29 May 2024 15:49:24 +0200 Subject: [PATCH 14/25] Add docstring to _decorate_dependency_graph --- decompiler/pipeline/ssa/dependency_graph.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index 622776219..ba9b84447 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -13,6 +13,14 @@ def _decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: + """ + Creates a decorated graph from the given dependency and interference graphs. + + This function constructs a new graph where: + - Variables are represented as nodes. + - Dependencies between variables are represented as directed edges. + - Interferences between variables are represented as red, undirected edges. + """ decorated_graph = MultiDiGraph() for node in dependency_graph.nodes: decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) From 5a04d230db8c3103a0decfd74b6fdaddda647369 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 29 May 2024 15:50:19 +0200 Subject: [PATCH 15/25] Add docstring to _collect_variables --- decompiler/pipeline/ssa/dependency_graph.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index ba9b84447..e741079a8 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -55,6 +55,9 @@ def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph: def _collect_variables(cfg: ControlFlowGraph) -> Iterator[Variable]: + """ + Yields all variables contained in the given control flow graph. + """ for instruction in cfg.instructions: for subexpression in instruction.subexpressions(): if isinstance(subexpression, Variable): From 2e22e65f0b7477f778b860c6eb0e0d9d73928ad5 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 29 May 2024 15:52:02 +0200 Subject: [PATCH 16/25] Add docstring to _expression_dependencies --- decompiler/pipeline/ssa/dependency_graph.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index e741079a8..1d8789856 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -72,6 +72,15 @@ def _assignments_in_cfg(cfg: ControlFlowGraph) -> Iterator[Assignment]: def _expression_dependencies(expression: Expression) -> dict[Variable, float]: + """ + Calculate the dependencies of an expression in terms of its constituent variables. + + This function analyzes the given `expression` and returns a dictionary mapping each + `Variable` to a float score representing its contribution or dependency weight within + the expression. + The scoring mechanism accounts for different types of operations and + penalizes nested operations to reflect their complexity. + """ match expression: case Variable(): return {expression: 1.0} From 1b03a61b0281d229029dda577bd1e5cdc9a326e7 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 29 May 2024 16:25:10 +0200 Subject: [PATCH 17/25] Add docstring to _generate_renaming_map --- decompiler/pipeline/ssa/variable_renaming.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index c20582107..6294142d8 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -357,6 +357,16 @@ def __init__(self, task, interference_graph: InterferenceGraph): self._generate_renaming_map(task.graph) def _generate_renaming_map(self, cfg: ControlFlowGraph): + """ + Generate the renaming map for SSA variables. + + This function constructs a dependency graph from the given CFG, merges contracted variables, + creates variable classes, and computes new names for each variable. The process ensures that + only variables with specific relationships can share the same name, as determined by the + dependency graph. + + :param cfg: The control flow graph from which the dependency graph is derived. + """ dependency_graph = dependency_graph_from_cfg(cfg) dependency_graph = self.merge_contracted_variables(dependency_graph) From 7d9ec34828ac5f74df38b5b2e4a085646f31be89 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:11:01 +0200 Subject: [PATCH 18/25] Fix debug decorating of dependency graph --- decompiler/pipeline/ssa/dependency_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index 1d8789856..d83a2b64d 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -12,7 +12,7 @@ from networkx import MultiDiGraph -def _decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: +def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: """ Creates a decorated graph from the given dependency and interference graphs. @@ -25,7 +25,7 @@ def _decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_grap for node in dependency_graph.nodes: decorated_graph.add_node(hash(node), label="\n".join(map(lambda n: f"{n}: {n.type}, aliased: {n.is_aliased}", node))) for u, v, data in dependency_graph.edges.data(): - decorated_graph.add_edge(u, v, label=f"{data['score']}") + decorated_graph.add_edge(hash(u), hash(v), label=f"{data['score']}") for nodes in networkx.weakly_connected_components(dependency_graph): for node_1, node_2 in combinations(nodes, 2): if any(interference_graph.has_edge(pair[0], pair[1]) for pair in itertools.product(node_1, node_2)): From f5050c0fe2f0005b19e3f790546cf1c16d2ed4e1 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:12:00 +0200 Subject: [PATCH 19/25] Merge parallel edges in conditional renaming --- decompiler/pipeline/ssa/variable_renaming.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 6294142d8..42f0c8ce1 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -1,6 +1,5 @@ """Module for renaming variables in Out of SSA.""" -import itertools import logging from collections import defaultdict from dataclasses import dataclass, field @@ -16,7 +15,6 @@ from decompiler.structures.pseudo.instructions import BaseAssignment, Instruction, Relation from decompiler.structures.pseudo.typing import Type from decompiler.task import DecompilerTask -from decompiler.util.decoration import DecoratedGraph from decompiler.util.insertion_ordered_set import InsertionOrderedSet from decompiler.util.lexicographical_bfs import LexicographicalBFS from networkx import Graph, MultiDiGraph, connected_components @@ -373,7 +371,7 @@ def _generate_renaming_map(self, cfg: ControlFlowGraph): self.create_variable_classes(dependency_graph) self.compute_new_name_for_each_variable() - def merge_contracted_variables(self, dependency_graph): + def merge_contracted_variables(self, dependency_graph: MultiDiGraph): """Merge nodes which need to be contracted from self._variables_contracted_to""" mapping: dict[tuple[Variable], tuple[Variable, ...]] = {} for variable in self.interference_graph.nodes(): @@ -383,10 +381,15 @@ def merge_contracted_variables(self, dependency_graph): return networkx.relabel_nodes(dependency_graph, mapping) - def create_variable_classes(self, dependency_graph): + def create_variable_classes(self, dependency_graph: MultiDiGraph): """Create the variable classes based on the given dependency graph.""" while True: - for u, v, _ in sorted(dependency_graph.edges(data="score"), key=lambda edge: edge[2], reverse=True): + merged_edges: dict[frozenset[tuple[Variable, ...]], float] = defaultdict(lambda: 0) + for u, v, score in dependency_graph.edges(data="score"): + if u != v: + merged_edges[frozenset([u, v])] += score + + for (u, v), _ in sorted(merged_edges.items(), key=lambda edge: edge[1], reverse=True): if u == v: # self loop continue if not self._variables_can_have_same_name(u, v): From 8e7a65c0dd1c7d2573d5a4e59c412a49228002df Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 5 Jun 2024 17:25:12 +0200 Subject: [PATCH 20/25] Extract operation penalty in dependency_graph and change to 0.9 --- decompiler/pipeline/ssa/dependency_graph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index d83a2b64d..a2e962baf 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -11,6 +11,9 @@ from decompiler.util.decoration import DecoratedGraph from networkx import MultiDiGraph +# Multiplicative constant applied to dependency scores when encountering operations, to penalize too much nesting. +operation_penalty = 0.9 + def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: """ @@ -99,7 +102,7 @@ def _expression_dependencies(expression: Expression) -> dict[Variable, float]: for var in deps: score = deps[var] score /= len(operands_dependencies) - score *= 0.5 # penalize operations, so that expressions like (a + (a + (a + (a + a)))) gets a lower score than just (a) + score *= operation_penalty # penalize operations, so that expressions like (a + (a + (a + (a + a)))) gets a lower score than just (a) if var not in dependencies: dependencies[var] = score From 37eb715f2bd7c7fd7a13e266a4b825362ebc72ad Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 6 Jun 2024 11:06:51 +0200 Subject: [PATCH 21/25] Update replace variable code in variable_renaming.py --- decompiler/pipeline/ssa/variable_renaming.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/decompiler/pipeline/ssa/variable_renaming.py b/decompiler/pipeline/ssa/variable_renaming.py index 42f0c8ce1..8280cacb6 100644 --- a/decompiler/pipeline/ssa/variable_renaming.py +++ b/decompiler/pipeline/ssa/variable_renaming.py @@ -124,10 +124,11 @@ def rename(self): def _replace_variable_in_instruction(self, variable: Variable, instruction: Instruction) -> None: """Replace the given variable in the given instruction""" - if variable.ssa_label is None: + if variable not in self.renaming_map: return replacement_variable = self.renaming_map[variable].copy() - replacement_variable.ssa_name = variable.copy() + if variable.ssa_label is not None: + replacement_variable.ssa_name = variable.copy() instruction.substitute(variable, replacement_variable) if isinstance(instruction, Relation): instruction.rename(variable, replacement_variable) From c77738d0384204947f24b5f99e7453d68645b9ff Mon Sep 17 00:00:00 2001 From: Rihi <19492038+rihi@users.noreply.github.com> Date: Fri, 7 Jun 2024 10:06:53 +0200 Subject: [PATCH 22/25] Fix constant naming in dependency_graph.py Co-authored-by: ebehner --- decompiler/pipeline/ssa/dependency_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index a2e962baf..e8f0c9367 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -12,7 +12,7 @@ from networkx import MultiDiGraph # Multiplicative constant applied to dependency scores when encountering operations, to penalize too much nesting. -operation_penalty = 0.9 +OPERATION_PENALTY = 0.9 def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph: From 5d34076d29717067d58051a7b0e6a06bfa735c5a Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Fri, 7 Jun 2024 10:14:34 +0200 Subject: [PATCH 23/25] Fix renaming change --- decompiler/pipeline/ssa/dependency_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/pipeline/ssa/dependency_graph.py b/decompiler/pipeline/ssa/dependency_graph.py index e8f0c9367..941f67f05 100644 --- a/decompiler/pipeline/ssa/dependency_graph.py +++ b/decompiler/pipeline/ssa/dependency_graph.py @@ -102,7 +102,7 @@ def _expression_dependencies(expression: Expression) -> dict[Variable, float]: for var in deps: score = deps[var] score /= len(operands_dependencies) - score *= operation_penalty # penalize operations, so that expressions like (a + (a + (a + (a + a)))) gets a lower score than just (a) + score *= OPERATION_PENALTY # penalize operations, so that expressions like (a + (a + (a + (a + a)))) gets a lower score than just (a) if var not in dependencies: dependencies[var] = score From b9785e1aec5d72c1a627ce69f85ab29b9a416eda Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:28:36 +0200 Subject: [PATCH 24/25] Add test for parallel edges --- .../pipeline/SSA/test_out_of_ssa_renaming.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/pipeline/SSA/test_out_of_ssa_renaming.py b/tests/pipeline/SSA/test_out_of_ssa_renaming.py index 499a1d9d8..f0580da28 100644 --- a/tests/pipeline/SSA/test_out_of_ssa_renaming.py +++ b/tests/pipeline/SSA/test_out_of_ssa_renaming.py @@ -10,6 +10,7 @@ VariableRenamer, ) from decompiler.structures.interferencegraph import InterferenceGraph +from decompiler.structures.pseudo import Expression, Float, GlobalVariable from tests.pipeline.SSA.utils_out_of_ssa_tests import * @@ -854,3 +855,43 @@ def test_conditional_renaming(): orig_variables[2]: new_variables[2], orig_variables[3]: new_variables[1], } + +def test_conditional_parallel_edges(): + """ + Test that conditional renaming prioritizes paralles edges of single edges, whose sum of + weights is bigger than the weight of the single edge + """ + + def _v(name: str) -> Variable: + return Variable(name, Float.float()) + def _c(value: float) -> Constant: + return Constant(value, Float.float()) + def _op(exp: Expression) -> BinaryOperation: + return BinaryOperation(OperationType.plus_float, [exp, _c(0)]) + + cfg = ControlFlowGraph() + cfg.add_node(b1 := BasicBlock(1, [ + Assignment(_v("b"), _op(BinaryOperation(OperationType.plus_float, [_v("a0"), GlobalVariable("g0", Float.float(), _c(0))]))), + Assignment(_v("c"), _v("b")), + Assignment(_v("a1"), BinaryOperation(OperationType.plus_float, [_op(_v("b")), _v("c")])), + Assignment(_v("a0"), _v("a1")) # lifted phi function + ])) + cfg.add_node(b0 := BasicBlock(0, [ + # Phi(_v("a0"), [_c(0), _v("a1")], origin_block={b1: _v("a1")}), + Branch(Condition(OperationType.less, [_v("a0"), _c(100)])) + ])) + cfg.add_node(b2 := BasicBlock(2, [Return([])])) + + cfg.add_edge(TrueCase(b0, b1)) + cfg.add_edge(FalseCase(b0, b2)) + cfg.add_edge(UnconditionalEdge(b1, b0)) + + task = decompiler_task(cfg, SSAOptions.conditional) + interference_graph = InterferenceGraph(cfg) + renamer = ConditionalVariableRenamer(task, interference_graph) + + assert frozenset(frozenset(c) for c in renamer._variable_classes_handler.variable_class.values()) == frozenset({ + frozenset({GlobalVariable("g0", Float.float(), _c(0))}), + frozenset({_v("c")}), + frozenset({_v("a0"), _v("a1"), _v("b")}) + }) From 658ad3552f8e2ef4f18abe3e4a9ede2c52289989 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 19 Jun 2024 13:30:12 +0200 Subject: [PATCH 25/25] Run black to obfuscate code --- .../pipeline/SSA/test_out_of_ssa_renaming.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/tests/pipeline/SSA/test_out_of_ssa_renaming.py b/tests/pipeline/SSA/test_out_of_ssa_renaming.py index f0580da28..bd24c15ee 100644 --- a/tests/pipeline/SSA/test_out_of_ssa_renaming.py +++ b/tests/pipeline/SSA/test_out_of_ssa_renaming.py @@ -856,6 +856,7 @@ def test_conditional_renaming(): orig_variables[3]: new_variables[1], } + def test_conditional_parallel_edges(): """ Test that conditional renaming prioritizes paralles edges of single edges, whose sum of @@ -864,22 +865,34 @@ def test_conditional_parallel_edges(): def _v(name: str) -> Variable: return Variable(name, Float.float()) + def _c(value: float) -> Constant: return Constant(value, Float.float()) + def _op(exp: Expression) -> BinaryOperation: return BinaryOperation(OperationType.plus_float, [exp, _c(0)]) cfg = ControlFlowGraph() - cfg.add_node(b1 := BasicBlock(1, [ - Assignment(_v("b"), _op(BinaryOperation(OperationType.plus_float, [_v("a0"), GlobalVariable("g0", Float.float(), _c(0))]))), - Assignment(_v("c"), _v("b")), - Assignment(_v("a1"), BinaryOperation(OperationType.plus_float, [_op(_v("b")), _v("c")])), - Assignment(_v("a0"), _v("a1")) # lifted phi function - ])) - cfg.add_node(b0 := BasicBlock(0, [ - # Phi(_v("a0"), [_c(0), _v("a1")], origin_block={b1: _v("a1")}), - Branch(Condition(OperationType.less, [_v("a0"), _c(100)])) - ])) + cfg.add_node( + b1 := BasicBlock( + 1, + [ + Assignment(_v("b"), _op(BinaryOperation(OperationType.plus_float, [_v("a0"), GlobalVariable("g0", Float.float(), _c(0))]))), + Assignment(_v("c"), _v("b")), + Assignment(_v("a1"), BinaryOperation(OperationType.plus_float, [_op(_v("b")), _v("c")])), + Assignment(_v("a0"), _v("a1")), # lifted phi function + ], + ) + ) + cfg.add_node( + b0 := BasicBlock( + 0, + [ + # Phi(_v("a0"), [_c(0), _v("a1")], origin_block={b1: _v("a1")}), + Branch(Condition(OperationType.less, [_v("a0"), _c(100)])) + ], + ) + ) cfg.add_node(b2 := BasicBlock(2, [Return([])])) cfg.add_edge(TrueCase(b0, b1)) @@ -890,8 +903,6 @@ def _op(exp: Expression) -> BinaryOperation: interference_graph = InterferenceGraph(cfg) renamer = ConditionalVariableRenamer(task, interference_graph) - assert frozenset(frozenset(c) for c in renamer._variable_classes_handler.variable_class.values()) == frozenset({ - frozenset({GlobalVariable("g0", Float.float(), _c(0))}), - frozenset({_v("c")}), - frozenset({_v("a0"), _v("a1"), _v("b")}) - }) + assert frozenset(frozenset(c) for c in renamer._variable_classes_handler.variable_class.values()) == frozenset( + {frozenset({GlobalVariable("g0", Float.float(), _c(0))}), frozenset({_v("c")}), frozenset({_v("a0"), _v("a1"), _v("b")})} + )