-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into refactoring_switch_node_handler
- Loading branch information
Showing
12 changed files
with
590 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,114 @@ | ||
from typing import Iterable, List, Optional, Set | ||
import itertools | ||
from itertools import combinations | ||
from typing import Iterator | ||
|
||
import networkx | ||
from decompiler.structures.graphs.cfg import ControlFlowGraph | ||
from decompiler.structures.interferencegraph import InterferenceGraph | ||
from decompiler.structures.pseudo import Expression, Operation, OperationType | ||
from decompiler.structures.pseudo.expressions import Variable | ||
from decompiler.structures.pseudo.instructions import Assignment | ||
from decompiler.structures.pseudo.operations import Call | ||
from networkx import DiGraph, weakly_connected_components | ||
from decompiler.util.decoration import DecoratedGraph | ||
from networkx import MultiDiGraph | ||
|
||
# Multiplicative factor applied to dependency scores once per enclosing operation, so that deeply nested expressions are penalized. | ||
OPERATION_PENALTY = 0.9 | ||
|
||
def _non_call_assignments(cfg: ControlFlowGraph) -> Iterable[Assignment]: | ||
|
||
def decorate_dependency_graph(dependency_graph: MultiDiGraph, interference_graph: InterferenceGraph) -> DecoratedGraph:
    """
    Build a decorated (renderable) graph from a dependency graph and an interference graph.

    The resulting graph contains:
    - one node per dependency-graph node, keyed by the node's hash and labeled with one line
      per contained variable (its string form, type and aliased flag),
    - one edge per dependency edge, labeled with the edge's 'score' attribute,
    - one edge with color="red" and dir="none" for every pair of nodes in the same weakly connected
      component for which at least one pair of their variables has an edge in the interference graph.

    :param dependency_graph: The dependency graph; each node is iterated as a collection of variables.
    :param interference_graph: The interference graph queried via has_edge.
    :return: The DecoratedGraph wrapping the newly built graph.
    """
    rendered = MultiDiGraph()

    for group in dependency_graph.nodes:
        description = "\n".join(f"{n}: {n.type}, aliased: {n.is_aliased}" for n in group)
        rendered.add_node(hash(group), label=description)

    for source, sink, attributes in dependency_graph.edges.data():
        rendered.add_edge(hash(source), hash(sink), label=f"{attributes['score']}")

    for component in networkx.weakly_connected_components(dependency_graph):
        for first_group, second_group in combinations(component, 2):
            # A single interfering variable pair is enough to mark the two nodes as interfering.
            for left, right in itertools.product(first_group, second_group):
                if interference_graph.has_edge(left, right):
                    rendered.add_edge(hash(first_group), hash(second_group), color="red", dir="none")
                    break

    return DecoratedGraph(rendered)
|
||
|
||
def dependency_graph_from_cfg(cfg: ControlFlowGraph) -> MultiDiGraph:
    """
    Construct the dependency graph of the given control flow graph.

    - Every variable occurring in the cfg becomes a node, represented as a 1-tuple containing the variable.
    - For every assignment, an edge is added from each defined variable to each variable used in the
      assigned value whose dependency score is positive; the score is stored in the edge attribute 'score'.

    :param cfg: The control flow graph whose instructions are analyzed.
    :return: The dependency graph as a MultiDiGraph.
    """
    graph = MultiDiGraph()

    for variable in _collect_variables(cfg):
        graph.add_node((variable,))

    for assignment in _assignments_in_cfg(cfg):
        targets = assignment.definitions
        for used, weight in _expression_dependencies(assignment.value).items():
            if not weight > 0:
                continue
            for target in targets:
                graph.add_edge((target,), (used,), score=weight)

    return graph
|
||
|
||
def _collect_variables(cfg: ControlFlowGraph) -> Iterator[Variable]:
    """
    Lazily yield every Variable found among the subexpressions of the cfg's instructions.

    A variable is yielded once per occurrence, in instruction order.
    """
    for instruction in cfg.instructions:
        yield from (candidate for candidate in instruction.subexpressions() if isinstance(candidate, Variable))
|
||
|
||
def _assignments_in_cfg(cfg: ControlFlowGraph) -> Iterator[Assignment]:
    """
    Yield all interesting assignments for the dependency graph.

    Iterates over cfg.instructions in order and yields the instructions that pass the filters below.
    """
    for instr in cfg.instructions:
        # Outer filter: an Assignment whose destination is a Variable and whose value is not a Call.
        if isinstance(instr, Assignment) and isinstance(instr.destination, Variable) and not isinstance(instr.value, Call):
            # NOTE(review): this check is redundant under the outer condition. The two consecutive conditions
            # look like the removed and the added side of a diff hunk - confirm which filter is intended.
            if isinstance(instr, Assignment):
                yield instr
|
||
|
||
class DependencyGraph(DiGraph):
    """
    Directed graph over variables in which an edge (definition -> requirement) records that the two
    variables are candidates for sharing a name. Each edge carries a 'strength' attribute
    ("high", "medium" or "low").
    """

    def __init__(self, interference_graph: Optional[InterferenceGraph] = None):
        """
        Create a dependency graph without edges.

        :param interference_graph: The interference graph whose nodes become the nodes of this graph.
            If omitted, the graph starts empty and no two variables are considered interfering.
        """
        super().__init__()
        # The signature advertises None as default, so guard it instead of failing on `None.nodes`.
        if interference_graph is not None:
            self.add_nodes_from(interference_graph.nodes)
        self.interference_graph = interference_graph

    @classmethod
    def from_cfg(cls, cfg: ControlFlowGraph, interference_graph: InterferenceGraph):
        """
        Construct the dependency graph of the given CFG, i.e. add an edge between two variables if they depend on each other.

        - At most one edge is added per instruction, from the defined variable to one of its requirements.
        - A plain copy (the value is a variable) gets strength "high", a value with exactly one requirement
          gets strength "medium", and otherwise the unique non-interfering requirement (if any) gets strength "low".
        - An edge is only added if both variables can have the same name.

        :param cfg: The control flow graph whose non-call assignments are considered.
        :param interference_graph: The interference graph used to decide whether two variables may share a name.
        :return: The resulting dependency graph.
        """
        dependency_graph = cls(interference_graph)
        for instruction in _non_call_assignments(cfg):
            defined_variable = instruction.destination
            if isinstance(instruction.value, Variable):
                if dependency_graph._variables_can_have_same_name(defined_variable, instruction.value):
                    dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="high")
            elif len(instruction.requirements) == 1:
                if dependency_graph._variables_can_have_same_name(defined_variable, instruction.requirements[0]):
                    dependency_graph.add_edge(defined_variable, instruction.requirements[0], strength="medium")
            else:
                non_interfering_variable = dependency_graph._non_interfering_requirements(instruction.requirements, defined_variable)
                # Compare against None explicitly so the decision never depends on the truthiness of a Variable.
                if non_interfering_variable is not None:
                    dependency_graph.add_edge(defined_variable, non_interfering_variable, strength="low")
        return dependency_graph

    def _non_interfering_requirements(self, requirements: List[Variable], defined_variable: Variable) -> Optional[Variable]:
        """
        Get the unique requirement that can share a name with the defined variable.

        :param requirements: The variables required by the instruction.
        :param defined_variable: The variable defined by the instruction.
        :return: The single compatible requirement, or None if there is none or more than one.
        """
        non_interfering_requirement = None
        for required_variable in requirements:
            if self._variables_can_have_same_name(defined_variable, required_variable):
                if non_interfering_requirement is not None:
                    # A second candidate makes the choice ambiguous.
                    return None
                non_interfering_requirement = required_variable
        return non_interfering_requirement

    def _variables_can_have_same_name(self, source: Variable, sink: Variable) -> bool:
        """
        Two variables can have the same name if they have the same type, are both aliased or both non-aliased variables,
        and if they do not interfere. Two aliased variables must additionally already share the same name.

        :param source: The potential source vertex.
        :param sink: The potential sink vertex.
        :return: True, if the given variables can have the same name, and False otherwise.
        """
        # Without an interference graph there is no interference information, so nothing interferes.
        if self.interference_graph is not None and self.interference_graph.are_interfering(source, sink):
            return False
        if source.type != sink.type or source.is_aliased != sink.is_aliased:
            return False
        if source.is_aliased and sink.is_aliased and source.name != sink.name:
            return False
        return True

    def get_components(self) -> Iterable[Set[Variable]]:
        """Yield the weakly connected components of the dependency graph, each as a set of variables."""
        for component in weakly_connected_components(self):
            yield set(component)
def _expression_dependencies(expression: Expression) -> dict[Variable, float]:
    """
    Compute how strongly the given expression depends on each of the variables it contains.

    - A variable depends on itself with score 1.0.
    - Calls, address-of, dereference and member-access operations contribute no dependencies.
    - For any other operation, the scores of its operands are computed recursively. Operands without
      dependencies are ignored; the remaining scores are divided by the number of contributing operands,
      multiplied by OPERATION_PENALTY and summed per variable.
    - Every other kind of expression has no dependencies.

    :param expression: The expression to analyze.
    :return: A mapping from each contributing variable to its dependency score.
    """
    if isinstance(expression, Variable):
        return {expression: 1.0}
    if not isinstance(expression, Operation):
        return {}

    ignored_operations = {
        OperationType.call,
        OperationType.address,
        OperationType.dereference,
        OperationType.member_access,
    }
    if expression.operation in ignored_operations:
        return {}

    # Only operands that actually contribute a dependency take part in the averaging.
    contributing = [scores for scores in map(_expression_dependencies, expression.operands) if scores]
    contributor_count = len(contributing)

    combined: dict[Variable, float] = {}
    for operand_scores in contributing:
        for variable, weight in operand_scores.items():
            # Penalize operations, so that (a + (a + (a + (a + a)))) gets a lower score than just (a).
            share = (weight / contributor_count) * OPERATION_PENALTY
            if variable in combined:
                combined[variable] += share
            else:
                combined[variable] = share

    return combined
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.