From e95f1620af3c5c7c0cb5083c6efcb433fcdabd25 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 31 Aug 2023 15:57:23 +0200 Subject: [PATCH 1/4] Assert that no duplicated DataflowObjects exist after any pipeline stage --- decompiler/pipeline/pipeline.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/decompiler/pipeline/pipeline.py b/decompiler/pipeline/pipeline.py index 37fe1f7ed..8fb748d8d 100644 --- a/decompiler/pipeline/pipeline.py +++ b/decompiler/pipeline/pipeline.py @@ -19,6 +19,7 @@ from decompiler.task import DecompilerTask from decompiler.util.decoration import DecoratedAST, DecoratedCFG +from ..structures.graphs.cfg import ControlFlowGraph from .default import AST_STAGES, CFG_STAGES from .stage import PipelineStage @@ -109,6 +110,9 @@ def run(self, task: DecompilerTask): raise e break + if debug_mode and task.graph is not None: + self._assert_no_cfg_duplicates(task.graph) + @staticmethod def _show_stage(task: DecompilerTask, stage_name: str, print_ascii: bool, show_in_tabs: bool): """Based on the task either an AST or a CFG is shown on the console (ASCII) and/or in BinaryNinja (FlowGraph) tabs.""" @@ -122,3 +126,14 @@ def _show_stage(task: DecompilerTask, stage_name: str, print_ascii: bool, show_i DecoratedCFG.print_ascii(task.graph, stage_name) if show_in_tabs: DecoratedCFG.show_flowgraph(task.graph, stage_name) + + @staticmethod + def _assert_no_cfg_duplicates(cfg: ControlFlowGraph): + encountered_ids: set[int] = set() + + for instruction in cfg.instructions: + for obj in instruction.subexpressions(): + if id(obj) in encountered_ids: + raise AssertionError(f"Found duplicated DataflowObject in cfg: {obj}") + + encountered_ids.add(id(obj)) From 4109e96288c4930e0c6c8fc0a5f1777022efa09f Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 7 Sep 2023 11:55:12 +0200 Subject: [PATCH 2/4] Add extra flag to toggle dataflow dup validation ---
decompiler/pipeline/pipeline.py | 3 ++- decompiler/util/default.json | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/decompiler/pipeline/pipeline.py b/decompiler/pipeline/pipeline.py index 8fb748d8d..176cc5e2b 100644 --- a/decompiler/pipeline/pipeline.py +++ b/decompiler/pipeline/pipeline.py @@ -87,6 +87,7 @@ def run(self, task: DecompilerTask): print_ascii = output_format == "ascii" or output_format == "ascii_and_tabs" show_in_tabs = output_format == "tabs" or output_format == "ascii_and_tabs" debug_mode = task.options.getboolean("pipeline.debug", fallback=False) + validate_no_dataflowobj_dup = task.options.getboolean("pipeline.validate_no_dataflowobj_dup", fallback=False) self.validate() @@ -110,7 +111,7 @@ def run(self, task: DecompilerTask): raise e break - if debug_mode and task.graph is not None: + if validate_no_dataflowobj_dup and task.graph is not None: self._assert_no_cfg_duplicates(task.graph) @staticmethod diff --git a/decompiler/util/default.json b/decompiler/util/default.json index ed48b1984..52dadbaf6 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -631,6 +631,16 @@ "is_hidden_from_cli": false, "argument_name": "--ast-stages" }, + { + "dest": "pipeline.validate_no_dataflowobj_dup", + "default": false, + "title": "Validate no DataflowObject duplication", + "type": "boolean", + "description": "Throw exception if duplicate DataflowObjects exist after any stage", + "is_hidden_from_gui": true, + "is_hidden_from_cli": false, + "argument_name": "--validate-no-dataflowobj-dup" + }, { "dest": "pipeline.debug", "default": false, From 89801379f46804b6bc53ac8009f961cf563918c0 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 7 Sep 2023 12:01:24 +0200 Subject: [PATCH 3/4] Add dataflow dup validation to ast --- decompiler/pipeline/pipeline.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/decompiler/pipeline/pipeline.py 
b/decompiler/pipeline/pipeline.py index 176cc5e2b..f929d35fc 100644 --- a/decompiler/pipeline/pipeline.py +++ b/decompiler/pipeline/pipeline.py @@ -19,7 +19,9 @@ from decompiler.task import DecompilerTask from decompiler.util.decoration import DecoratedAST, DecoratedCFG -from ..structures.graphs.cfg import ControlFlowGraph +from ..structures.ast.ast_nodes import CodeNode +from ..structures.ast.syntaxtree import AbstractSyntaxTree +from ..structures.pseudo import Instruction from .default import AST_STAGES, CFG_STAGES from .stage import PipelineStage @@ -111,8 +113,11 @@ def run(self, task: DecompilerTask): raise e break - if validate_no_dataflowobj_dup and task.graph is not None: - self._assert_no_cfg_duplicates(task.graph) + if validate_no_dataflowobj_dup: + if task.graph is not None: + self._assert_no_dataflow_duplicates(list(task.graph.instructions)) + if task.syntax_tree is not None: + self._assert_no_ast_duplicates(task.syntax_tree) @staticmethod def _show_stage(task: DecompilerTask, stage_name: str, print_ascii: bool, show_in_tabs: bool): @@ -129,10 +134,19 @@ def _show_stage(task: DecompilerTask, stage_name: str, print_ascii: bool, show_i DecoratedCFG.show_flowgraph(task.graph, stage_name) @staticmethod - def _assert_no_cfg_duplicates(cfg: ControlFlowGraph): + def _assert_no_ast_duplicates(ast: AbstractSyntaxTree): + instructions = [] + for node in ast.topological_order(): + if isinstance(node, CodeNode): + instructions.extend(node.instructions) + + DecompilerPipeline._assert_no_dataflow_duplicates(instructions) + + @staticmethod + def _assert_no_dataflow_duplicates(instructions: list[Instruction]): encountered_ids: set[int] = set() - for instruction in cfg.instructions: + for instruction in instructions: for obj in instruction.subexpressions(): if id(obj) in encountered_ids: raise AssertionError(f"Found duplicated DataflowObject in cfg: {obj}") From 7bc9412eb026093522e062e7cec3bceb219c45f5 Mon Sep 17 00:00:00 2001 From: rihi 
<19492038+rihi@users.noreply.github.com> Date: Thu, 7 Sep 2023 12:06:33 +0200 Subject: [PATCH 4/4] Rename flag --- decompiler/pipeline/pipeline.py | 4 ++-- decompiler/util/default.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/decompiler/pipeline/pipeline.py b/decompiler/pipeline/pipeline.py index f929d35fc..829efa2ce 100644 --- a/decompiler/pipeline/pipeline.py +++ b/decompiler/pipeline/pipeline.py @@ -89,7 +89,7 @@ def run(self, task: DecompilerTask): print_ascii = output_format == "ascii" or output_format == "ascii_and_tabs" show_in_tabs = output_format == "tabs" or output_format == "ascii_and_tabs" debug_mode = task.options.getboolean("pipeline.debug", fallback=False) - validate_no_dataflowobj_dup = task.options.getboolean("pipeline.validate_no_dataflowobj_dup", fallback=False) + validate_no_dataflow_dup = task.options.getboolean("pipeline.validate_no_dataflow_dup", fallback=False) self.validate() @@ -113,7 +113,7 @@ def run(self, task: DecompilerTask): raise e break - if validate_no_dataflowobj_dup: + if validate_no_dataflow_dup: if task.graph is not None: self._assert_no_dataflow_duplicates(list(task.graph.instructions)) if task.syntax_tree is not None: diff --git a/decompiler/util/default.json b/decompiler/util/default.json index 52dadbaf6..1fe8aabfd 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -632,14 +632,14 @@ "argument_name": "--ast-stages" }, { - "dest": "pipeline.validate_no_dataflowobj_dup", + "dest": "pipeline.validate_no_dataflow_dup", "default": false, "title": "Validate no DataflowObject duplication", "type": "boolean", "description": "Throw exception if duplicate DataflowObjects exist after any stage", "is_hidden_from_gui": true, "is_hidden_from_cli": false, - "argument_name": "--validate-no-dataflowobj-dup" + "argument_name": "--validate-no-dataflow-dup" }, { "dest": "pipeline.debug",