From 70d4374a617245c6b698559b6439da11e42b22f6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 17 Jan 2024 09:29:23 +0000 Subject: [PATCH 1/6] Create draft PR for #102 From 06a311565eda2e08f11279a5e9db0cd92e6d2886 Mon Sep 17 00:00:00 2001 From: fnhartmann Date: Thu, 18 Jan 2024 11:49:48 +0100 Subject: [PATCH 2/6] Simplify branches with true/false conditions over config option --- decompiler/backend/codevisitor.py | 9 +++++++++ decompiler/util/default.json | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/decompiler/backend/codevisitor.py b/decompiler/backend/codevisitor.py index 66058b239..59441f3fb 100644 --- a/decompiler/backend/codevisitor.py +++ b/decompiler/backend/codevisitor.py @@ -39,6 +39,7 @@ def __init__(self, task: DecompilerTask): self._int_repr_scope: int = task.options.getint("code-generator.int_representation_scope", fallback=256) self._neg_hex_as_twos_complement: bool = task.options.getboolean("code-generator.negative_hex_as_twos_complement", fallback=True) self._aggressive_array_detection: bool = task.options.getboolean("code-generator.aggressive_array_detection", fallback=False) + self._simplify_branches: bool = task.options.getboolean("code-generator.simplify_branches") self.task = task def visit_seq_node(self, node: ast_nodes.SeqNode) -> str: @@ -68,9 +69,17 @@ def visit_loop_node(self, node: ast_nodes.LoopNode) -> str: def visit_condition_node(self, node: ast_nodes.ConditionNode) -> str: """Generate code for a conditional.""" true_str = self.visit(node.true_branch_child) + if self._simplify_branches and node.condition.is_true: + return true_str if node.false_branch is None: + if self._simplify_branches and (node.condition.is_false or not true_str): + return "" return f"if ({self._condition_string(node.condition)}) {{{true_str}}}" false_str = self.visit(node.false_branch_child) + if self._simplify_branches and node.condition.is_false: + return false_str + if 
self._simplify_branches and not false_str: + return f"if ({self._condition_string(node.condition)}) {{{true_str}}}" if isinstance(node.false_branch_child, ast_nodes.ConditionNode): return f"if ({self._condition_string(node.condition)}){{{true_str}}} else {false_str}" return f"if ({self._condition_string(node.condition)}){{{true_str}}} else{{{false_str}}}" diff --git a/decompiler/util/default.json b/decompiler/util/default.json index 25f5be1c9..905f826e7 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -418,6 +418,16 @@ "is_hidden_from_cli": false, "argument_name": "--variable-declarations-per-line" }, + { + "dest": "code-generator.simplify_branches", + "default": false, + "title": "Simplify branches with true or false conditions", + "type": "boolean", + "description": "Removes branches in the output that wont be reached because of a 'true' or 'false' condition", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--simplify-branches" + }, { "dest": "pattern-independent-restructuring.switch_reconstruction", "default": true, From 31c647058c50e1d04fed2f83e3774917c20c1a8b Mon Sep 17 00:00:00 2001 From: fnhartmann Date: Thu, 18 Jan 2024 11:50:32 +0100 Subject: [PATCH 3/6] Tests for simplified branches --- tests/backend/test_codegenerator.py | 104 +++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 2 deletions(-) diff --git a/tests/backend/test_codegenerator.py b/tests/backend/test_codegenerator.py index 234c049a6..1d096a922 100644 --- a/tests/backend/test_codegenerator.py +++ b/tests/backend/test_codegenerator.py @@ -62,6 +62,11 @@ def true_condition(context=None): return LogicCondition.initialize_true(context) +def false_condition(context=None): + context = LogicCondition.generate_new_context() if context is None else context + return LogicCondition.initialize_false(context) + + def logic_cond(name: str, context) -> LogicCondition: return LogicCondition.initialize_symbol(name, context) @@ -99,6 
+104,7 @@ def _generate_options( twos_complement: bool = True, array_detection: bool = False, var_declarations_per_line: int = 1, + simplify_branches: bool = False, ): options = Options() options.set("code-generator.max_complexity", max_complx) @@ -111,15 +117,15 @@ def _generate_options( options.set("code-generator.negative_hex_as_twos_complement", twos_complement) options.set("code-generator.aggressive_array_detection", array_detection) options.set("code-generator.variable_declarations_per_line", var_declarations_per_line) + options.set("code-generator.simplify_branches", simplify_branches) return options class TestCodeGeneration: @staticmethod - def _task(ast: AbstractSyntaxTree, params: List[DataflowObject] = None, return_type: Type = int32): + def _task(ast: AbstractSyntaxTree, params: List[DataflowObject] = None, return_type: Type = int32, options: Options = _generate_options(max_complx=100, compounding=False)): if not params: params = [] - options = _generate_options(max_complx=100, compounding=False) return DecompilerTask("test_function", None, ast=ast, options=options, function_parameters=params, function_return_type=return_type) @staticmethod @@ -236,6 +242,100 @@ def test_function_with_true_condition(self): self._task(ast, params=[var_a.copy(), var_b.copy()]), ) + def test_function_with_simplified_true_condition(self): + """ + if(true){ + c = 5 + return c + } + """ + context = LogicCondition.generate_new_context() + root = SeqNode(LogicCondition.initialize_true(context)) + ast = AbstractSyntaxTree(root, {x1_symbol(context): Condition(OperationType.less, [var_c.copy(), const_5.copy()])}) + seq_node = ast.factory.create_seq_node() + ast._add_node(seq_node) + code_node = ast._add_code_node([instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])]) + condition_node = ast._add_condition_node_with(condition=true_condition(ast.factory.logic_context), true_branch=seq_node) + ast._add_edges_from(((root, condition_node), 
(seq_node, code_node))) + assert self._regex_matches( + r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%c%=%5%;%return%c%;%}%$".replace("%", "\\s*"), + self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + ) + + def test_function_with_simplified_false_condition(self): + """ + if(false){ + c = 5 + return c + } else { + return 0 + } + """ + context = LogicCondition.generate_new_context() + root = SeqNode(LogicCondition.initialize_true(context)) + ast = AbstractSyntaxTree(root, {x1_symbol(context): Condition(OperationType.less, [var_c.copy(), const_5.copy()])}) + true_seq_node = ast.factory.create_seq_node() + ast._add_node(true_seq_node) + true_code_node = ast._add_code_node([instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])]) + false_seq_node = ast.factory.create_seq_node() + ast._add_node(false_seq_node) + false_code_node = ast._add_code_node([instructions.Return([const_0.copy()])]) + condition_node = ast._add_condition_node_with(condition=false_condition(ast.factory.logic_context), true_branch=true_seq_node, false_branch=false_seq_node) + ast._add_edges_from(((root, condition_node), (true_seq_node, true_code_node), (false_seq_node, false_code_node))) + assert self._regex_matches( + r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%return%0%;%}%$".replace("%", "\\s*"), + self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + ) + + def test_function_with_simplified_false_condition_in_true_branch(self): + """ + if(c < 5){ + if(false){ + c = 5 + return c + } + } + """ + context = LogicCondition.generate_new_context() + root = SeqNode(LogicCondition.initialize_true(context)) + ast = AbstractSyntaxTree(root, {x1_symbol(context): Condition(OperationType.less, [var_c.copy(), const_5.copy()])}) + seq_node = ast.factory.create_seq_node() + ast._add_node(seq_node) + code_node = 
ast._add_code_node([instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])]) + false_condition_node = ast._add_condition_node_with(condition=false_condition(ast.factory.logic_context), true_branch=seq_node) + condition_node = ast._add_condition_node_with(condition=x1_symbol(ast.factory.logic_context), true_branch=false_condition_node) + ast._add_edges_from(((root, condition_node), (seq_node, code_node))) + assert self._regex_matches( + r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%}%$".replace("%", "\\s*"), + self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + ) + + def test_function_with_simplified_false_condition_in_false_branch(self): + """ + if(c < 5){ + return 0 + } else { + if(false){ + c = 5 + return c + } + } + """ + context = LogicCondition.generate_new_context() + root = SeqNode(LogicCondition.initialize_true(context)) + ast = AbstractSyntaxTree(root, {x1_symbol(context): Condition(OperationType.less, [var_c.copy(), const_5.copy()])}) + seq_node = ast.factory.create_seq_node() + ast._add_node(seq_node) + false_condition_code_node = ast._add_code_node([instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])]) + false_condition_node = ast._add_condition_node_with(condition=false_condition(ast.factory.logic_context), true_branch=seq_node) + code_node = ast._add_code_node([instructions.Return([const_0.copy()])]) + condition_node = ast._add_condition_node_with(condition=x1_symbol(ast.factory.logic_context), true_branch=code_node, false_branch=false_condition_node) + ast._add_edges_from(((root, condition_node), (seq_node, false_condition_code_node))) + assert self._regex_matches( + r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%if%\(%c%<%5%\)%{%return%0%;%}%}%$".replace("%", "\\s*"), + self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + ) + + def 
test_function_with_ifelse(self): context = LogicCondition.generate_new_context() root = SeqNode(LogicCondition.initialize_true(context)) From ae6c92f6cfc1fa148ddd447836177b051cd80fea Mon Sep 17 00:00:00 2001 From: fnhartmann Date: Thu, 18 Jan 2024 11:54:21 +0100 Subject: [PATCH 4/6] Black formatting --- tests/backend/test_codegenerator.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/backend/test_codegenerator.py b/tests/backend/test_codegenerator.py index 1d096a922..a21df0b13 100644 --- a/tests/backend/test_codegenerator.py +++ b/tests/backend/test_codegenerator.py @@ -123,7 +123,12 @@ def _generate_options( class TestCodeGeneration: @staticmethod - def _task(ast: AbstractSyntaxTree, params: List[DataflowObject] = None, return_type: Type = int32, options: Options = _generate_options(max_complx=100, compounding=False)): + def _task( + ast: AbstractSyntaxTree, + params: List[DataflowObject] = None, + return_type: Type = int32, + options: Options = _generate_options(max_complx=100, compounding=False), + ): if not params: params = [] return DecompilerTask("test_function", None, ast=ast, options=options, function_parameters=params, function_return_type=return_type) @@ -280,7 +285,9 @@ def test_function_with_simplified_false_condition(self): false_seq_node = ast.factory.create_seq_node() ast._add_node(false_seq_node) false_code_node = ast._add_code_node([instructions.Return([const_0.copy()])]) - condition_node = ast._add_condition_node_with(condition=false_condition(ast.factory.logic_context), true_branch=true_seq_node, false_branch=false_seq_node) + condition_node = ast._add_condition_node_with( + condition=false_condition(ast.factory.logic_context), true_branch=true_seq_node, false_branch=false_seq_node + ) ast._add_edges_from(((root, condition_node), (true_seq_node, true_code_node), (false_seq_node, false_code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int 
+b%\)%{%int%c;%return%0%;%}%$".replace("%", "\\s*"), @@ -326,10 +333,14 @@ def test_function_with_simplified_false_condition_in_false_branch(self): ast = AbstractSyntaxTree(root, {x1_symbol(context): Condition(OperationType.less, [var_c.copy(), const_5.copy()])}) seq_node = ast.factory.create_seq_node() ast._add_node(seq_node) - false_condition_code_node = ast._add_code_node([instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])]) + false_condition_code_node = ast._add_code_node( + [instructions.Assignment(var_c.copy(), const_5.copy()), instructions.Return([var_c.copy()])] + ) false_condition_node = ast._add_condition_node_with(condition=false_condition(ast.factory.logic_context), true_branch=seq_node) code_node = ast._add_code_node([instructions.Return([const_0.copy()])]) - condition_node = ast._add_condition_node_with(condition=x1_symbol(ast.factory.logic_context), true_branch=code_node, false_branch=false_condition_node) + condition_node = ast._add_condition_node_with( + condition=x1_symbol(ast.factory.logic_context), true_branch=code_node, false_branch=false_condition_node + ) ast._add_edges_from(((root, condition_node), (seq_node, false_condition_code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%if%\(%c%<%5%\)%{%return%0%;%}%}%$".replace("%", "\\s*"), From 48b3188e8e334415f2ff41f7e299ee2cf748eb17 Mon Sep 17 00:00:00 2001 From: fnhartmann Date: Thu, 18 Jan 2024 12:07:01 +0100 Subject: [PATCH 5/6] Set fallback --- decompiler/backend/codevisitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/backend/codevisitor.py b/decompiler/backend/codevisitor.py index 59441f3fb..edb826897 100644 --- a/decompiler/backend/codevisitor.py +++ b/decompiler/backend/codevisitor.py @@ -39,7 +39,7 @@ def __init__(self, task: DecompilerTask): self._int_repr_scope: int = task.options.getint("code-generator.int_representation_scope", fallback=256) 
self._neg_hex_as_twos_complement: bool = task.options.getboolean("code-generator.negative_hex_as_twos_complement", fallback=True) self._aggressive_array_detection: bool = task.options.getboolean("code-generator.aggressive_array_detection", fallback=False) - self._simplify_branches: bool = task.options.getboolean("code-generator.simplify_branches") + self._simplify_branches: bool = task.options.getboolean("code-generator.simplify_branches", fallback=False) self.task = task def visit_seq_node(self, node: ast_nodes.SeqNode) -> str: From d1ab68af0ae5ea9626968ec3fc7df172dcbc5272 Mon Sep 17 00:00:00 2001 From: fnhartmann Date: Mon, 29 Jan 2024 11:13:25 +0100 Subject: [PATCH 6/6] Set option to true --- decompiler/backend/codevisitor.py | 2 +- decompiler/util/default.json | 2 +- tests/backend/test_codegenerator.py | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/decompiler/backend/codevisitor.py b/decompiler/backend/codevisitor.py index edb826897..8f9e26f5e 100644 --- a/decompiler/backend/codevisitor.py +++ b/decompiler/backend/codevisitor.py @@ -39,7 +39,7 @@ def __init__(self, task: DecompilerTask): self._int_repr_scope: int = task.options.getint("code-generator.int_representation_scope", fallback=256) self._neg_hex_as_twos_complement: bool = task.options.getboolean("code-generator.negative_hex_as_twos_complement", fallback=True) self._aggressive_array_detection: bool = task.options.getboolean("code-generator.aggressive_array_detection", fallback=False) - self._simplify_branches: bool = task.options.getboolean("code-generator.simplify_branches", fallback=False) + self._simplify_branches: bool = task.options.getboolean("code-generator.simplify_branches", fallback=True) self.task = task def visit_seq_node(self, node: ast_nodes.SeqNode) -> str: diff --git a/decompiler/util/default.json b/decompiler/util/default.json index 905f826e7..c195dda26 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -420,7 +420,7 @@ }, { 
"dest": "code-generator.simplify_branches", - "default": false, + "default": true, "title": "Simplify branches with true or false conditions", "type": "boolean", "description": "Removes branches in the output that wont be reached because of a 'true' or 'false' condition", diff --git a/tests/backend/test_codegenerator.py b/tests/backend/test_codegenerator.py index a21df0b13..e1a495273 100644 --- a/tests/backend/test_codegenerator.py +++ b/tests/backend/test_codegenerator.py @@ -104,7 +104,7 @@ def _generate_options( twos_complement: bool = True, array_detection: bool = False, var_declarations_per_line: int = 1, - simplify_branches: bool = False, + simplify_branches: bool = True, ): options = Options() options.set("code-generator.max_complexity", max_complx) @@ -244,7 +244,7 @@ def test_function_with_true_condition(self): ast._add_edges_from(((root, condition_node), (seq_node, code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%if%\(%true%\)%{%c%=%5%;%return%c%;%}%}%$".replace("%", "\\s*"), - self._task(ast, params=[var_a.copy(), var_b.copy()]), + self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=False)), ) def test_function_with_simplified_true_condition(self): @@ -264,7 +264,7 @@ def test_function_with_simplified_true_condition(self): ast._add_edges_from(((root, condition_node), (seq_node, code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%c%=%5%;%return%c%;%}%$".replace("%", "\\s*"), - self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + self._task(ast, params=[var_a.copy(), var_b.copy()]), ) def test_function_with_simplified_false_condition(self): @@ -291,7 +291,7 @@ def test_function_with_simplified_false_condition(self): ast._add_edges_from(((root, condition_node), (true_seq_node, true_code_node), (false_seq_node, false_code_node))) assert self._regex_matches( r"^%int 
+test_function\(%int +a%,%int +b%\)%{%int%c;%return%0%;%}%$".replace("%", "\\s*"), - self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + self._task(ast, params=[var_a.copy(), var_b.copy()]), ) def test_function_with_simplified_false_condition_in_true_branch(self): @@ -314,7 +314,7 @@ def test_function_with_simplified_false_condition_in_true_branch(self): ast._add_edges_from(((root, condition_node), (seq_node, code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%}%$".replace("%", "\\s*"), - self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + self._task(ast, params=[var_a.copy(), var_b.copy()]), ) def test_function_with_simplified_false_condition_in_false_branch(self): @@ -344,7 +344,7 @@ def test_function_with_simplified_false_condition_in_false_branch(self): ast._add_edges_from(((root, condition_node), (seq_node, false_condition_code_node))) assert self._regex_matches( r"^%int +test_function\(%int +a%,%int +b%\)%{%int%c;%if%\(%c%<%5%\)%{%return%0%;%}%}%$".replace("%", "\\s*"), - self._task(ast, params=[var_a.copy(), var_b.copy()], options=_generate_options(simplify_branches=True)), + self._task(ast, params=[var_a.copy(), var_b.copy()]), ) def test_function_with_ifelse(self):