Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[For-Loop Reconstruction] Follow up: Not handling "continue" instructions correctly #326

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
99b75ba
Create draft PR for #325
fnhartmann Aug 30, 2023
d698580
Merge branch 'main' into issue-325-_For-Loop_Reconstruction_Follow_up…
fnhartmann Sep 14, 2023
3e392cc
Equalize last defintion in continue node and reconstruct for loop
fnhartmann Sep 18, 2023
635faae
Changed and added tests
fnhartmann Sep 20, 2023
6767cf7
Changed condition in remove_continuation + comments
fnhartmann Sep 20, 2023
b34d013
Syntax and other small changes
fnhartmann Oct 12, 2023
ec1ef9d
Added check for uniform variables in continuation and last definition
fnhartmann Oct 16, 2023
6997fd5
Moved check for uniform variables, added check for commutativity
fnhartmann Oct 18, 2023
fe90534
Unify instructions, applied changing cases
fnhartmann Nov 13, 2023
42782bd
Merge branch 'main' into issue-325-_For-Loop_Reconstruction_Follow_up…
fnhartmann Nov 14, 2023
fd1076f
Moved methods in new structure
fnhartmann Nov 14, 2023
2e2fdf2
Clean up tests for new structure
fnhartmann Nov 14, 2023
560b5d0
Changed current tests to new recovery output
fnhartmann Nov 14, 2023
72be886
Black formatting
fnhartmann Nov 14, 2023
7799db5
Unified form only needed for continuation
fnhartmann Nov 15, 2023
b4cfe63
Replaced setter functions with substitutes
fnhartmann Nov 16, 2023
67f325a
Clean up imports
fnhartmann Nov 16, 2023
0c2d455
Implemented tests
fnhartmann Nov 16, 2023
a7f4d76
Fix comments
fnhartmann Nov 16, 2023
55ead2e
Shortened if statement
fnhartmann Nov 16, 2023
29ec9fa
Updated suggestions
fnhartmann Dec 6, 2023
4efd39a
Black formatting, added comment
fnhartmann Dec 6, 2023
13dc250
Fix substitute in unify
fnhartmann Dec 7, 2023
3176e98
Operate on last definitions
fnhartmann Dec 7, 2023
0d5b1df
Updated comments
fnhartmann Dec 7, 2023
795c68e
Only reconstruct if defined and used variables are the same
fnhartmann Dec 12, 2023
d4d5dd7
Added skip testcase for continuation with only constant
fnhartmann Dec 19, 2023
6a9e4bc
Adjusted check for simple binary operation and variable usage
fnhartmann Dec 19, 2023
459504d
Fix order of check for continue nodes
fnhartmann Dec 19, 2023
aa02657
Merged substitutes
fnhartmann Dec 19, 2023
83370e4
Fix missing brackets
fnhartmann Dec 19, 2023
9dc780e
Added check for correct continuation instruction
fnhartmann Dec 19, 2023
9515867
Merged substitutes
fnhartmann Dec 19, 2023
4a2f72d
Merge branch 'main' into issue-325-_For-Loop_Reconstruction_Follow_up…
ebehner Dec 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, Optional, Union
from typing import Dict, List, Optional, Union

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import (
Expand All @@ -19,7 +19,7 @@
)
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Assignment, Condition, Expression, Operation, Variable
from decompiler.structures.pseudo import Assignment, BinaryOperation, Condition, Constant, Expression, Operation, OperationType, Variable
from decompiler.structures.visitors.assignment_visitor import AssignmentVisitor
from decompiler.task import DecompilerTask
from decompiler.util.options import Options
Expand Down Expand Up @@ -277,6 +277,7 @@ def run(self):
-> loop condition complexity < condition complexity
-> possible modification complexity < modification complexity
-> if condition is only a symbol: check condition type for allowed one
-> has a continue statement which must and can be equalized

If 'force_for_loops' is enabled, the complexity options are ignored and every while loop after the
initial transformation will be forced into a for loop with an empty declaration/modification
Expand All @@ -287,9 +288,6 @@ def run(self):
or self._invalid_simple_for_loop_condition_type(loop_node.condition):
continue

if any(node.does_end_with_continue for node in loop_node.body.get_descendant_code_nodes_interrupting_ancestor_loop()):
continue

if not self._force_for_loops and loop_node.condition.get_complexity(self._ast.condition_map) > self._condition_max_complexity:
continue

Expand All @@ -300,6 +298,10 @@ def run(self):
continue
if not self._force_for_loops and continuation.instruction.complexity > self._modification_max_complexity:
continue
if (equalizable_continue_nodes := self._get_continue_nodes_with_equalizable_definition(loop_node, continuation, variable_init)) is None:
break
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved
for node in equalizable_continue_nodes:
self._remove_continuation_from_last_definition(node, continuation, variable_init)
self._replace_with_for_loop(loop_node, continuation, variable_init)
break

Expand All @@ -315,6 +317,63 @@ def run(self):
)
)

def _get_continue_nodes_with_equalizable_definition(self, loop_node: WhileLoopNode, continuation: AstInstruction, variable_init: AstInstruction) -> List[CodeNode]:
"""
Finds code nodes of a while loop containing continue statements and a definition of the continuation instruction, which can be easily equalized.

:param loop_node: While-loop to search in
:param continuation: Instruction defining the for-loops modification
:param variable_init: Instruction defining the for-loops declaration
:return: List of continue code nodes that has equalizable definitions, None if at least one continue node does not match the requirements
"""
equalizable_nodes = []
for code_node in loop_node.body.get_descendant_code_nodes_interrupting_ancestor_loop():
if not code_node.does_end_with_continue:
continue
ebehner marked this conversation as resolved.
Show resolved Hide resolved
if not self._is_expression_simple_binary_operation(continuation.instruction.value):
return None
if (last_definition_index := _get_last_definition_index_of(code_node, variable_init.instruction.destination)) == -1:
return None
if not (self._is_expression_simple_binary_operation(code_node.instructions[last_definition_index].value) \
or isinstance(code_node.instructions[last_definition_index].value, Constant)):
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved
return None
equalizable_nodes.append(code_node)
return equalizable_nodes

def _is_expression_simple_binary_operation(self, expression: Expression) -> bool:
"""Checks if an expression is a binary operation with plus or minus and a constant."""
if not isinstance(expression, BinaryOperation):
return False
if not expression.operation in [OperationType.plus, OperationType.minus]:
return False
if not any(isinstance(operand, Constant) for operand in expression.operands):
return False
return True
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved

def _remove_continuation_from_last_definition(self, code_node: CodeNode, continuation: AstInstruction, variable_init: AstInstruction):
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved
"""
Substracts the value of the continuation instruction from the last definition, which must be a simple binary operation or a constant.
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved

:param code_node: Code node whose last instruction is to be changed
:param continuation: Instruction defining the for-loops modification
:param variable_init: Instruction defining the for-loops declaration
"""
last_definition = code_node.instructions[_get_last_definition_index_of(code_node, variable_init.instruction.destination)]
continuation_operand = continuation.instruction.value.right if isinstance(continuation.instruction.value.right, Constant) else continuation.instruction.value.left

if isinstance(last_definition.value, BinaryOperation):
last_definition_operand = last_definition.value.right if isinstance(last_definition.value.right, Constant) else last_definition.value.left

if continuation.instruction.value.operation == last_definition.value.operation:
last_definition_operand.value -= continuation_operand.value
else:
last_definition_operand.value += continuation_operand.value
else:
if continuation.instruction.value.operation == OperationType.plus:
last_definition.value.value -= continuation_operand.value
else:
last_definition.value.value += continuation_operand.value

fnhartmann marked this conversation as resolved.
Show resolved Hide resolved
def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInstruction, init: AstInstruction):
"""
Replaces a given WhileLoopNode with a ForLoopNode.
Expand Down Expand Up @@ -347,7 +406,7 @@ def _replace_with_for_loop(self, loop_node: WhileLoopNode, continuation: AstInst
)
continuation.node.instructions.remove(continuation.instruction)
self._ast.clean_up()

def _invalid_simple_for_loop_condition_type(self, logic_condition) -> bool:
""" Checks if a logic condition is only a symbol, if true checks condition type of symbol for forbidden ones"""
if not logic_condition.is_symbol or not self._forbidden_condition_types:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
WhileLoopReplacer,
WhileLoopVariableRenamer,
_find_continuation_instruction,
_get_last_definition_index_of,
_has_deep_requirement,
_initialization_reaches_loop_node,
)
Expand Down Expand Up @@ -2080,8 +2081,10 @@ def test_declaration_listop(self, ast_call_for_loop):
if isinstance(node, ForLoopNode):
assert node.declaration.destination.operands[0].name == "i"

def test_skip_for_loop_recovery_if_continue_in_while(self):
def test_for_loop_recovery_if_continue_in_while_1(self):
"""
Test for loop recovery if a continue occurs in a while loop and the last definition is a simple binary operation

a = 0
while(a < 10) {
if(a == 2) {
Expand Down Expand Up @@ -2127,9 +2130,68 @@ def test_skip_for_loop_recovery_if_continue_in_while(self):
)

WhileLoopReplacer(ast, _generate_options()).run()
assert not any(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))
assert all(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))

condition_nodes = list(ast.get_condition_nodes_post_order())
last_definition = condition_nodes[0].true_branch_child.instructions[_get_last_definition_index_of(condition_nodes[0].true_branch_child, Variable("a"))]
assert last_definition.value.right.value == 1
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved

def test_for_loop_recovery_if_continue_in_while_2(self):
"""
Test for loop recovery if a continue occurs in a while loop and the last definition is a contant assignment

a = 0
while(a < 10) {
if(a == 2) {
a = 4
continue
}
a = a + 1
}
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved
"""
true_value = LogicCondition.initialize_true(context := LogicCondition.generate_new_context())
ast = AbstractSyntaxTree(
root := SeqNode(true_value),
condition_map={
logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(10)]),
logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)])
}
)

true_branch = ast._add_code_node(
[
Assignment(Variable("a"), Constant(4)),
Continue()
]
)
if_condition = ast._add_condition_node_with(logic_cond("x2", context), true_branch)

init_code_node = ast._add_code_node([Assignment(Variable("a"), Constant(0))])

while_loop = ast.factory.create_while_loop_node(logic_cond("x1", context))
while_loop_body = ast.factory.create_seq_node()
while_loop_iteration = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))])
ast._add_node(while_loop)
ast._add_node(while_loop_body)

ast._add_edges_from(
[
(root, init_code_node),
(root, while_loop),
(while_loop, while_loop_body),
(while_loop_body, if_condition),
(while_loop_body, while_loop_iteration)
]
)

def test_skip_for_loop_recovery_if_continue_in_nested_while(self):
WhileLoopReplacer(ast, _generate_options()).run()
assert all(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))

condition_nodes = list(ast.get_condition_nodes_post_order())
last_definition = condition_nodes[0].true_branch_child.instructions[_get_last_definition_index_of(condition_nodes[0].true_branch_child, Variable("a"))]
assert last_definition.value.value == 3
fnhartmann marked this conversation as resolved.
Show resolved Hide resolved

def test_for_loop_recovery_if_continue_in_nested_while(self):
"""
while(a < 5) {
a = a + b
Expand Down Expand Up @@ -2188,5 +2250,156 @@ def test_skip_for_loop_recovery_if_continue_in_nested_while(self):
)

WhileLoopReplacer(ast, _generate_options()).run()
loop_nodes = list(ast.get_loop_nodes_post_order())
assert not isinstance(loop_nodes[0], ForLoopNode) and isinstance(loop_nodes[1], ForLoopNode)
assert all(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))

condition_nodes = list(ast.get_condition_nodes_post_order())
last_definition = condition_nodes[0].true_branch_child.instructions[_get_last_definition_index_of(condition_nodes[0].true_branch_child, Variable("b"))]
assert last_definition.value.right.value == 1

def test_skip_for_loop_recovery_if_continue_in_while_1(self):
"""
Test skip of for loop recovery if a continue occurs in a while loop, because the continuation instruction is no simple binary operation

a = 0
while(a < 10) {
if(a == 2) {
a = a + 2
continue
}
a = a * 2
}
"""
true_value = LogicCondition.initialize_true(context := LogicCondition.generate_new_context())
ast = AbstractSyntaxTree(
root := SeqNode(true_value),
condition_map={
logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(10)]),
logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)])
}
)

true_branch = ast._add_code_node(
[
Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(2)])),
Continue()
]
)
if_condition = ast._add_condition_node_with(logic_cond("x2", context), true_branch)

init_code_node = ast._add_code_node([Assignment(Variable("a"), Constant(0))])

while_loop = ast.factory.create_while_loop_node(logic_cond("x1", context))
while_loop_body = ast.factory.create_seq_node()
while_loop_iteration = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.multiply, [Variable("a"), Constant(2)]))])
ast._add_node(while_loop)
ast._add_node(while_loop_body)

ast._add_edges_from(
[
(root, init_code_node),
(root, while_loop),
(while_loop, while_loop_body),
(while_loop_body, if_condition),
(while_loop_body, while_loop_iteration)
]
)

WhileLoopReplacer(ast, _generate_options()).run()
assert not any(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))


def test_skip_for_loop_recovery_if_continue_in_while_2(self):
"""
Test skip of for loop recovery if a continue occurs in a while loop, because the last definition is no simple binary operation

a = 0
while(a < 10) {
if(a == 2) {
a = a * 2
continue
}
a = a + 1
}
"""
true_value = LogicCondition.initialize_true(context := LogicCondition.generate_new_context())
ast = AbstractSyntaxTree(
root := SeqNode(true_value),
condition_map={
logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(10)]),
logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)])
}
)

true_branch = ast._add_code_node(
[
Assignment(Variable("a"), BinaryOperation(OperationType.multiply, [Variable("a"), Constant(2)])),
Continue()
]
)
if_condition = ast._add_condition_node_with(logic_cond("x2", context), true_branch)

init_code_node = ast._add_code_node([Assignment(Variable("a"), Constant(0))])

while_loop = ast.factory.create_while_loop_node(logic_cond("x1", context))
while_loop_body = ast.factory.create_seq_node()
while_loop_iteration = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))])
ast._add_node(while_loop)
ast._add_node(while_loop_body)

ast._add_edges_from(
[
(root, init_code_node),
(root, while_loop),
(while_loop, while_loop_body),
(while_loop_body, if_condition),
(while_loop_body, while_loop_iteration)
]
)

WhileLoopReplacer(ast, _generate_options()).run()
assert not any(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))

def test_skip_for_loop_recovery_if_continue_in_while_3(self):
"""
Test skip of for loop recovery if a continue occurs in a while loop, because no last definition exists

a = 0
while(a < 10) {
if(a == 2) {
continue
}
a = a + 1
}
"""
true_value = LogicCondition.initialize_true(context := LogicCondition.generate_new_context())
ast = AbstractSyntaxTree(
root := SeqNode(true_value),
condition_map={
logic_cond("x1", context): Condition(OperationType.less, [Variable("a"), Constant(10)]),
logic_cond("x2", context): Condition(OperationType.equal, [Variable("a"), Constant(2)])
}
)

true_branch = ast._add_code_node([Continue()])
if_condition = ast._add_condition_node_with(logic_cond("x2", context), true_branch)

init_code_node = ast._add_code_node([Assignment(Variable("a"), Constant(0))])

while_loop = ast.factory.create_while_loop_node(logic_cond("x1", context))
while_loop_body = ast.factory.create_seq_node()
while_loop_iteration = ast._add_code_node([Assignment(Variable("a"), BinaryOperation(OperationType.plus, [Variable("a"), Constant(1)]))])
ast._add_node(while_loop)
ast._add_node(while_loop_body)

ast._add_edges_from(
[
(root, init_code_node),
(root, while_loop),
(while_loop, while_loop_body),
(while_loop_body, if_condition),
(while_loop_body, while_loop_iteration)
]
)

WhileLoopReplacer(ast, _generate_options()).run()
assert not any(isinstance(loop_node, ForLoopNode) for loop_node in list(ast.get_loop_nodes_post_order()))
Loading