From 49039598c72992c884b022a846ba38f4c9a8efa6 Mon Sep 17 00:00:00 2001 From: rihi Date: Wed, 30 Aug 2023 13:47:02 +0000 Subject: [PATCH 1/5] Create draft PR for #322 From 4caa521d06cada863bfa19691beec28118e3fa9b Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 30 Aug 2023 15:44:12 +0200 Subject: [PATCH 2/5] Add special case for function pointer type variable declarations --- decompiler/backend/cexpressiongenerator.py | 13 +++++++++++-- decompiler/backend/codegenerator.py | 6 +++++- decompiler/backend/variabledeclarations.py | 13 +++++++------ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index f310fe341..56e9d85ef 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -1,10 +1,9 @@ import logging from ctypes import c_byte, c_int, c_long, c_short, c_ubyte, c_uint, c_ulong, c_ushort from itertools import chain, repeat -from typing import Union from decompiler.structures import pseudo as expressions -from decompiler.structures.pseudo import Float, Integer, OperationType, StringSymbol +from decompiler.structures.pseudo import Float, FunctionTypeDef, Integer, OperationType, Pointer, StringSymbol, Type from decompiler.structures.pseudo import instructions as instructions from decompiler.structures.pseudo import operations as operations from decompiler.structures.visitors.interfaces import DataflowObjectVisitorInterface @@ -361,3 +360,13 @@ def _format_string_literal(constant: expressions.Constant) -> str: escaped = string_representation.replace('"', '\\"') return f'"{escaped}"' return f"{constant}" + + @staticmethod + def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: + """ Return a string representation of variable declarations.""" + match var_type: + case Pointer(type=FunctionTypeDef() as fun_type): + rest = "".join(map(lambda n: f"(* {n})({', '.join(str(x) for x in fun_type.parameters)})", var_names)) + return f"{fun_type.return_type} {rest}" + case _: + return f"{var_type} {', '.join(var_names)}" diff --git a/decompiler/backend/codegenerator.py b/decompiler/backend/codegenerator.py index 4ce11655a..8bb89cb6f 100644 --- a/decompiler/backend/codegenerator.py +++ b/decompiler/backend/codegenerator.py @@ -2,6 +2,7 @@ from string import Template from typing import Iterable, List +from decompiler.backend.cexpressiongenerator import CExpressionGenerator from decompiler.backend.codevisitor import CodeVisitor from decompiler.backend.variabledeclarations import GlobalDeclarationGenerator, LocalDeclarationGenerator from decompiler.task import DecompilerTask @@ -37,7 +38,10 @@ def generate_function(self, task: DecompilerTask) -> str: return self.TEMPLATE.substitute( return_type=task.function_return_type, name=task.name, - parameters=", ".join(map(lambda param: f"{param.type} {param.name}", task.function_parameters)), + parameters=", ".join(map( + lambda param: CExpressionGenerator.format_variables_declaration(param.type, [param.name]), + task.function_parameters + )), local_declarations=LocalDeclarationGenerator.from_task(task) if not task.failed else "", function_body=CodeVisitor(task).visit(task.syntax_tree.root) if not task.failed else task.failure_message, ) diff --git a/decompiler/backend/variabledeclarations.py b/decompiler/backend/variabledeclarations.py index 56b2ae2c8..362912fe7 100644 --- a/decompiler/backend/variabledeclarations.py +++ b/decompiler/backend/variabledeclarations.py @@ -2,6 +2,7 @@ from collections import defaultdict from typing import Iterable, Iterator, List, Set +from decompiler.backend.cexpressiongenerator import CExpressionGenerator from decompiler.structures.ast.ast_nodes import ForLoopNode, LoopNode from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree from decompiler.structures.pseudo import ( @@ -61,19 +62,19 @@ def visit_unary_operation(self, unary: UnaryOperation): else: self.visit(unary.operand.left) - def generate(self, param_names: list = []) -> Iterator[str]: + def generate(self, param_names: list[str] = []) -> Iterator[str]: """Generate a string containing the variable definitions for the visited variables.""" variable_type_mapping = defaultdict(list) for variable in sorted(self._variables, key=lambda x: str(x)): - if not isinstance(variable, GlobalVariable): + if not isinstance(variable, GlobalVariable) and variable.name not in param_names: variable_type_mapping[variable.type].append(variable) for variable_type, variables in sorted(variable_type_mapping.items(), key=lambda x: str(x)): for chunked_variables in self._chunks(variables, self._vars_per_line): - variable_names = ", ".join([var.name for var in chunked_variables]) - if variable_names in param_names: - continue - yield f"{variable_type} {variable_names};" + yield CExpressionGenerator.format_variables_declaration( + variable_type, + [var.name for var in chunked_variables] + ) + ";" @staticmethod def _chunks(lst: List, n: int) -> Iterator[List]: From 88c79f31af5eec909107090021048e1f2ac6c71f Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 31 Aug 2023 12:58:44 +0200 Subject: [PATCH 3/5] Fix missing comma in function pointer variables declaration --- decompiler/backend/cexpressiongenerator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 56e9d85ef..69e6413bd 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -366,7 +366,7 @@ def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: """ Return a string representation of variable declarations.""" match var_type: case Pointer(type=FunctionTypeDef() as fun_type): - rest = "".join(map(lambda n: f"(* {n})({', '.join(str(x) for x in fun_type.parameters)})", var_names)) + rest = ", ".join(map(lambda n: f"(* {n})({', '.join(str(x) for x in fun_type.parameters)})", var_names)) return f"{fun_type.return_type} {rest}" case _: return f"{var_type} {', '.join(var_names)}" From 598f85af6807691225ad425ae747182998c08bfb Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 31 Aug 2023 12:59:01 +0200 Subject: [PATCH 4/5] Add test cases --- tests/backend/test_codegenerator.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/backend/test_codegenerator.py b/tests/backend/test_codegenerator.py index 4f90e61f5..159986598 100644 --- a/tests/backend/test_codegenerator.py +++ b/tests/backend/test_codegenerator.py @@ -11,6 +11,7 @@ from decompiler.structures.ast.ast_nodes import CodeNode, SeqNode, SwitchNode from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree from decompiler.structures.logic.logic_condition import LogicCondition +from decompiler.structures.pseudo import FunctionTypeDef from decompiler.structures.pseudo.expressions import ( Constant, DataflowObject, @@ -75,6 +76,8 @@ def logic_cond(name: str, context) -> LogicCondition: var_x_u = Variable("x_u", uint32) var_y_u = Variable("y_u", uint32) var_p = Variable("p", Pointer(int32)) +var_fun_p = Variable("p", Pointer(FunctionTypeDef(0, int32, (int32,)))) +var_fun_p0 = Variable("p0", Pointer(FunctionTypeDef(0, int32, (int32,)))) const_0 = Constant(0, int32) const_1 = Constant(1, int32) @@ -155,6 +158,15 @@ def test_empty_function_two_parameters(self): r"^\s*int +test_function\(\s*int +a\s*,\s*int +b\s*\){\s*}\s*$", self._task(ast, params=[var_a.copy(), var_b.copy()]) ) + def test_empty_function_two_function_parameters(self): + root = SeqNode(LogicCondition.initialize_true(LogicCondition.generate_new_context())) + ast = AbstractSyntaxTree(root, {}) + code_node = ast._add_code_node([]) + ast._add_edge(root, code_node) + assert self._regex_matches( + r"^\s*int +test_function\(\s*int +\(\*\s*p\)\(int\)\s*,\s*int +\(\*\s*p0\)\(int\)\s*\){\s*}\s*$", self._task(ast, params=[var_fun_p.copy(), var_fun_p0.copy()]) + ) + def test_function_with_instruction(self): root = SeqNode(LogicCondition.initialize_true(LogicCondition.generate_new_context())) ast = AbstractSyntaxTree(root, {}) @@ -1069,6 +1081,8 @@ def test_operation(self, op, expected): (1, [var_x.copy(), var_y.copy(), var_x_f.copy(), var_y_f.copy()], "float x_f;\nfloat y_f;\nint x;\nint y;"), (2, [var_x.copy(), var_y.copy(), var_x_f.copy(), var_y_f.copy()], "float x_f, y_f;\nint x, y;"), (1, [var_x.copy(), var_y.copy(), var_p.copy()], "int x;\nint y;\nint * p;"), + (1, [var_x.copy(), var_y.copy(), var_fun_p.copy()], "int x;\nint y;\nint (* p)(int);"), + (2, [var_x.copy(), var_y.copy(), var_fun_p.copy(), var_fun_p0.copy()], "int x, y;\nint (* p)(int), (* p0)(int);"), ], ) def test_variable_declaration(self, vars_per_line: int, variables: List[Variable], expected: str): From 01af86d86df63fbb24eea1632401f98842c84495 Mon Sep 17 00:00:00 2001 From: rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 6 Sep 2023 16:07:37 +0200 Subject: [PATCH 5/5] Improve readability of format_variables_declaration(Type,list[str]) --- decompiler/backend/cexpressiongenerator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/decompiler/backend/cexpressiongenerator.py b/decompiler/backend/cexpressiongenerator.py index 69e6413bd..9a3cc000d 100644 --- a/decompiler/backend/cexpressiongenerator.py +++ b/decompiler/backend/cexpressiongenerator.py @@ -366,7 +366,8 @@ def format_variables_declaration(var_type: Type, var_names: list[str]) -> str: """ Return a string representation of variable declarations.""" match var_type: case Pointer(type=FunctionTypeDef() as fun_type): - rest = ", ".join(map(lambda n: f"(* {n})({', '.join(str(x) for x in fun_type.parameters)})", var_names)) - return f"{fun_type.return_type} {rest}" + parameter_names = ", ".join(str(parameter) for parameter in fun_type.parameters) + declarations_without_return_type = [f"(* {var_name})({parameter_names})" for var_name in var_names] + return f"{fun_type.return_type} {', '.join(declarations_without_return_type)}" case _: return f"{var_type} {', '.join(var_names)}"