import logging
import re
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, List, Optional

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import CaseNode, CodeNode, ConditionNode, LoopNode, SwitchNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask


def _get_var_counter(var_name: str) -> Optional[str]:
    """
    Return the counter of a given variable name, if any is present.

    The counter is the run of decimal digits the name ends with (e.g. "var_12" -> "12").
    None is returned if the name does not end in a digit.
    """
    if counter := re.match(r".*?([0-9]+)$", var_name):
        return counter.group(1)
    return None


def _get_containing_variables(dfo: DataflowObject) -> List[Variable]:
    """Return all Variable instances yielded by dfo.subexpressions(), in iteration order."""
    return [sub_exp for sub_exp in dfo.subexpressions() if isinstance(sub_exp, Variable)]


class VariableCollector(BaseAstDataflowObjectVisitor):
    """
    Visit relevant nodes and collect their variables.

    Variables found in loop conditions are kept in a separate list from all other variables.
    """

    def __init__(self, cond_map: Dict[LogicCondition, Condition]):
        """Store the map used to resolve condition symbols into the conditions they stand for."""
        self._cond_map: Dict[LogicCondition, Condition] = cond_map
        self._loop_vars: list[Variable] = []
        self._variables: list[Variable] = []

    def get_variables(self) -> list[Variable]:
        """Get collected variables."""
        return self._variables

    def get_loop_variables(self) -> list[Variable]:
        """Get collected variables used in loops."""
        return self._loop_vars

    def visit_condition_node(self, node: ConditionNode):
        """Collect the variables of every condition referenced by the symbols of the node's condition."""
        for symbol in node.condition.get_symbols():
            self._variables.extend(_get_containing_variables(self._cond_map[symbol]))

    def visit_loop_node(self, node: LoopNode):
        """Collect the variables of every condition referenced by the loop condition as loop variables."""
        for symbol in node.condition.get_symbols():
            self._loop_vars.extend(_get_containing_variables(self._cond_map[symbol]))

    def visit_variable(self, expression: Variable):
        """Collect a visited variable."""
        self._variables.append(expression)


class NamingConvention(str, Enum):
    """Enum for the currently available naming conventions."""

    default = "default"
    system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
    """Base class for different Renaming schemes."""

    def __init__(self, task: DecompilerTask) -> None:
        """Collect all variables needed for renaming and filter out the ones which must keep their name."""
        collector = VariableCollector(task._ast.condition_map)
        collector.visit_ast(task._ast)
        self._params: List[Variable] = task._function_parameters
        self._loop_vars: List[Variable] = collector.get_loop_variables()
        # _filter_variables reads _params and _loop_vars, so both have to be set before filtering.
        self._variables: List[Variable] = [var for var in collector.get_variables() if self._filter_variables(var)]

    def _filter_variables(self, item: Variable) -> bool:
        """Return False if variable is a parameter, renamed loop variable or GlobalVariable, else True"""
        if item in self._params or isinstance(item, GlobalVariable):
            return False
        # A loop variable whose name no longer contains "var_" is treated as already renamed.
        return not (item in self._loop_vars and "var_" not in item.name)

    @abstractmethod
    def renameVariableNames(self):
        """Abstract method which should rename variables with respect to the used scheme."""


class HungarianScheme(RenamingScheme):
    """Class which renames variables into hungarian notation."""

    # Maps a type class to the prefix used for each supported size of that type.
    type_prefix = {
        Float: {16: "h", 32: "f", 64: "d", 80: "ld", 128: "q", 256: "o"},
        Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
    }

    def __init__(self, task: DecompilerTask) -> None:
        """Collect the variables to rename and read the options configuring the generated names."""
        super().__init__(task)
        self._name = VariableNameGeneration.name
        self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var")
        self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
        self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
        self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")

    def renameVariableNames(self):
        """
        Rename all collected variables to the hungarian notation.

        The counter of the new name is the trailing number of the old name (empty if there is none).
        NOTE(review): the new name depends only on the type prefix, the configured base name and
        separators, and that counter, so variables agreeing in all of these get the same name.
        """
        for var in self._variables:
            var._name = self._hungarian_notation(var, _get_var_counter(var.name) or "")

    def _hungarian_notation(self, var: Variable, counter: str) -> str:
        """Return hungarian notation to a given variable, using the given counter string as suffix."""
        return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self._var_name}{self._counter_separator}{counter}"

    def _hungarian_prefix(self, var_type: Type) -> str:
        """
        Return hungarian prefix to a given variable type.

        Types without a known prefix yield an empty prefix instead of None or a KeyError.
        """
        if isinstance(var_type, Pointer):
            if self._pointer_base:
                return f"{self._hungarian_prefix(var_type.type)}p"
            return "p"
        if isinstance(var_type, CustomType):
            if var_type.is_boolean:
                return "b"
            if var_type.size == 0:
                return "v"
            return ""
        if isinstance(var_type, (Integer, Float)):
            sign = "" if var_type.is_signed else "u"
            # Sizes (or subclasses) without an entry in the table only keep the sign marker.
            prefix = self.type_prefix.get(type(var_type), {}).get(var_type.size, "")
            return f"{sign}{prefix}"
        return ""


class DefaultScheme(RenamingScheme):
    """Class which renames variables into the default scheme."""

    def renameVariableNames(self):
        """Keep all variable names unchanged."""
        # Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters?


class VariableNameGeneration(PipelineStage):
    """
    Pipelinestage in charge of renaming variables to a configured format.
    Currently only the 'default' and the 'system_hungarian' notation are supported.
    """

    name: str = "variable-name-generation"

    # Renaming scheme implementing each naming convention.
    _schemes: Dict[NamingConvention, type] = {
        NamingConvention.default: DefaultScheme,
        NamingConvention.system_hungarian: HungarianScheme,
    }

    def __init__(self):
        """Create the stage; the notation is read from the task options in run()."""
        self._notation: Optional[str] = None

    def run(self, task: DecompilerTask):
        """
        Rename variable names to the given scheme.

        If the configured notation is not a known naming convention, a warning is logged and nothing is renamed.
        """
        self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")
        try:
            convention = NamingConvention(self._notation)
        except ValueError:
            logging.warning("Unknown variable naming convention %r, variables are not renamed.", self._notation)
            return
        renamer: RenamingScheme = self._schemes[convention](task)
        renamer.renameVariableNames()
ExpressionSimplification, InstructionLengthHandler, VariableNameGeneration] diff --git a/decompiler/util/default.json b/decompiler/util/default.json index 8a72c6295..83f6ef0fd 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -357,6 +357,61 @@ "is_hidden_from_cli": false, "argument_name": "--rename-while-loop-variables" }, + { + "dest": "variable-name-generation.notation", + "default": "system_hungarian", + "title": "Variable Naming Convention", + "type": "string", + "enum": ["default", "system_hungarian"], + "enumDescriptions": [ + "Default setting (e.g. int var_0).", + "System Hungarian (e.g. int iVar0; float fVar0)" + ], + "description": "Selects the naming convention for variable names.", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--variable-generation-notation" + }, + { + "dest": "variable-name-generation.variable_name", + "default": "Var", + "title": "Variable Base Name for hungarian notation", + "type": "string", + "description": "", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--variable-generation-variable-name" + }, + { + "dest": "variable-name-generation.pointer_base", + "default": true, + "title": "Pointer base type prefix for hungarian notation", + "type": "boolean", + "description": "Prefix the basetype of a pointer (e.g. int * piVar0)", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--variable-generation-pointer-base" + }, + { + "dest": "variable-name-generation.type_separator", + "default": "", + "title": "Type Separator for hungarian notation", + "type": "string", + "description": "How to separate the prefix from the variable name (e.g. 
'_' -> i_Var0)", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--variable-generation-type-separator" + }, + { + "dest": "variable-name-generation.counter_separator", + "default": "", + "title": "Counter Separator for hungarian notation", + "type": "string", + "description": "How to separate the variable name from its counter (e.g. '_' -> iVar_0)", + "is_hidden_from_gui": false, + "is_hidden_from_cli": false, + "argument_name": "--variable-generation-counter-separator" + }, { "dest": "code-generator.max_complexity", "default": 100, @@ -546,7 +601,8 @@ "default": [ "readability-based-refinement", "expression-simplification", - "instruction-length-handler" + "instruction-length-handler", + "variable-name-generation" ], "title": "AST pipeline stages", "type": "array", diff --git a/tests/pipeline/controlflowanalysis/test_variable_name_generation.py b/tests/pipeline/controlflowanalysis/test_variable_name_generation.py new file mode 100644 index 000000000..acd24d9b8 --- /dev/null +++ b/tests/pipeline/controlflowanalysis/test_variable_name_generation.py @@ -0,0 +1,90 @@ +from typing import List + +import pytest +from decompiler.backend.codegenerator import CodeGenerator +from decompiler.pipeline.controlflowanalysis import VariableNameGeneration +from decompiler.structures.ast.ast_nodes import CodeNode +from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree +from decompiler.structures.logic.logic_condition import LogicCondition +from decompiler.structures.pseudo import Assignment, Constant, CustomType, Float, Integer, Pointer, Variable +from decompiler.task import DecompilerTask +from decompiler.util.decoration import DecoratedCode +from decompiler.util.options import Options + +PIPELINE_NAME = VariableNameGeneration.name + +I8 = Integer.int8_t() +I16 = Integer.int16_t() +I32 = Integer.int32_t() +I64 = Integer.int64_t() +I128 = Integer.int128_t() +UI8 = Integer.uint8_t() +UI16 = Integer.uint16_t() +UI32 = Integer.uint32_t() 
UI64 = Integer.uint64_t()
UI128 = Integer.uint128_t()
HALF = Float(16)
FLOAT = Float.float()
DOUBLE = Float.double()
LONG_DOUBLE = Float(80)
QUADRUPLE = Float(128)
OCTUPLE = Float(256)
BOOL = CustomType.bool()
VOID = CustomType.void()

# The expected names below are index-aligned with ALL_TYPES; the counter is the index of the type.
ALL_TYPES = [I8, I16, I32, I64, I128, UI8, UI16, UI32, UI64, UI128, HALF, FLOAT, DOUBLE, LONG_DOUBLE, QUADRUPLE, OCTUPLE, BOOL, VOID]
EXPECTED_BASE_NAMES = ["chVar0", "sVar1", "iVar2", "lVar3", "i128Var4", "uchVar5", "usVar6", "uiVar7", "ulVar8", "ui128Var9", "hVar10",
                       "fVar11", "dVar12", "ldVar13", "qVar14", "oVar15", "bVar16", "vVar17"]
EXPECTED_POINTER_NAMES = ["chpVar0", "spVar1", "ipVar2", "lpVar3", "i128pVar4", "uchpVar5", "uspVar6", "uipVar7", "ulpVar8", "ui128pVar9",
                          "hpVar10", "fpVar11", "dpVar12", "ldpVar13", "qpVar14", "opVar15", "bpVar16", "vpVar17"]


def _generate_options(notation: str = "system_hungarian", pointer_base: bool = True, type_sep: str = "", counter_sep: str = "") -> Options:
    """Return options configuring the variable name generation stage and the code generator used by the tests."""
    options = Options()
    options.set(f"{PIPELINE_NAME}.notation", notation)
    options.set(f"{PIPELINE_NAME}.pointer_base", pointer_base)
    options.set(f"{PIPELINE_NAME}.type_separator", type_sep)
    options.set(f"{PIPELINE_NAME}.counter_separator", counter_sep)
    options.set("code-generator.max_complexity", 100)
    options.set("code-generator.use_increment_int", False)
    options.set("code-generator.use_increment_float", False)
    options.set("code-generator.use_compound_assignment", True)
    return options


def _run_vng(ast: AbstractSyntaxTree, options: Options | None = None):
    """Run the variable name generation on the given ast and print the code generated afterwards."""
    if options is None:
        # Build fresh options per call instead of sharing one instance created at import time.
        options = _generate_options()
    task = DecompilerTask("variable_name_generation", None, ast, options, VOID)
    VariableNameGeneration().run(task)
    DecoratedCode.print_code(CodeGenerator().generate([task]))


def test_default_notation_1():
    """The default notation keeps the variable name."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", I32), Constant(0)), true_value), {})
    _run_vng(ast, _generate_options(notation="default"))
    assert var.name == "var_0"


@pytest.mark.parametrize(
    "variable, name",
    [(Variable(f"var_{i}", typ), expected) for i, (typ, expected) in enumerate(zip(ALL_TYPES, EXPECTED_BASE_NAMES))]
    + [(Variable(f"var_{i}", Pointer(typ)), expected) for i, (typ, expected) in enumerate(zip(ALL_TYPES, EXPECTED_POINTER_NAMES))],
)
def test_hungarian_notation(variable, name):
    """Every supported type, and a pointer to it, gets the expected hungarian name."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    ast = AbstractSyntaxTree(CodeNode([Assignment(variable, Constant(42))], true_value), {})
    _run_vng(ast)
    assert variable.name == name


@pytest.mark.parametrize("type_sep, counter_sep", [("", ""), ("_", "_")])
def test_hungarian_notation_separators(type_sep: str, counter_sep: str):
    """The configured separators are placed after the type prefix and before the counter."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", I32), Constant(0)), true_value), {})
    _run_vng(ast, _generate_options(type_sep=type_sep, counter_sep=counter_sep))
    assert var.name == f"i{type_sep}Var{counter_sep}0"