-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add simple VariableNameGenerator add config
* add options add pipeline add test (wip)
* add vng tests fix vng revert loglevel
* fix test
* ignore for loop counter
* fix typo in test
* Fix imports, Fix basic type errors
* Func params + small fixes
* Legacy calls adjustments + tests fixes
* Global var, renamed counter fix
* ...or maybe just turn off the default config :-)
* Params, GVars, Counter/Index fixes
* Better titles for hungarian renaming
* More generic renaming + more docs
* Refactor tests
* Use visitor
* Init order
* purge visitor
* remove debug list

---------

Co-authored-by: Felix Prahl-Kamps <[email protected]>
Co-authored-by: Steffen Enders <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
- Loading branch information
1 parent
bdb0eac
commit fe30536
Showing
5 changed files
with
330 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from .expression_simplification import ExpressionSimplification | ||
from .instruction_length_handler import InstructionLengthHandler | ||
from .readability_based_refinement import ReadabilityBasedRefinement | ||
from .variable_name_generation import VariableNameGeneration |
175 changes: 175 additions & 0 deletions
175
decompiler/pipeline/controlflowanalysis/variable_name_generation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
import re | ||
from abc import ABC, abstractmethod | ||
from enum import Enum | ||
from typing import Dict, List, Optional | ||
|
||
from decompiler.pipeline.stage import PipelineStage | ||
from decompiler.structures.ast.ast_nodes import CaseNode, CodeNode, ConditionNode, LoopNode, SwitchNode | ||
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree | ||
from decompiler.structures.logic.logic_condition import LogicCondition | ||
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable | ||
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor | ||
from decompiler.task import DecompilerTask | ||
|
||
|
||
def _get_var_counter(var_name: str) -> Optional[str]: | ||
"""Return the counter of a given variable name, if any is present.""" | ||
if counter := re.match(r".*?([0-9]+)$", var_name): | ||
return counter.group(1) | ||
return None | ||
|
||
|
||
def _get_containing_variables(dfo: DataflowObject) -> List[Variable]:
    """Collect every Variable yielded by ``dfo.subexpressions()``, in iteration order."""
    return [candidate for candidate in dfo.subexpressions() if isinstance(candidate, Variable)]
|
||
|
||
class VariableCollector(BaseAstDataflowObjectVisitor):
    """AST visitor that gathers variables, keeping those from loop conditions in a separate list."""

    def __init__(self, cond_map: Dict[LogicCondition, Condition]):
        # Variables seen directly or inside the conditions of condition nodes.
        self._variables: list[Variable] = []
        # Variables occurring in the conditions of loop nodes.
        self._loop_vars: list[Variable] = []
        # Resolves each symbol of a node's logic condition to its Condition expression.
        self._cond_map: Dict[LogicCondition, Condition] = cond_map

    def get_variables(self) -> list[Variable]:
        """Return the variables collected so far (the internal list, not a copy)."""
        return self._variables

    def get_loop_variables(self) -> list[Variable]:
        """Return the loop-condition variables collected so far (the internal list, not a copy)."""
        return self._loop_vars

    def visit_condition_node(self, node: ConditionNode):
        """Record the variables of every condition the node's logic condition refers to."""
        # Resolve all symbols up front so a missing symbol fails before anything is recorded.
        conditions = [self._cond_map[symbol] for symbol in node.condition.get_symbols()]
        for condition in conditions:
            self._variables.extend(_get_containing_variables(condition))

    def visit_loop_node(self, node: LoopNode):
        """Record the variables of the loop's condition as loop variables."""
        # Resolve all symbols up front so a missing symbol fails before anything is recorded.
        conditions = [self._cond_map[symbol] for symbol in node.condition.get_symbols()]
        for condition in conditions:
            self._loop_vars.extend(_get_containing_variables(condition))

    def visit_variable(self, expression: Variable):
        """Record a variable visited by the traversal."""
        self._variables.append(expression)
|
||
|
||
class NamingConvention(str, Enum):
    """Names of the supported variable naming conventions.

    Members are also plain strings, so they compare equal to the raw option value.
    """

    # Selects DefaultScheme in VariableNameGeneration.run.
    default = "default"
    # Selects HungarianScheme in VariableNameGeneration.run.
    system_hungarian = "system_hungarian"
|
||
|
||
class RenamingScheme(ABC):
    """Common base of all renaming schemes; gathers the variables that may be renamed."""

    def __init__(self, task: DecompilerTask) -> None:
        """Collect the variables of the task's AST and keep only those eligible for renaming.

        Function parameters, global variables and already-renamed loop variables are dropped
        (see ``_filter_variables``).
        """
        collector = VariableCollector(task._ast.condition_map)
        collector.visit_ast(task._ast)
        # Both lists must exist before filtering, since _filter_variables reads them.
        self._params: List[Variable] = task._function_parameters
        self._loop_vars: List[Variable] = collector.get_loop_variables()
        self._variables: List[Variable] = [
            candidate for candidate in collector.get_variables() if self._filter_variables(candidate)
        ]

    def _filter_variables(self, item: Variable) -> bool:
        """Tell whether ``item`` should be renamed.

        Returns False for function parameters, for loop variables whose name does not
        contain "var_" (i.e. that were already renamed) and for global variables.
        """
        if item in self._params:
            return False
        if item in self._loop_vars and "var_" not in item.name:
            return False
        return not isinstance(item, GlobalVariable)

    @abstractmethod
    def renameVariableNames(self):
        """Rename the collected variables according to the concrete scheme."""
        pass
|
||
|
||
class HungarianScheme(RenamingScheme):
    """Renaming scheme producing names of the form <type prefix><sep><base name><sep><counter>."""

    # Prefix per base type, keyed by the type's ``size`` (8 maps to "ch", 32 to "i", ...).
    type_prefix = {
        Float: {16: "h", 32: "f", 64: "d", 80: "ld", 128: "q", 256: "o"},
        Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
    }

    def __init__(self, task: DecompilerTask) -> None:
        """Collect the renamable variables and read the scheme's options from the task.

        Options (all under the ``VariableNameGeneration.name`` prefix):
        ``variable_name`` (fallback "Var"), ``pointer_base`` (fallback True),
        ``type_separator`` (fallback "") and ``counter_separator`` (fallback "").
        """
        super().__init__(task)
        self._name = VariableNameGeneration.name
        self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var")
        self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
        self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
        self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")

    def renameVariableNames(self):
        """Rename all collected variables to the hungarian notation."""
        for var in self._variables:
            # Keep the trailing counter of the old name (if any) so renamed variables stay distinct.
            counter = _get_var_counter(var.name)
            # NOTE(review): writes the private attribute directly; presumably Variable.name has no setter.
            var._name = self._hungarian_notation(var, counter or "")

    def _hungarian_notation(self, var: Variable, counter: str) -> str:
        """Return the hungarian name for ``var``.

        ``counter`` is the digit string taken from the old name, or "" if there was none.
        """
        return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self._var_name}{self._counter_separator}{counter}"

    def _hungarian_prefix(self, var_type: Type) -> str:
        """Return the hungarian prefix for a variable type.

        Pointers yield "p", preceded by the prefix of the pointed-to type when the
        ``pointer_base`` option is set. Types without a known prefix yield "" instead of
        None, so the generated name never contains the text "None".
        """
        if isinstance(var_type, Pointer):
            if self._pointer_base:
                return f"{self._hungarian_prefix(var_type.type)}p"
            return "p"
        if isinstance(var_type, CustomType):
            if var_type.is_boolean:
                return "b"
            if var_type.size == 0:
                return "v"
            # Any other custom type has no dedicated prefix.
            return ""
        if isinstance(var_type, (Integer, Float)):
            sign = "" if var_type.is_signed else "u"
            # Unsupported sizes (or subclasses missing from the table) get "unk" rather than a KeyError.
            prefix = self.type_prefix.get(type(var_type), {}).get(var_type.size, "unk")
            return f"{sign}{prefix}"
        # Unknown kind of type: no prefix.
        return ""
|
||
|
||
class DefaultScheme(RenamingScheme):
    """Renaming scheme for the default notation; it currently leaves all names unchanged."""

    # Construction is inherited unchanged from RenamingScheme.

    def renameVariableNames(self):
        """Do nothing: the default notation keeps the existing variable names."""
        # Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters?
        return None
|
||
|
||
class VariableNameGeneration(PipelineStage):
    """
    Pipeline stage that renames variables according to the configured notation.

    Supported notations are the values of NamingConvention ('default' and
    'system_hungarian'); any other value leaves the task untouched.
    """

    name: str = "variable-name-generation"

    def __init__(self):
        # Set on every run() from the "<name>.notation" option.
        self._notation: Optional[str] = None

    def run(self, task: DecompilerTask):
        """Read the notation option (fallback "default") and apply the matching renaming scheme."""
        self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")

        # NamingConvention members are str subclasses, so they compare equal to the raw option string.
        if self._notation == NamingConvention.default:
            renamer: RenamingScheme = DefaultScheme(task)
        elif self._notation == NamingConvention.system_hungarian:
            renamer = HungarianScheme(task)
        else:
            # Unknown notation: nothing to rename.
            return

        renamer.renameVariableNames()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
tests/pipeline/controlflowanalysis/test_variable_name_generation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from typing import List | ||
|
||
import pytest | ||
from decompiler.backend.codegenerator import CodeGenerator | ||
from decompiler.pipeline.controlflowanalysis import VariableNameGeneration | ||
from decompiler.structures.ast.ast_nodes import CodeNode | ||
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree | ||
from decompiler.structures.logic.logic_condition import LogicCondition | ||
from decompiler.structures.pseudo import Assignment, Constant, CustomType, Float, Integer, Pointer, Variable | ||
from decompiler.task import DecompilerTask | ||
from decompiler.util.decoration import DecoratedCode | ||
from decompiler.util.options import Options | ||
|
||
# Option prefix of the stage under test.
PIPELINE_NAME = VariableNameGeneration.name

# Signed integers.
I8 = Integer.int8_t()
I16 = Integer.int16_t()
I32 = Integer.int32_t()
I64 = Integer.int64_t()
I128 = Integer.int128_t()

# Unsigned integers.
UI8 = Integer.uint8_t()
UI16 = Integer.uint16_t()
UI32 = Integer.uint32_t()
UI64 = Integer.uint64_t()
UI128 = Integer.uint128_t()

# Floating point types.
HALF = Float(16)
FLOAT = Float.float()
DOUBLE = Float.double()
LONG_DOUBLE = Float(80)
QUADRUPLE = Float(128)
OCTUPLE = Float(256)

# Custom types.
BOOL = CustomType.bool()
VOID = CustomType.void()

# The position of a type in this list is the counter of the variable created for it in the tests.
ALL_TYPES = [
    I8, I16, I32, I64, I128,
    UI8, UI16, UI32, UI64, UI128,
    HALF, FLOAT, DOUBLE, LONG_DOUBLE, QUADRUPLE, OCTUPLE,
    BOOL, VOID,
]

# Expected hungarian names for plain variables, index-aligned with ALL_TYPES.
EXPECTED_BASE_NAMES = [
    "chVar0", "sVar1", "iVar2", "lVar3", "i128Var4",
    "uchVar5", "usVar6", "uiVar7", "ulVar8", "ui128Var9",
    "hVar10", "fVar11", "dVar12", "ldVar13", "qVar14", "oVar15",
    "bVar16", "vVar17",
]

# Expected hungarian names for pointers to each type, index-aligned with ALL_TYPES.
EXPECTED_POINTER_NAMES = [
    "chpVar0", "spVar1", "ipVar2", "lpVar3", "i128pVar4",
    "uchpVar5", "uspVar6", "uipVar7", "ulpVar8", "ui128pVar9",
    "hpVar10", "fpVar11", "dpVar12", "ldpVar13", "qpVar14", "opVar15",
    "bpVar16", "vpVar17",
]
|
||
|
||
def _generate_options(notation: str = "system_hungarian", pointer_base: bool = True, type_sep: str = "", counter_sep: str = "") -> Options:
    """Build a fresh Options object for one test run.

    :param notation: value of the "<stage>.notation" option.
    :param pointer_base: value of the "<stage>.pointer_base" option.
    :param type_sep: value of the "<stage>.type_separator" option.
    :param counter_sep: value of the "<stage>.counter_separator" option.
    :return: options holding the stage settings plus the code-generator settings set below.
    """
    options = Options()
    # Settings of the stage under test.
    options.set(f"{PIPELINE_NAME}.notation", notation)
    options.set(f"{PIPELINE_NAME}.pointer_base", pointer_base)
    options.set(f"{PIPELINE_NAME}.type_separator", type_sep)
    options.set(f"{PIPELINE_NAME}.counter_separator", counter_sep)
    # Code-generator settings; _run_vng prints the generated code after the stage ran.
    options.set("code-generator.max_complexity", 100)
    options.set("code-generator.use_increment_int", False)
    options.set("code-generator.use_increment_float", False)
    options.set("code-generator.use_compound_assignment", True)
    return options
|
||
|
||
def _run_vng(ast: AbstractSyntaxTree, options: Options | None = None):
    """Run VariableNameGeneration on ``ast`` and print the generated code.

    :param ast: syntax tree whose variables are renamed in place.
    :param options: options for the run; when omitted, a fresh ``_generate_options()`` result is used.
    """
    # Build the default per call: a default argument would be created once at definition
    # time and the same mutable Options object shared by every test.
    if options is None:
        options = _generate_options()
    task = DecompilerTask("variable_name_generation", None, ast, options, VOID)
    VariableNameGeneration().run(task)
    DecoratedCode.print_code(CodeGenerator().generate([task]))
|
||
|
||
def test_default_notation_1():
    """The default notation must leave a variable's name unchanged."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var = Variable("var_0", I32)
    code_node = CodeNode(Assignment(var, Constant(0)), true_value)
    ast = AbstractSyntaxTree(code_node, {})
    _run_vng(ast, _generate_options(notation="default"))
    assert var.name == "var_0"
|
||
|
||
@pytest.mark.parametrize(
    "variable, name",
    [
        # One plain variable per type ...
        *((Variable(f"var_{idx}", ty), EXPECTED_BASE_NAMES[idx]) for idx, ty in enumerate(ALL_TYPES)),
        # ... followed by one pointer variable per type.
        *((Variable(f"var_{idx}", Pointer(ty)), EXPECTED_POINTER_NAMES[idx]) for idx, ty in enumerate(ALL_TYPES)),
    ],
)
def test_hungarian_notation(variable, name):
    """Each type (and a pointer to it) gets the expected hungarian prefix with default options."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    code_node = CodeNode([Assignment(variable, Constant(42))], true_value)
    ast = AbstractSyntaxTree(code_node, {})
    _run_vng(ast)
    assert variable.name == name
|
||
|
||
@pytest.mark.parametrize("type_sep, counter_sep", [("", ""), ("_", "_")])
def test_hungarian_notation_separators(type_sep: str, counter_sep: str):
    """The configured separators are placed after the type prefix and before the counter."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    var = Variable("var_0", I32)
    code_node = CodeNode(Assignment(var, Constant(0)), true_value)
    ast = AbstractSyntaxTree(code_node, {})
    options = _generate_options(type_sep=type_sep, counter_sep=counter_sep)
    _run_vng(ast, options)
    assert var.name == f"i{type_sep}Var{counter_sep}0"