Skip to content

Commit

Permalink
Hungarian notation (#40)
Browse files Browse the repository at this point in the history
* add simple VariableNameGenerator
add config

* add options
add pipeline
add test (wip)

* add vng tests
fix vng
revert loglevel

* fix test

* ignore for loop counter

* fix typo in test

* Fix imports, Fix basic type errors

* Func params + small fixes

* Legacy calls adjustments + tests fixes

* Global var, renamed counter fix

* ...or maybe just turn off the default config :-)

* Params, GVars, Counter/Index fixes

* Better titles for hungarian renaming

* More generic renaming + more docs

* Refactor tests

* Use visitor

* Init order

* purge visitor

* remove debug list

---------

Co-authored-by: Felix Prahl-Kamps <[email protected]>
Co-authored-by: Steffen Enders <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
Co-authored-by: Spartak Ehrlich <[email protected]>
  • Loading branch information
5 people authored Apr 24, 2023
1 parent bdb0eac commit fe30536
Show file tree
Hide file tree
Showing 5 changed files with 330 additions and 3 deletions.
1 change: 1 addition & 0 deletions decompiler/pipeline/controlflowanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .expression_simplification import ExpressionSimplification
from .instruction_length_handler import InstructionLengthHandler
from .readability_based_refinement import ReadabilityBasedRefinement
from .variable_name_generation import VariableNameGeneration
175 changes: 175 additions & 0 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import re
from abc import ABC, abstractmethod
from enum import Enum
from typing import Dict, List, Optional

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import CaseNode, CodeNode, ConditionNode, LoopNode, SwitchNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.task import DecompilerTask


def _get_var_counter(var_name: str) -> Optional[str]:
"""Return the counter of a given variable name, if any is present."""
if counter := re.match(r".*?([0-9]+)$", var_name):
return counter.group(1)
return None


def _get_containing_variables(dfo: DataflowObject) -> List[Variable]:
    """Collect every ``Variable`` yielded by ``dfo.subexpressions()``, in iteration order."""
    return [candidate for candidate in dfo.subexpressions() if isinstance(candidate, Variable)]


class VariableCollector(BaseAstDataflowObjectVisitor):
    """AST visitor that gathers the variables it comes across.

    Variables found in the condition of a loop node are stored separately from
    all other collected variables.
    """

    def __init__(self, cond_map: Dict[LogicCondition, Condition]):
        """Remember the symbol-to-condition mapping and start with empty result lists."""
        self._cond_map: Dict[LogicCondition, Condition] = cond_map
        self._loop_vars: list[Variable] = []
        self._variables: list[Variable] = []

    def get_variables(self) -> list[Variable]:
        """Return the list of collected (non-loop-condition) variables."""
        return self._variables

    def get_loop_variables(self) -> list[Variable]:
        """Return the list of variables collected from loop conditions."""
        return self._loop_vars

    def _resolve_conditions(self, node) -> list:
        """Look up the condition behind every symbol of the node's condition."""
        # All symbols are resolved up front, before any result list is touched.
        return [self._cond_map[symbol] for symbol in node.condition.get_symbols()]

    def visit_condition_node(self, node: ConditionNode):
        """Add the variables of the node's resolved conditions to the general list."""
        for resolved in self._resolve_conditions(node):
            self._variables.extend(_get_containing_variables(resolved))

    def visit_loop_node(self, node: LoopNode):
        """Add the variables of the loop's resolved conditions to the loop-variable list."""
        for resolved in self._resolve_conditions(node):
            self._loop_vars.extend(_get_containing_variables(resolved))

    def visit_variable(self, expression: Variable):
        """Record a visited variable in the general list."""
        self._variables.append(expression)


class NamingConvention(str, Enum):
    """Naming conventions that can be selected for variable renaming.

    Members are ``str`` instances as well, so each one compares equal to its
    plain string value (e.g. a value read from the options).
    """

    # Value of the option when no special notation is requested.
    default = "default"
    # Value of the option selecting System Hungarian notation.
    system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
    """Common base class for variable renaming schemes.

    Construction collects the variables of the task's syntax tree and keeps
    only those that a scheme is allowed to rename.
    """

    def __init__(self, task: DecompilerTask) -> None:
        """Collect all variables of the task's AST and filter out the ones that must keep their name."""
        collector = VariableCollector(task._ast.condition_map)
        collector.visit_ast(task._ast)
        self._params: List[Variable] = task._function_parameters
        self._loop_vars: List[Variable] = collector.get_loop_variables()
        self._variables: List[Variable] = [
            candidate for candidate in collector.get_variables() if self._filter_variables(candidate)
        ]

    def _filter_variables(self, item: Variable) -> bool:
        """Tell whether a variable may be renamed.

        Returns False for function parameters, for loop variables whose name
        does not contain "var_", and for global variables; True otherwise.
        """
        if item in self._params:
            return False
        if item in self._loop_vars and "var_" not in item.name:
            return False
        return not isinstance(item, GlobalVariable)

    @abstractmethod
    def renameVariableNames(self):
        """Rename the collected variables according to the concrete scheme."""


class HungarianScheme(RenamingScheme):
    """Renaming scheme that produces System Hungarian names.

    A generated name has the form
    ``<type prefix><type separator><base name><counter separator><counter>``,
    e.g. ``iVar0`` for a signed 32-bit integer formerly ending in ``0``.
    """

    # Type prefix per size for the numeric types (e.g. an 8-bit integer -> "ch").
    type_prefix = {
        Float: {16: "h", 32: "f", 64: "d", 80: "ld", 128: "q", 256: "o"},
        Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
    }

    def __init__(self, task: DecompilerTask) -> None:
        """Collect the renamable variables and read the scheme's options from the task."""
        super().__init__(task)
        self._name = VariableNameGeneration.name
        self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var")
        self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
        self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
        self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")

    def renameVariableNames(self):
        """Rename all collected variables to the hungarian notation."""
        for var in self._variables:
            # The trailing number of the old name is carried over; names without one get no counter.
            counter = _get_var_counter(var.name)
            var._name = self._hungarian_notation(var, counter or "")

    def _hungarian_notation(self, var: Variable, counter: str) -> str:
        """Return the hungarian name for a variable, given its counter as a (possibly empty) string."""
        return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self._var_name}{self._counter_separator}{counter}"

    def _hungarian_prefix(self, var_type: Type) -> str:
        """Return the hungarian type prefix for a variable type.

        Pointers yield "p", preceded by the prefix of the pointed-to type when
        the ``pointer_base`` option is enabled. Boolean custom types yield "b"
        and custom types of size 0 yield "v". Integers and floats yield an
        optional "u" (unsigned) followed by the entry of ``type_prefix`` for
        their size.

        Always returns a string: types without a known prefix (other custom
        types, numeric sizes missing from ``type_prefix``, unhandled types)
        contribute an empty type part instead of "None" or a ``KeyError``.
        """
        if isinstance(var_type, Pointer):
            if self._pointer_base:
                return f"{self._hungarian_prefix(var_type.type)}p"
            return "p"
        if isinstance(var_type, CustomType):
            if var_type.is_boolean:
                return "b"
            if var_type.size == 0:
                return "v"
        if isinstance(var_type, (Integer, Float)):
            sign = "" if var_type.is_signed else "u"
            # Match by isinstance so that subclasses of Integer/Float use their base's table.
            size_prefixes = next(
                (prefixes for base, prefixes in self.type_prefix.items() if isinstance(var_type, base)),
                {},
            )
            return f"{sign}{size_prefixes.get(var_type.size, '')}"
        # No prefix is known for this type.
        return ""


class DefaultScheme(RenamingScheme):
    """Renaming scheme for the default notation; it does not change any name."""

    def __init__(self, task: DecompilerTask) -> None:
        """Run the base-class variable collection; no scheme-specific state is kept."""
        super().__init__(task)

    def renameVariableNames(self):
        """Do nothing: the collected variables keep their current names."""
        # Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters?
        return None


class VariableNameGeneration(PipelineStage):
    """
    Pipeline stage in charge of renaming variables to a configured format.
    The notation is read from the "<name>.notation" option; the supported values are
    "default" and "system_hungarian". Any other value makes the stage return without renaming.
    """

    name: str = "variable-name-generation"

    def __init__(self):
        """Create the stage; the notation is only read once the stage is run."""
        self._notation: str = None

    def run(self, task: DecompilerTask):
        """Rename the variables of the task according to the configured notation."""
        self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")

        if self._notation == NamingConvention.default:
            renamer: RenamingScheme = DefaultScheme(task)
        elif self._notation == NamingConvention.system_hungarian:
            renamer = HungarianScheme(task)
        else:
            # Unknown notation: leave all names untouched.
            return

        renamer.renameVariableNames()
9 changes: 7 additions & 2 deletions decompiler/pipeline/default.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Module defining the available pipelines."""

from decompiler.pipeline.controlflowanalysis import ExpressionSimplification, InstructionLengthHandler, ReadabilityBasedRefinement
from decompiler.pipeline.controlflowanalysis import (
ExpressionSimplification,
InstructionLengthHandler,
ReadabilityBasedRefinement,
VariableNameGeneration,
)
from decompiler.pipeline.dataflowanalysis import (
ArrayAccessDetection,
CommonSubexpressionElimination,
Expand Down Expand Up @@ -36,4 +41,4 @@
EdgePruner,
]

AST_STAGES = [ReadabilityBasedRefinement, ExpressionSimplification, InstructionLengthHandler]
AST_STAGES = [ReadabilityBasedRefinement, ExpressionSimplification, InstructionLengthHandler, VariableNameGeneration]
58 changes: 57 additions & 1 deletion decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,61 @@
"is_hidden_from_cli": false,
"argument_name": "--rename-while-loop-variables"
},
{
"dest": "variable-name-generation.notation",
"default": "system_hungarian",
"title": "Variable Naming Convention",
"type": "string",
"enum": ["default", "system_hungarian"],
"enumDescriptions": [
"Default setting (e.g. int var_0).",
"System Hungarian (e.g. int iVar0; float fVar0)"
],
"description": "Selects the naming convention for variable names.",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-notation"
},
{
"dest": "variable-name-generation.variable_name",
"default": "Var",
"title": "Variable Base Name for hungarian notation",
"type": "string",
"description": "Base name placed between the type prefix and the counter (e.g. 'Var' -> iVar0)",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-variable-name"
},
{
"dest": "variable-name-generation.pointer_base",
"default": true,
"title": "Pointer base type prefix for hungarian notation",
"type": "boolean",
"description": "Prefix the base type of a pointer (e.g. int * ipVar0)",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-pointer-base"
},
{
"dest": "variable-name-generation.type_separator",
"default": "",
"title": "Type Separator for hungarian notation",
"type": "string",
"description": "How to separate the prefix from the variable name (e.g. '_' -> i_Var0)",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-type-separator"
},
{
"dest": "variable-name-generation.counter_separator",
"default": "",
"title": "Counter Separator for hungarian notation",
"type": "string",
"description": "How to separate the variable name from its counter (e.g. '_' -> iVar_0)",
"is_hidden_from_gui": false,
"is_hidden_from_cli": false,
"argument_name": "--variable-generation-counter-separator"
},
{
"dest": "code-generator.max_complexity",
"default": 100,
Expand Down Expand Up @@ -546,7 +601,8 @@
"default": [
"readability-based-refinement",
"expression-simplification",
"instruction-length-handler"
"instruction-length-handler",
"variable-name-generation"
],
"title": "AST pipeline stages",
"type": "array",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from typing import List

import pytest
from decompiler.backend.codegenerator import CodeGenerator
from decompiler.pipeline.controlflowanalysis import VariableNameGeneration
from decompiler.structures.ast.ast_nodes import CodeNode
from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Assignment, Constant, CustomType, Float, Integer, Pointer, Variable
from decompiler.task import DecompilerTask
from decompiler.util.decoration import DecoratedCode
from decompiler.util.options import Options

# Option prefix of the stage under test (used to build the option keys below).
PIPELINE_NAME = VariableNameGeneration.name

# Signed integer types.
I8 = Integer.int8_t()
I16 = Integer.int16_t()
I32 = Integer.int32_t()
I64 = Integer.int64_t()
I128 = Integer.int128_t()
# Unsigned integer types.
UI8 = Integer.uint8_t()
UI16 = Integer.uint16_t()
UI32 = Integer.uint32_t()
UI64 = Integer.uint64_t()
UI128 = Integer.uint128_t()
# Floating point types.
HALF = Float(16)
FLOAT = Float.float()
DOUBLE = Float.double()
LONG_DOUBLE = Float(80)
QUADRUPLE = Float(128)
OCTUPLE = Float(256)
# Custom types.
BOOL = CustomType.bool()
VOID = CustomType.void()

# The three lists below are positional: entry i of each EXPECTED_* list is the name expected
# for a variable "var_<i>" of type ALL_TYPES[i] (respectively of type Pointer(ALL_TYPES[i])).
ALL_TYPES = [I8, I16, I32, I64, I128, UI8, UI16, UI32, UI64, UI128, HALF, FLOAT, DOUBLE, LONG_DOUBLE, QUADRUPLE, OCTUPLE, BOOL, VOID]
EXPECTED_BASE_NAMES = ["chVar0", "sVar1", "iVar2", "lVar3", "i128Var4", "uchVar5", "usVar6", "uiVar7", "ulVar8", "ui128Var9", "hVar10",
"fVar11", "dVar12", "ldVar13", "qVar14", "oVar15", "bVar16", "vVar17"]
EXPECTED_POINTER_NAMES = ["chpVar0", "spVar1", "ipVar2", "lpVar3", "i128pVar4", "uchpVar5", "uspVar6", "uipVar7", "ulpVar8", "ui128pVar9",
"hpVar10", "fpVar11", "dpVar12", "ldpVar13", "qpVar14", "opVar15", "bpVar16", "vpVar17"]


def _generate_options(notation: str = "system_hungarian", pointer_base: bool = True, type_sep: str = "", counter_sep: str = "") -> Options:
    """Build the options for a test run: the given renaming settings plus fixed code-generator settings."""
    options = Options()
    settings = {
        f"{PIPELINE_NAME}.notation": notation,
        f"{PIPELINE_NAME}.pointer_base": pointer_base,
        f"{PIPELINE_NAME}.type_separator": type_sep,
        f"{PIPELINE_NAME}.counter_separator": counter_sep,
        f"code-generator.max_complexity": 100,
        "code-generator.use_increment_int": False,
        "code-generator.use_increment_float": False,
        "code-generator.use_compound_assignment": True,
    }
    # Dicts keep insertion order, so the options are set in the order listed above.
    for key, value in settings.items():
        options.set(key, value)
    return options


def _run_vng(ast: AbstractSyntaxTree, options: Options | None = None):
    """Run the VariableNameGeneration stage on the given AST and print the generated code.

    When ``options`` is omitted, a fresh default set is built for this call via
    ``_generate_options()``. Building it here, rather than in the signature,
    avoids sharing one Options object (created at import time) between all
    tests that rely on the default.
    """
    if options is None:
        options = _generate_options()
    task = DecompilerTask("variable_name_generation", None, ast, options, VOID)
    VariableNameGeneration().run(task)
    DecoratedCode.print_code(CodeGenerator().generate([task]))


def test_default_notation_1():
    """With the default notation a variable keeps its original name."""
    var = Variable("var_0", I32)
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    code_node = CodeNode(Assignment(var, Constant(0)), true_value)
    ast = AbstractSyntaxTree(code_node, {})
    _run_vng(ast, _generate_options(notation="default"))
    assert var.name == "var_0"


@pytest.mark.parametrize(
    "variable, name",
    [(Variable(f"var_{i}", typ), EXPECTED_BASE_NAMES[i]) for i, typ in enumerate(ALL_TYPES)]
    + [(Variable(f"var_{i}", Pointer(typ)), EXPECTED_POINTER_NAMES[i]) for i, typ in enumerate(ALL_TYPES)],
)
def test_hungarian_notation(variable, name):
    """Each base type, and a pointer to it, is renamed to the expected hungarian name."""
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    code_node = CodeNode([Assignment(variable, Constant(42))], true_value)
    ast = AbstractSyntaxTree(code_node, {})
    _run_vng(ast)
    assert variable.name == name


@pytest.mark.parametrize("type_sep, counter_sep", [("", ""), ("_", "_")])
def test_hungarian_notation_separators(type_sep: str, counter_sep: str):
    """The configured separators appear after the type prefix and before the counter."""
    var = Variable("var_0", I32)
    true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context())
    code_node = CodeNode(Assignment(var, Constant(0)), true_value)
    ast = AbstractSyntaxTree(code_node, {})
    options = _generate_options(type_sep=type_sep, counter_sep=counter_sep)
    _run_vng(ast, options)
    assert var.name == f"i{type_sep}Var{counter_sep}0"

0 comments on commit fe30536

Please sign in to comment.