Skip to content

Commit

Permalink
Merge branch 'main' into issue-432-_IndexError_globals_py_301_IndexEr…
Browse files Browse the repository at this point in the history
…ror_list_index_out_of_range
  • Loading branch information
steffenenders authored Aug 22, 2024
2 parents 88c3a7b + 0afec52 commit 60ca62b
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 210 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def _get_dangerous_relations_between_definition_and_target(self, alias_variable:
for basic_block in self._cfg:
for instruction in basic_block:
if isinstance(instruction, Relation) and instruction.destination.name == alias_variable.name:
relations |= {instruction}
relations.add(instruction)

return relations

Expand Down
259 changes: 121 additions & 138 deletions decompiler/pipeline/controlflowanalysis/variable_name_generation.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,26 @@
import re
import logging
import string
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional
from typing import Counter, List

from decompiler.pipeline.stage import PipelineStage
from decompiler.structures.ast.ast_nodes import ConditionNode, LoopNode
from decompiler.structures.logic.logic_condition import LogicCondition
from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.pseudo import ArrayType, CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable
from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor
from decompiler.structures.visitors.substitute_visitor import SubstituteVisitor
from decompiler.task import DecompilerTask

"""
Small explanation how variables work in the decompiler:
- sometimes they are the same object in different structures (assignments, loops etc.)
- sometimes they are real copies of another
==> Therefore if we change a parameter of a variable (name), we have no guarantee that all usages of the variable will be updated
==> Therefore we always collect EVERY variable used + check with a method (already_renamed) if we already renamed it to our new naming scheme
"""


def _get_var_counter(var_name: str) -> Optional[str]:
"""Return the counter of a given variable name, if any is present."""
if counter := re.match(r".*?([0-9]+)$", var_name):
return counter.group(1)
return None


def _get_containing_variables(dfo: DataflowObject) -> List[Variable]:
"""Returns a list of variables contained in this dataflow object."""
variables: List[Variable] = []
for sub_exp in dfo.subexpressions():
if isinstance(sub_exp, Variable):
variables.append(sub_exp)
return variables


class VariableCollector(BaseAstDataflowObjectVisitor):
"""Visit relevant nodes and collect their variables."""

def __init__(self, cond_map: Dict[LogicCondition, Condition]):
self._cond_map: Dict[LogicCondition, Condition] = cond_map
self._loop_vars: list[Variable] = []
self._variables: list[Variable] = []

def get_variables(self) -> list[Variable]:
"""Get collected variables."""
return self._variables
"""Collect all variables in nodes/expressions"""

def get_loop_variables(self) -> list[Variable]:
"""Get collected variables used in loops."""
return self._loop_vars

def visit_condition_node(self, node: ConditionNode):
for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]:
self._variables.extend(_get_containing_variables(expr))

def visit_loop_node(self, node: LoopNode):
for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]:
self._loop_vars.extend(_get_containing_variables(expr))
def __init__(self):
self.variables: list[Variable] = []

def visit_variable(self, expression: Variable):
self._variables.append(expression)
self.variables.append(expression)


class NamingConvention(str, Enum):
Expand All @@ -70,35 +30,28 @@ class NamingConvention(str, Enum):
system_hungarian = "system_hungarian"


class RenamingScheme(ABC):
"""Base class for different Renaming schemes."""
@dataclass(frozen=True)
class VariableIdentifier:
name: str
ssa_label: int | None


def identifier(var: Variable) -> VariableIdentifier:
return VariableIdentifier(var.name, var.ssa_label)

def __init__(self, task: DecompilerTask) -> None:
"""Collets all needed variables for renaming + filters already renamed + function arguments out"""
collector = VariableCollector(task.ast.condition_map)
collector.visit_ast(task.ast)
self._params: List[Variable] = task.function_parameters
self._loop_vars: List[Variable] = collector.get_loop_variables()
self._variables: List[Variable] = list(filter(self._filter_variables, collector.get_variables()))

def _filter_variables(self, item: Variable) -> bool:
"""Return False if variable is either a:
- parameter
- renamed loop variable
- GlobalVariable
"""
return (
not item in self._params
and not (item in self._loop_vars and item.name.find("var_") == -1)
and not isinstance(item, GlobalVariable)
)

class RenamingScheme(ABC):

@abstractmethod
def renameVariableNames(self):
"""Abstract method which should rename variables with respect to the used scheme."""
def rename_variable(self, variable: Variable) -> Variable | None:
pass


class NoRenamingScheme(RenamingScheme):
def rename_variable(self, variable: Variable) -> Variable | None:
return None


class HungarianScheme(RenamingScheme):
"""Class which renames variables into hungarian notation."""

Expand All @@ -107,62 +60,84 @@ class HungarianScheme(RenamingScheme):
Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"},
}

custom_var_names = {"tmp_": "Tmp", "loop_break": "LoopBreak"}

def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)
self._name = VariableNameGeneration.name
self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var")
self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="")

def renameVariableNames(self):
"""Rename all collected variables to the hungarian notation."""
for var in self._variables:
if self.alread_renamed(var._name):
continue
counter = _get_var_counter(var.name)
var._name = self._hungarian_notation(var, counter if counter else "")

def _hungarian_notation(self, var: Variable, counter: int) -> str:
"""Return hungarian notation to a given variable."""
return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self.custom_var_names.get(var._name.rstrip(counter), self._var_name)}{self._counter_separator}{counter}"

def _hungarian_prefix(self, var_type: Type) -> str:
self._task = task
self._var_name: str = task.options.getstring(f"{VariableNameGeneration.name}.variable_name", fallback="var")
self._pointer_base: bool = task.options.getboolean(f"{VariableNameGeneration.name}.pointer_base", fallback=True)
self._type_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.type_separator", fallback="")
self._counter_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.counter_separator", fallback="")

self._variables = self._get_variables_to_rename()

counter = Counter[tuple[str, str]]()
self._variable_rename_map: dict[VariableIdentifier, str] = {}

variable_id: VariableIdentifier
vars: list[Variable]
for variable_id, vars in self._variables.items():
# because the way our cfg works, each use site of each variable could theoretically have a different type
# we just take the first assuming that they are all the same...
var_type = Counter(vars).most_common()[0][0].type
name_identifier = self._get_name_identifier(variable_id.name)
prefix = self._hungarian_prefix(var_type)

counter_postfix = f"{self._counter_separator}{counter[(name_identifier, prefix)]}"
counter[(name_identifier, prefix)] += 1

new_name: str = f"{prefix}{self._type_separator}{name_identifier.capitalize()}{counter_postfix}"

self._variable_rename_map[variable_id] = new_name

def rename_variable(self, variable: Variable) -> Variable | None:
new_name = self._variable_rename_map.get(identifier(variable))
if new_name is None:
return None
else:
return variable.copy(name=new_name)

def _get_name_identifier(self, name: str) -> str:
"""Return identifier by purging non alpha chars + capitalize the char afterwards. If string is too short, return generic"""
if len(name) < 2:
return self._var_name

x = string.capwords("".join([c if c.isalnum() else " " for c in name]))
x = x[0].lower() + x[1:] # important! We want to be able to choose later if the first letter should be capitalized
return "".join(filter(str.isalpha, x))

def _hungarian_prefix(self, var_type: Type) -> str | None:
"""Return hungarian prefix to a given variable type."""
if isinstance(var_type, Pointer):
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
return "p"
if isinstance(var_type, CustomType):
if var_type.is_boolean:
return "b"
elif var_type.size == 0:
return "v"
else:
return ""
if isinstance(var_type, (Integer, Float)):
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"
return ""

def alread_renamed(self, name) -> bool:
"""Return true if variable with custom name was already renamed, false otherwise"""
renamed_keys_words = [key for key in self.custom_var_names.values()] + ["unk", self._var_name]
return any(keyword in name for keyword in renamed_keys_words)


class DefaultScheme(RenamingScheme):
"""Class which renames variables into the default scheme."""

def __init__(self, task: DecompilerTask) -> None:
super().__init__(task)

def renameVariableNames(self):
# Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters?
pass
match var_type:
case Pointer():
if self._pointer_base:
return f"{self._hungarian_prefix(var_type.type)}p"
else:
return "p"
case ArrayType():
return f"arr{self._hungarian_prefix(var_type.type)}"
case CustomType():
if var_type.is_boolean:
return "b"
if var_type.size == 0:
return "v"
case Integer() | Float():
sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else ""
prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk")
return f"{sign}{prefix}"

return "unk"

def _get_variables_to_rename(self) -> dict[VariableIdentifier, list[Variable]]:
collector = VariableCollector()
collector.visit_ast(self._task.ast)

def include_variable(item: Variable):
return item not in self._task.function_parameters and not isinstance(item, GlobalVariable)

variables: dict[VariableIdentifier, List[Variable]] = defaultdict(list)
for variable in collector.variables:
if include_variable(variable):
variables[identifier(variable)].append(variable)
return variables


class VariableNameGeneration(PipelineStage):
Expand All @@ -173,21 +148,29 @@ class VariableNameGeneration(PipelineStage):

name: str = "variable-name-generation"

def __init__(self):
self._notation: str = None

def run(self, task: DecompilerTask):
"""Rename variable names to the given scheme."""
self._notation = task.options.getstring(f"{self.name}.notation", fallback="default")
notation = task.options.getstring(f"{self.name}.notation", fallback=NamingConvention.default)

renamer: RenamingScheme = None

match self._notation:
scheme: RenamingScheme
match notation:
case NamingConvention.default:
renamer = DefaultScheme(task)
scheme = NoRenamingScheme()
case NamingConvention.system_hungarian:
renamer = HungarianScheme(task)
scheme = HungarianScheme(task)
case _:
logging.warning("Unknown naming convention: %s", notation)
return

renamer.renameVariableNames()
self._rename_with_scheme(task, scheme)

@staticmethod
def _rename_with_scheme(task: DecompilerTask, rename_scheme: RenamingScheme):
rename_visitor = SubstituteVisitor(lambda o: rename_scheme.rename_variable(o) if isinstance(o, Variable) else None)

for node in task.ast.nodes:
for obj in node.get_dataflow_objets(task.ast.condition_map):
new_obj = rename_visitor.visit(obj)
if new_obj is not None:
# while this should not happen, in theory, there is nothing preventing this case...
logging.warning("Variable name renaming couldn't rename %s", new_obj)
4 changes: 2 additions & 2 deletions decompiler/pipeline/preprocessing/phi_predecessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def run(self, task: DecompilerTask):
self.cfg = task.graph
self.head = task.graph.root
self._def_map, self._use_map = _init_maps(self.cfg)
self._basic_block_of_definition = _init_basicblocks_of_definition(self.cfg)
self.extend_phi_functions()

def extend_phi_functions(self):
Expand Down Expand Up @@ -77,10 +78,9 @@ def _basicblocks_of_used_variables_in_phi_function(
each key is the variable that is defined at the node
"""
variable_definition_nodes: Dict[BasicBlock, Variable] = dict()
basic_block_of_definition = _init_basicblocks_of_definition(self.cfg)
for variable in used_variables:
if self._def_map.get(variable):
node_with_variable_definition = basic_block_of_definition[variable]
node_with_variable_definition = self._basic_block_of_definition[variable]
else:
node_with_variable_definition = None if is_head else self.head
if node_with_variable_definition not in variable_definition_nodes.keys():
Expand Down
6 changes: 4 additions & 2 deletions decompiler/structures/graphs/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ def __iter__(self) -> Iterator[Instruction]:
yield from self._instructions

def __str__(self) -> str:
"""Return a string representation of all instructions in the basic block."""
return "\n".join((f"{instruction}" for instruction in self))
"""Return a string representation of the block"""
# Note: Returning a string representation of all instructions here can be pretty expensive.
# Because most code does not expect this, we choose to simply return the cheap repr instead.
return repr(self)

def __repr__(self) -> str:
"""Return a debug representation of the block."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(self, address: int, ast: AbstractSyntaxTreeNode):
self.ast: AbstractSyntaxTreeNode = ast

def __str__(self) -> str:
"""Return a string representation of all instructions in the basic block."""
"""Return a string representation of the block"""
return str(self.ast)

def __repr__(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion decompiler/util/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@
},
{
"dest": "variable-name-generation.variable_name",
"default": "Var",
"default": "var",
"title": "Variable Base Name for hungarian notation",
"type": "string",
"description": "",
Expand Down
Loading

0 comments on commit 60ca62b

Please sign in to comment.