From 8daf9a649a0974e1e83928c0f567db29ca534429 Mon Sep 17 00:00:00 2001 From: Rihi <19492038+rihi@users.noreply.github.com> Date: Thu, 15 Aug 2024 10:20:26 +0200 Subject: [PATCH 1/6] Slight more performant way of updating set (#434) --- decompiler/pipeline/commons/expressionpropagationcommons.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/pipeline/commons/expressionpropagationcommons.py b/decompiler/pipeline/commons/expressionpropagationcommons.py index c9e95f4ff..2df7d00a3 100644 --- a/decompiler/pipeline/commons/expressionpropagationcommons.py +++ b/decompiler/pipeline/commons/expressionpropagationcommons.py @@ -363,7 +363,7 @@ def _get_dangerous_relations_between_definition_and_target(self, alias_variable: for basic_block in self._cfg: for instruction in basic_block: if isinstance(instruction, Relation) and instruction.destination.name == alias_variable.name: - relations |= {instruction} + relations.add(instruction) return relations From ecee86b38e13bd4529aac7b1c52e63667e7ad2bd Mon Sep 17 00:00:00 2001 From: Rihi <19492038+rihi@users.noreply.github.com> Date: Mon, 19 Aug 2024 11:15:55 +0200 Subject: [PATCH 2/6] Improve performance of expression propagation (#435) Making basicblock.__str__ return repr because its way cheaper --- decompiler/structures/graphs/basicblock.py | 6 ++++-- .../structures/graphs/restructuring_graph/transition_cfg.py | 2 +- tests/structures/graphs/test_basicblock.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/decompiler/structures/graphs/basicblock.py b/decompiler/structures/graphs/basicblock.py index 44e5aef0c..ec75062a1 100644 --- a/decompiler/structures/graphs/basicblock.py +++ b/decompiler/structures/graphs/basicblock.py @@ -43,8 +43,10 @@ def __iter__(self) -> Iterator[Instruction]: yield from self._instructions def __str__(self) -> str: - """Return a string representation of all instructions in the basic block.""" - return "\n".join((f"{instruction}" for instruction in self)) + """Return a string representation of the block""" + # Note: Returning a string representation of all instructions here can be pretty expensive. + # Because most code does not expect this, we choose to simply return the cheap repr instead. + return repr(self) def __repr__(self) -> str: """Return a debug representation of the block.""" diff --git a/decompiler/structures/graphs/restructuring_graph/transition_cfg.py b/decompiler/structures/graphs/restructuring_graph/transition_cfg.py index 7f95997e5..0271e9b36 100644 --- a/decompiler/structures/graphs/restructuring_graph/transition_cfg.py +++ b/decompiler/structures/graphs/restructuring_graph/transition_cfg.py @@ -29,7 +29,7 @@ def __init__(self, address: int, ast: AbstractSyntaxTreeNode): self.ast: AbstractSyntaxTreeNode = ast def __str__(self) -> str: - """Return a string representation of all instructions in the basic block.""" + """Return a string representation of the block""" return str(self.ast) def __repr__(self) -> str: diff --git a/tests/structures/graphs/test_basicblock.py b/tests/structures/graphs/test_basicblock.py index e278c521e..6ff7e8d8b 100644 --- a/tests/structures/graphs/test_basicblock.py +++ b/tests/structures/graphs/test_basicblock.py @@ -117,7 +117,7 @@ def test_instruction_management(testblock: BasicBlock): def test_block_representations(testblock: BasicBlock): - assert str(testblock) == "\n".join(str(instruction) for instruction in testblock) + assert str(testblock) == "BasicBlock(0x539, len=5)" assert repr(testblock) == "BasicBlock(0x539, len=5)" From 0e451e9c4ccd7b09545c473bac0258861e425753 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:25:00 +0200 Subject: [PATCH 3/6] [Hungarian Notation] Variables with different names but same labels are renamed to the same variable. (#317) Co-authored-by: rihi <19492038+rihi@users.noreply.github.com> Co-authored-by: Spartak Ehrlich Co-authored-by: ebehner --- .../variable_name_generation.py | 259 ++++++++---------- decompiler/util/default.json | 2 +- .../test_variable_name_generation.py | 156 ++++++----- 3 files changed, 214 insertions(+), 203 deletions(-) diff --git a/decompiler/pipeline/controlflowanalysis/variable_name_generation.py b/decompiler/pipeline/controlflowanalysis/variable_name_generation.py index 10876adb1..39ca7162a 100644 --- a/decompiler/pipeline/controlflowanalysis/variable_name_generation.py +++ b/decompiler/pipeline/controlflowanalysis/variable_name_generation.py @@ -1,66 +1,26 @@ -import re +import logging +import string from abc import ABC, abstractmethod +from collections import defaultdict +from dataclasses import dataclass from enum import Enum -from typing import Dict, List, Optional +from typing import Counter, List from decompiler.pipeline.stage import PipelineStage -from decompiler.structures.ast.ast_nodes import ConditionNode, LoopNode -from decompiler.structures.logic.logic_condition import LogicCondition -from decompiler.structures.pseudo import Condition, CustomType, DataflowObject, Float, GlobalVariable, Integer, Pointer, Type, Variable +from decompiler.structures.pseudo import ArrayType, CustomType, Float, GlobalVariable, Integer, Pointer, Type, Variable from decompiler.structures.visitors.ast_dataflowobjectvisitor import BaseAstDataflowObjectVisitor +from decompiler.structures.visitors.substitute_visitor import SubstituteVisitor from decompiler.task import DecompilerTask -""" - Small explanation how variables work in the decompiler: - - sometimes they are the same object in different structures (assignments, loops etc.) - - sometimes they are real copies of another - ==> Therefore if we change a parameter of a variable (name), we have no guarantee that all usages of the variable will be updated - ==> Therefore we always collect EVERY variable used + check with a method (already_renamed) if we already renamed it to our new naming scheme -""" - - -def _get_var_counter(var_name: str) -> Optional[str]: - """Return the counter of a given variable name, if any is present.""" - if counter := re.match(r".*?([0-9]+)$", var_name): - return counter.group(1) - return None - - -def _get_containing_variables(dfo: DataflowObject) -> List[Variable]: - """Returns a list of variables contained in this dataflow object.""" - variables: List[Variable] = [] - for sub_exp in dfo.subexpressions(): - if isinstance(sub_exp, Variable): - variables.append(sub_exp) - return variables - class VariableCollector(BaseAstDataflowObjectVisitor): - """Visit relevant nodes and collect their variables.""" - - def __init__(self, cond_map: Dict[LogicCondition, Condition]): - self._cond_map: Dict[LogicCondition, Condition] = cond_map - self._loop_vars: list[Variable] = [] - self._variables: list[Variable] = [] - - def get_variables(self) -> list[Variable]: - """Get collected variables.""" - return self._variables + """Collect all variables in nodes/expressions""" - def get_loop_variables(self) -> list[Variable]: - """Get collected variables used in loops.""" - return self._loop_vars - - def visit_condition_node(self, node: ConditionNode): - for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]: - self._variables.extend(_get_containing_variables(expr)) - - def visit_loop_node(self, node: LoopNode): - for expr in [self._cond_map[symbol] for symbol in node.condition.get_symbols()]: - self._loop_vars.extend(_get_containing_variables(expr)) + def __init__(self): + self.variables: list[Variable] = [] def visit_variable(self, expression: Variable): - self._variables.append(expression) + self.variables.append(expression) class NamingConvention(str, Enum): @@ -70,35 +30,28 @@ class NamingConvention(str, Enum): system_hungarian = "system_hungarian" -class RenamingScheme(ABC): - """Base class for different Renaming schemes.""" +@dataclass(frozen=True) +class VariableIdentifier: + name: str + ssa_label: int | None + + +def identifier(var: Variable) -> VariableIdentifier: + return VariableIdentifier(var.name, var.ssa_label) - def __init__(self, task: DecompilerTask) -> None: - """Collets all needed variables for renaming + filters already renamed + function arguments out""" - collector = VariableCollector(task.ast.condition_map) - collector.visit_ast(task.ast) - self._params: List[Variable] = task.function_parameters - self._loop_vars: List[Variable] = collector.get_loop_variables() - self._variables: List[Variable] = list(filter(self._filter_variables, collector.get_variables())) - - def _filter_variables(self, item: Variable) -> bool: - """Return False if variable is either a: - - parameter - - renamed loop variable - - GlobalVariable - """ - return ( - not item in self._params - and not (item in self._loop_vars and item.name.find("var_") == -1) - and not isinstance(item, GlobalVariable) - ) + +class RenamingScheme(ABC): @abstractmethod - def renameVariableNames(self): - """Abstract method which should rename variables with respect to the used scheme.""" + def rename_variable(self, variable: Variable) -> Variable | None: pass +class NoRenamingScheme(RenamingScheme): + def rename_variable(self, variable: Variable) -> Variable | None: + return None + + class HungarianScheme(RenamingScheme): """Class which renames variables into hungarian notation.""" @@ -107,62 +60,84 @@ class HungarianScheme(RenamingScheme): Integer: {8: "ch", 16: "s", 32: "i", 64: "l", 128: "i128"}, } - custom_var_names = {"tmp_": "Tmp", "loop_break": "LoopBreak"} - def __init__(self, task: DecompilerTask) -> None: - super().__init__(task) - self._name = VariableNameGeneration.name - self._var_name: str = task.options.getstring(f"{self._name}.variable_name", fallback="Var") - self._pointer_base: bool = task.options.getboolean(f"{self._name}.pointer_base", fallback=True) - self._type_separator: str = task.options.getstring(f"{self._name}.type_separator", fallback="") - self._counter_separator: str = task.options.getstring(f"{self._name}.counter_separator", fallback="") - - def renameVariableNames(self): - """Rename all collected variables to the hungarian notation.""" - for var in self._variables: - if self.alread_renamed(var._name): - continue - counter = _get_var_counter(var.name) - var._name = self._hungarian_notation(var, counter if counter else "") - - def _hungarian_notation(self, var: Variable, counter: int) -> str: - """Return hungarian notation to a given variable.""" - return f"{self._hungarian_prefix(var.type)}{self._type_separator}{self.custom_var_names.get(var._name.rstrip(counter), self._var_name)}{self._counter_separator}{counter}" - - def _hungarian_prefix(self, var_type: Type) -> str: + self._task = task + self._var_name: str = task.options.getstring(f"{VariableNameGeneration.name}.variable_name", fallback="var") + self._pointer_base: bool = task.options.getboolean(f"{VariableNameGeneration.name}.pointer_base", fallback=True) + self._type_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.type_separator", fallback="") + self._counter_separator: str = task.options.getstring(f"{VariableNameGeneration.name}.counter_separator", fallback="") + + self._variables = self._get_variables_to_rename() + + counter = Counter[tuple[str, str]]() + self._variable_rename_map: dict[VariableIdentifier, str] = {} + + variable_id: VariableIdentifier + vars: list[Variable] + for variable_id, vars in self._variables.items(): + # because the way our cfg works, each use site of each variable could theoretically have a different type + # we just take the first assuming that they are all the same... + var_type = Counter(vars).most_common()[0][0].type + name_identifier = self._get_name_identifier(variable_id.name) + prefix = self._hungarian_prefix(var_type) + + counter_postfix = f"{self._counter_separator}{counter[(name_identifier, prefix)]}" + counter[(name_identifier, prefix)] += 1 + + new_name: str = f"{prefix}{self._type_separator}{name_identifier.capitalize()}{counter_postfix}" + + self._variable_rename_map[variable_id] = new_name + + def rename_variable(self, variable: Variable) -> Variable | None: + new_name = self._variable_rename_map.get(identifier(variable)) + if new_name is None: + return None + else: + return variable.copy(name=new_name) + + def _get_name_identifier(self, name: str) -> str: + """Return identifier by purging non alpha chars + capitalize the char afterwards. If string is too short, return generic""" + if len(name) < 2: + return self._var_name + + x = string.capwords("".join([c if c.isalnum() else " " for c in name])) + x = x[0].lower() + x[1:] # important! We want to be able to choose later if the first letter should be capitalized + return "".join(filter(str.isalpha, x)) + + def _hungarian_prefix(self, var_type: Type) -> str | None: """Return hungarian prefix to a given variable type.""" - if isinstance(var_type, Pointer): - if self._pointer_base: - return f"{self._hungarian_prefix(var_type.type)}p" - return "p" - if isinstance(var_type, CustomType): - if var_type.is_boolean: - return "b" - elif var_type.size == 0: - return "v" - else: - return "" - if isinstance(var_type, (Integer, Float)): - sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else "" - prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk") - return f"{sign}{prefix}" - return "" - - def alread_renamed(self, name) -> bool: - """Return true if variable with custom name was already renamed, false otherwise""" - renamed_keys_words = [key for key in self.custom_var_names.values()] + ["unk", self._var_name] - return any(keyword in name for keyword in renamed_keys_words) - - -class DefaultScheme(RenamingScheme): - """Class which renames variables into the default scheme.""" - - def __init__(self, task: DecompilerTask) -> None: - super().__init__(task) - - def renameVariableNames(self): - # Maybe make the suboptions more generic, so that the default scheme can also be changed by some parameters? - pass + match var_type: + case Pointer(): + if self._pointer_base: + return f"{self._hungarian_prefix(var_type.type)}p" + else: + return "p" + case ArrayType(): + return f"arr{self._hungarian_prefix(var_type.type)}" + case CustomType(): + if var_type.is_boolean: + return "b" + if var_type.size == 0: + return "v" + case Integer() | Float(): + sign = "u" if isinstance(var_type, Integer) and not var_type.is_signed else "" + prefix = self.type_prefix[type(var_type)].get(var_type.size, "unk") + return f"{sign}{prefix}" + + return "unk" + + def _get_variables_to_rename(self) -> dict[VariableIdentifier, list[Variable]]: + collector = VariableCollector() + collector.visit_ast(self._task.ast) + + def include_variable(item: Variable): + return item not in self._task.function_parameters and not isinstance(item, GlobalVariable) + + variables: dict[VariableIdentifier, List[Variable]] = defaultdict(list) + for variable in collector.variables: + if include_variable(variable): + variables[identifier(variable)].append(variable) + return variables class VariableNameGeneration(PipelineStage): @@ -173,21 +148,29 @@ class VariableNameGeneration(PipelineStage): name: str = "variable-name-generation" - def __init__(self): - self._notation: str = None - def run(self, task: DecompilerTask): """Rename variable names to the given scheme.""" - self._notation = task.options.getstring(f"{self.name}.notation", fallback="default") + notation = task.options.getstring(f"{self.name}.notation", fallback=NamingConvention.default) - renamer: RenamingScheme = None - - match self._notation: + scheme: RenamingScheme + match notation: case NamingConvention.default: - renamer = DefaultScheme(task) + scheme = NoRenamingScheme() case NamingConvention.system_hungarian: - renamer = HungarianScheme(task) + scheme = HungarianScheme(task) case _: + logging.warning("Unknown naming convention: %s", notation) return - renamer.renameVariableNames() + self._rename_with_scheme(task, scheme) + + @staticmethod + def _rename_with_scheme(task: DecompilerTask, rename_scheme: RenamingScheme): + rename_visitor = SubstituteVisitor(lambda o: rename_scheme.rename_variable(o) if isinstance(o, Variable) else None) + + for node in task.ast.nodes: + for obj in node.get_dataflow_objets(task.ast.condition_map): + new_obj = rename_visitor.visit(obj) + if new_obj is not None: + # while this should not happen, in theory, there is nothing preventing this case... + logging.warning("Variable name renaming couldn't rename %s", new_obj) diff --git a/decompiler/util/default.json b/decompiler/util/default.json index e2eedadac..f99a95817 100644 --- a/decompiler/util/default.json +++ b/decompiler/util/default.json @@ -230,7 +230,7 @@ }, { "dest": "variable-name-generation.variable_name", - "default": "Var", + "default": "var", "title": "Variable Base Name for hungarian notation", "type": "string", "description": "", diff --git a/tests/pipeline/controlflowanalysis/test_variable_name_generation.py b/tests/pipeline/controlflowanalysis/test_variable_name_generation.py index dc98392b2..c29b29b68 100644 --- a/tests/pipeline/controlflowanalysis/test_variable_name_generation.py +++ b/tests/pipeline/controlflowanalysis/test_variable_name_generation.py @@ -1,8 +1,7 @@ import pytest from decompiler.backend.codegenerator import CodeGenerator from decompiler.pipeline.controlflowanalysis import VariableNameGeneration -from decompiler.structures.ast.ast_nodes import CodeNode -from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree +from decompiler.structures.ast.syntaxtree import AbstractSyntaxTree, CodeNode from decompiler.structures.logic.logic_condition import LogicCondition from decompiler.structures.pseudo import Assignment, Constant, CustomType, Float, Integer, Pointer, Variable from decompiler.task import DecompilerTask @@ -33,43 +32,43 @@ ALL_TYPES = [I8, I16, I32, I64, I128, UI8, UI16, UI32, UI64, UI128, HALF, FLOAT, DOUBLE, LONG_DOUBLE, QUADRUPLE, OCTUPLE, BOOL, VOID] EXPECTED_BASE_NAMES = [ "chVar0", - "sVar1", - "iVar2", - "lVar3", - "i128Var4", - "uchVar5", - "usVar6", - "uiVar7", - "ulVar8", - "ui128Var9", - "hVar10", - "fVar11", - "dVar12", - "ldVar13", - "qVar14", - "oVar15", - "bVar16", - "vVar17", + "sVar0", + "iVar0", + "lVar0", + "i128Var0", + "uchVar0", + "usVar0", + "uiVar0", + "ulVar0", + "ui128Var0", + "hVar0", + "fVar0", + "dVar0", + "ldVar0", + "qVar0", + "oVar0", + "bVar0", + "vVar0", ] EXPECTED_POINTER_NAMES = [ "chpVar0", - "spVar1", - "ipVar2", - "lpVar3", - "i128pVar4", - "uchpVar5", - "uspVar6", - "uipVar7", - "ulpVar8", - "ui128pVar9", - "hpVar10", - "fpVar11", - "dpVar12", - "ldpVar13", - "qpVar14", - "opVar15", - "bpVar16", - "vpVar17", + "spVar0", + "ipVar0", + "lpVar0", + "i128pVar0", + "uchpVar0", + "uspVar0", + "uipVar0", + "ulpVar0", + "ui128pVar0", + "hpVar0", + "fpVar0", + "dpVar0", + "ldpVar0", + "qpVar0", + "opVar0", + "bpVar0", + "vpVar0", ] @@ -79,6 +78,8 @@ def _generate_options(notation: str = "system_hungarian", pointer_base: bool = T options.set(f"{PIPELINE_NAME}.pointer_base", pointer_base) options.set(f"{PIPELINE_NAME}.type_separator", type_sep) options.set(f"{PIPELINE_NAME}.counter_separator", counter_sep) + options.set(f"{PIPELINE_NAME}.rename_while_loop_variables", True) + options.set(f"{PIPELINE_NAME}.for_loop_variable_names", ["i", "j", "k", "l", "m", "n"]) options.set(f"code-generator.max_complexity", 100) options.set("code-generator.use_increment_int", False) options.set("code-generator.use_increment_float", False) @@ -96,9 +97,9 @@ def _run_vng(ast: AbstractSyntaxTree, options: Options = _generate_options()): def test_default_notation_1(): true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", I32), Constant(0)), true_value), {}) + ast = AbstractSyntaxTree(CodeNode([assignment := Assignment(Variable("var_0", I32), Constant(0))], true_value), {}) _run_vng(ast, _generate_options(notation="default")) - assert var.name == "var_0" + assert assignment.destination.name == "var_0" @pytest.mark.parametrize( @@ -108,52 +109,79 @@ def test_default_notation_1(): ) def test_hungarian_notation(variable, name): true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode([Assignment(variable, Constant(42))], true_value), {}) + ast = AbstractSyntaxTree(CodeNode([assignment := Assignment(variable, Constant(42))], true_value), {}) _run_vng(ast) - assert variable.name == name + assert assignment.destination.name == name -@pytest.mark.parametrize("type_sep, counter_sep", [("", ""), ("_", "_")]) +@pytest.mark.parametrize("type_sep, counter_sep", [("", ""), ("_", "_"), ("", "_"), ("_", "")]) def test_hungarian_notation_separators(type_sep: str, counter_sep: str): true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", I32), Constant(0)), true_value), {}) + ast = AbstractSyntaxTree(CodeNode([assignment := Assignment(Variable("var_0", I32), Constant(0))], true_value), {}) _run_vng(ast, _generate_options(type_sep=type_sep, counter_sep=counter_sep)) - assert var.name == f"i{type_sep}Var{counter_sep}0" + assert assignment.destination.name == f"i{type_sep}Var{counter_sep}0" def test_custom_type(): true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", CustomType("size_t", 64)), Constant(0)), true_value), {}) + ast = AbstractSyntaxTree(CodeNode([assignment := Assignment(Variable("var_0", CustomType("size_t", 64)), Constant(0))], true_value), {}) _run_vng(ast, _generate_options()) - assert var._name == "Var0" + assert assignment.destination.name == "unkVar0" def test_bninja_invalid_type(): true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("var_0", Integer(104, True)), Constant(0)), true_value), {}) + ast = AbstractSyntaxTree(CodeNode([assignment := Assignment(Variable("var_0", Integer(104, True)), Constant(0))], true_value), {}) _run_vng(ast, _generate_options()) - assert var._name == "unkVar0" - - -def test_tmp_variable(): - true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) - ast = AbstractSyntaxTree(CodeNode(Assignment(var := Variable("tmp_42", Float(64)), Constant(0)), true_value), {}) - _run_vng(ast, _generate_options()) - assert var._name == "dTmp42" + assert assignment.destination.name == "unkVar0" def test_same_variable(): """Variables can be copies of the same one. The renamer should only rename a variable once. (More times would destroy the actual name)""" - true_value = LogicCondition.initialize_true(LogicCondition.generate_new_context()) var1 = Variable("tmp_42", Float(64)) - var2 = Variable("var_0", Integer(104, True)) - ast = AbstractSyntaxTree( - CodeNode( - [Assignment(var1, Constant(0)), Assignment(var1, Constant(0)), Assignment(var2, Constant(0)), Assignment(var2, Constant(0))], - true_value, - ), - {}, + node = CodeNode( + [Assignment(var1, Constant(0)), Assignment(var1, Constant(0))], + LogicCondition.initialize_true(LogicCondition.generate_new_context()), + ) + ast = AbstractSyntaxTree(node, {}) + _run_vng(ast, _generate_options()) + assert node.instructions[0].destination.name == "dTmp0" + assert node.instructions[1].destination.name == "dTmp0" + + +def test_same_variable_idx(): + """Variables with the same counter should not be renamed into the same thing""" + var1 = Variable("x_1", Integer.int32_t()) + var2 = Variable("y_1", Integer.int32_t()) + node = CodeNode( + [Assignment(var1, Constant(0)), Assignment(var2, Constant(0))], + LogicCondition.initialize_true(LogicCondition.generate_new_context()), + ) + ast = AbstractSyntaxTree(node, {}) + _run_vng(ast, _generate_options()) + assert node.instructions[0].destination.name != node.instructions[1].destination.name + + +def test_different_custom_names_0(): + node = CodeNode( + [ + Assignment(Variable("tmp_42", Float(64)), Constant(0)), + Assignment(Variable("entry_", Float(64)), Constant(0)), + Assignment(Variable("exit_", Float(64)), Constant(0)), + ], + LogicCondition.initialize_true(LogicCondition.generate_new_context()), + ) + ast = AbstractSyntaxTree(node, {}) + _run_vng(ast, _generate_options()) + assert node.instructions[0].destination.name == "dTmp0" + assert node.instructions[1].destination.name == "dEntry0" + assert node.instructions[2].destination.name == "dExit0" + + +def test_different_custom_names_1(): + node = CodeNode( + [Assignment(Variable("loop_break", Float(64)), Constant(0))], LogicCondition.initialize_true(LogicCondition.generate_new_context()) ) + ast = AbstractSyntaxTree(node, {}) _run_vng(ast, _generate_options()) - assert var1._name == "dTmp42" - assert var2._name == "unkVar0" + assert node.instructions[0].destination.name == "dLoopbreak0" From 0afec524746c6ac2b2cc57d9f6a1b13cecc85e1e Mon Sep 17 00:00:00 2001 From: Rihi <19492038+rihi@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:57:04 +0200 Subject: [PATCH 4/6] Calculate block def site of variables once (#430) --- decompiler/pipeline/preprocessing/phi_predecessors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/decompiler/pipeline/preprocessing/phi_predecessors.py b/decompiler/pipeline/preprocessing/phi_predecessors.py index b924efdc1..1e617d14d 100644 --- a/decompiler/pipeline/preprocessing/phi_predecessors.py +++ b/decompiler/pipeline/preprocessing/phi_predecessors.py @@ -27,6 +27,7 @@ def run(self, task: DecompilerTask): self.cfg = task.graph self.head = task.graph.root self._def_map, self._use_map = _init_maps(self.cfg) + self._basic_block_of_definition = _init_basicblocks_of_definition(self.cfg) self.extend_phi_functions() def extend_phi_functions(self): @@ -77,10 +78,9 @@ def _basicblocks_of_used_variables_in_phi_function( each key is the variable that is defined at the node """ variable_definition_nodes: Dict[BasicBlock, Variable] = dict() - basic_block_of_definition = _init_basicblocks_of_definition(self.cfg) for variable in used_variables: if self._def_map.get(variable): - node_with_variable_definition = basic_block_of_definition[variable] + node_with_variable_definition = self._basic_block_of_definition[variable] else: node_with_variable_definition = None if is_head else self.head if node_with_variable_definition not in variable_definition_nodes.keys(): From d1da88646a49938a90ffda6fdff4b3e84209dbf1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 08:16:34 +0000 Subject: [PATCH 5/6] [IndexError@globals.py:301] IndexError: list index out of range (#433) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Spartak Ehrlich --- decompiler/frontend/binaryninja/handlers/globals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/handlers/globals.py b/decompiler/frontend/binaryninja/handlers/globals.py index 076610109..a3b71256b 100644 --- a/decompiler/frontend/binaryninja/handlers/globals.py +++ b/decompiler/frontend/binaryninja/handlers/globals.py @@ -344,7 +344,7 @@ def _get_string_at(view: BinaryView, addr: int, width: int) -> Optional[str]: def addr_in_section(view: BinaryView, addr: int) -> bool: """Returns True if address is contained in a section, False otherwise""" for _, section in view.sections.items(): - if addr >= section.start and addr <= section.end: + if addr >= section.start and addr < section.end: return True return False From af2365531cc593167d01723b2b53e0ca6db9c4a7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 08:47:43 +0000 Subject: [PATCH 6/6] [ctypes.ArgumentError@_binaryninjacore.py:18869] ctypes.ArgumentError: argument 2: TypeError: wrong type (#420) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Spartak Ehrlich Co-authored-by: NeoQuix <83972469+NeoQuix@users.noreply.github.com> --- decompiler/frontend/binaryninja/handlers/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/decompiler/frontend/binaryninja/handlers/constants.py b/decompiler/frontend/binaryninja/handlers/constants.py index e7e5dffdd..25c897889 100644 --- a/decompiler/frontend/binaryninja/handlers/constants.py +++ b/decompiler/frontend/binaryninja/handlers/constants.py @@ -40,7 +40,7 @@ def lift_constant(self, constant: mediumlevelil.MediumLevelILConst, **kwargs): """Lift the given constant value.""" if constant.constant in [math.inf, -math.inf, math.nan]: return NotUseableConstant(str(constant.constant)) - if addr_in_section(constant.function.view, constant.constant): + if isinstance(constant.constant, int) and addr_in_section(constant.function.view, constant.constant): return self.lift_constant_pointer(constant) return Constant(constant.constant, vartype=self._lifter.lift(constant.expr_type))