From a5041f75af47b68c06ab3fb09fb9fae12311c2c7 Mon Sep 17 00:00:00 2001
From: rihi <19492038+rihi@users.noreply.github.com>
Date: Wed, 29 May 2024 20:51:37 +0200
Subject: [PATCH 1/2] Explicit hash/eqs

Remove the repr-based __eq__/__hash__/__repr__ defaults from
DataflowObject and give each pseudo expression, instruction and
operation class an explicit, attribute-based __eq__/__hash__ pair.

ArrayInfo becomes a hashable dataclass (unsafe_hash=True) because
UnaryOperation.__hash__ now includes it in the hashed tuple.

---
 decompiler/structures/pseudo/expressions.py  | 78 +++++++++++++++++---
 decompiler/structures/pseudo/instructions.py | 60 +++++++++++++++
 decompiler/structures/pseudo/operations.py   | 51 ++++++++++++-
 3 files changed, 176 insertions(+), 13 deletions(-)

diff --git a/decompiler/structures/pseudo/expressions.py b/decompiler/structures/pseudo/expressions.py
index 5539dfc14..b30241d03 100644
--- a/decompiler/structures/pseudo/expressions.py
+++ b/decompiler/structures/pseudo/expressions.py
@@ -58,18 +58,6 @@ class DataflowObject(ABC):
     def __init__(self, tags: Optional[Tuple[Tag, ...]] = None):
         self.tags = tags
 
-    def __eq__(self, other) -> bool:
-        """Check for equality."""
-        return type(other) == type(self) and hash(self) == hash(other)
-
-    def __hash__(self) -> int:
-        """Return a hash value for the expression."""
-        return hash(repr(self))
-
-    def __repr__(self):
-        """Return a debug representation."""
-        return str(self)
-
     @abstractmethod
     def __iter__(self) -> Iterator[DataflowObject]:
         """Iterate all nested DataflowObjects."""
@@ -149,6 +137,12 @@ def __init__(self, msg: str, tags: Optional[Tuple[Tag, ...]] = None):
         self.msg = msg
         super().__init__(tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, UnknownExpression) and self.msg == __value.msg
+
+    def __hash__(self):
+        return hash(self.msg)
+
     def __str__(self) -> str:
         """Return the error message as string representation."""
         return self.msg
@@ -183,6 +177,12 @@ def __init__(
         self._pointee = pointee
         super().__init__(tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Constant) and self.value == __value.value and self._type == __value._type and self._pointee == __value.pointee
+
+    def __hash__(self):
+        return hash((tuple(self.value) if isinstance(self.value, list) else self.value, self._type, self._pointee))
+
     def __repr__(self) -> str:
         value = str(self) if isinstance(self.value, str) else self.value
         if self.pointee:
@@ -235,6 +235,12 @@ class NotUseableConstant(Constant):
     def __init__(self, value: str, tags: Optional[Tuple[Tag, ...]] = None):
         super().__init__(value, CustomType("double", 0), tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, NotUseableConstant) and self.value == __value.value
+
+    def __hash__(self):
+        return hash(self.value)
+
     def __str__(self) -> str:
         """Return a string because NotUseableConstant are string only"""
         return self.value
@@ -255,6 +261,12 @@ def __init__(self, name: str, value: Union[int, float], vartype: Type = UnknownT
         super().__init__(value, vartype, tags=tags)
         self._name = name
 
+    def __eq__(self, __value):
+        return isinstance(__value, Symbol) and self._name == __value._name and self.value == __value.value
+
+    def __hash__(self):
+        return hash((self._name, self.value))
+
     @property
     def name(self) -> str:
         return self._name
@@ -278,6 +290,12 @@ def copy(self) -> Symbol:
 class FunctionSymbol(Symbol):
     """Represents a function name"""
 
+    def __eq__(self, __value):
+        return isinstance(__value, FunctionSymbol) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def copy(self) -> FunctionSymbol:
         return FunctionSymbol(self.name, self.value, self._type.copy(), self.tags)
 
@@ -285,6 +303,12 @@ def copy(self) -> FunctionSymbol:
 class ImportedFunctionSymbol(FunctionSymbol):
     """Represents an imported function name"""
 
+    def __eq__(self, __value):
+        return isinstance(__value, ImportedFunctionSymbol) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def copy(self) -> ImportedFunctionSymbol:
         return ImportedFunctionSymbol(self._name, self.value, self._type.copy(), self.tags)
 
@@ -297,6 +321,12 @@ class IntrinsicSymbol(FunctionSymbol):
     def __init__(self, name: str):
         super().__init__(name, self.INTRINSIC_ADDRESS)
 
+    def __eq__(self, __value):
+        return isinstance(__value, IntrinsicSymbol) and self.name == __value.name
+
+    def __hash__(self):
+        return hash(self.name)
+
     def __repr__(self):
         return f"intrinsic '{self.name}'"
 
@@ -324,6 +354,12 @@ def __init__(
         self.ssa_name = ssa_name
         super().__init__(tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Variable) and self._name == __value._name and self.ssa_label == __value.ssa_label and self._type == __value._type and self.is_aliased == __value.is_aliased
+
+    def __hash__(self):
+        return hash((self._name, self.ssa_label, self._type, self.is_aliased))
+
     def __repr__(self) -> str:
         """Return a debug representation of the variable, which includes all the attributes"""
         return f"{self.name}#{self.ssa_label} (type: {self.type} aliased: {self.is_aliased})"
@@ -399,6 +435,12 @@ def __init__(
         self.initial_value = initial_value
         self.is_constant = is_constant
 
+    def __eq__(self, __value):
+        return isinstance(__value, GlobalVariable) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def copy(
         self,
         name: str = None,
@@ -445,6 +487,12 @@ def __init__(self, high: Variable, low: Variable, vartype: Type = UnknownType(),
         self._low = low
         self._type = vartype
 
+    def __eq__(self, __value):
+        return isinstance(__value, RegisterPair) and self._high == __value._high and self._low == __value._low and self._type == __value._type
+
+    def __hash__(self):
+        return hash((self._high, self._low, self._type))
+
     def __repr__(self) -> str:
         """Return debug representation of register pair"""
         return f"{repr(self._high)}:{repr(self._low)} type: {self.type}"
@@ -507,6 +555,12 @@ def __init__(self, value: list[Constant], vartype: DecompiledType = UnknownType(
             tags,
         )
 
+    def __eq__(self, __value):
+        return isinstance(__value, ConstantComposition) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self) -> str:
         """Return a string representation of the ConstantComposition"""
         return "{" + ",".join([str(x) for x in self.value]) + "}"
diff --git a/decompiler/structures/pseudo/instructions.py b/decompiler/structures/pseudo/instructions.py
index 75625e392..aa1b69dda 100644
--- a/decompiler/structures/pseudo/instructions.py
+++ b/decompiler/structures/pseudo/instructions.py
@@ -57,6 +57,12 @@ def __init__(self, comment: str, comment_style: str = "C", tags: Optional[Tuple[
         self._comment_style = comment_style
         self._open_comment, self._close_comment = self.STYLES.get(comment_style, self.STYLES[self.DEFAULT_STYLE])
 
+    def __eq__(self, __value):
+        return isinstance(__value, Comment) and self._comment == __value._comment and self._comment_style == __value._comment_style
+
+    def __hash__(self):
+        return hash((self._comment, self._comment_style))
+
     def __repr__(self) -> str:
         """Return representation of comment."""
         return f"{self._open_comment} {self._comment} {self._close_comment}"
@@ -161,6 +167,12 @@ def __init__(self, destination: Expression, value: Expression, tags: Optional[Tu
         """Init a new Assignment."""
         super(Assignment, self).__init__(destination, value, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Assignment) and self._destination == __value._destination and self._value == __value._value
+
+    def __hash__(self):
+        return hash((self._destination, self._value))
+
     def __str__(self) -> str:
         """Return a string representation starting with the lhs."""
         if isinstance(self._destination, ListOperation) and not self._destination.operands:
@@ -211,6 +223,12 @@ def __init__(self, destination: Variable, value: Variable, tags: Optional[Tuple[
         """Init a new Relation."""
         super(Relation, self).__init__(destination, value, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Relation) and self._destination == __value._destination and self._value == __value._value
+
+    def __hash__(self):
+        return hash((self._destination, self._value))
+
     def __str__(self) -> str:
         """Return a string representation starting with the lhs."""
         return f"{self.destination} -> {self.value}"
@@ -314,6 +332,12 @@ def __init__(self, condition: Condition, tags: Optional[Tuple[Tag, ...]] = None)
         """Init a new branch instruction."""
         super(Branch, self).__init__(condition, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Branch) and self._condition == __value._condition
+
+    def __hash__(self):
+        return hash(self._condition)
+
     def __repr__(self) -> str:
         """Return a debug representation of a branch"""
         return f"if {repr(self.condition)}"
@@ -333,6 +357,12 @@ def __init__(self, condition: Expression, tags: Optional[Tuple[Tag, ...]] = None
         """Init a new branch instruction."""
         super(IndirectBranch, self).__init__(condition, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, IndirectBranch) and self._condition == __value._condition
+
+    def __hash__(self):
+        return hash(self._condition)
+
     def __repr__(self) -> str:
         """Return a debug representation of a branch"""
         return f"jmp {repr(self.condition)}"
@@ -355,6 +385,12 @@ def __init__(self, values, tags: Optional[Tuple[Tag, ...]] = None):
         super().__init__(tags)
         self._values = ListOperation(values)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Return) and self._values == __value._values
+
+    def __hash__(self):
+        return hash(self._values)
+
     def __repr__(self) -> str:
         return f"return {repr(self._values)}"
 
@@ -395,6 +431,12 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
 
 
 class Break(Instruction):
+    def __eq__(self, __value):
+        return isinstance(__value, Break)
+
+    def __hash__(self):
+        return hash(Break)
+
     def __iter__(self) -> Iterator[Expression]:
         yield from ()
 
@@ -417,6 +459,12 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
 
 
 class Continue(Instruction):
+    def __eq__(self, __value):
+        return isinstance(__value, Continue)
+
+    def __hash__(self):
+        return hash(Continue)
+
     def __iter__(self) -> Iterator[Expression]:
         yield from ()
 
@@ -457,6 +505,12 @@ def __init__(
         self._origin_block = origin_block if origin_block else {}
         super().__init__(destination, ListOperation(value), tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Phi) and self._destination == __value._destination and self._value == __value._value
+
+    def __hash__(self):
+        return hash((self._destination, self._value))
+
     def __repr__(self):
         return f"{repr(self.destination)} = ϕ({repr(self.value)})"
 
@@ -516,6 +570,12 @@ class MemPhi(Phi):
     def __init__(self, destination_var: Variable, source_vars: Sequence[Variable], tags: Optional[Tuple[Tag, ...]] = None):
         super().__init__(destination_var, source_vars, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, MemPhi) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self) -> str:
         return f"{self.destination} = ϕ({self.value})"
 
diff --git a/decompiler/structures/pseudo/operations.py b/decompiler/structures/pseudo/operations.py
index 3c05d6f95..3fdc03b89 100644
--- a/decompiler/structures/pseudo/operations.py
+++ b/decompiler/structures/pseudo/operations.py
@@ -193,6 +193,12 @@ def __init__(
         self._type = vartype
         super().__init__(tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, Operation) and self._operation == __value._operation and self._operands == __value._operands and self.type == __value.type
+
+    def __hash__(self):
+        return hash((self._operation, tuple(self._operands), self.type))
+
     def __repr__(self) -> str:
         """Return debug representation of an operation. Used in equality checks"""
         return f"{self.operation.name} [{','.join(map(repr, self._operands))}] {self.type}"
@@ -267,6 +273,12 @@ class ListOperation(Operation):
     def __init__(self, operands: Sequence[Expression], tags: Optional[Tuple[Tag, ...]] = None):
         super().__init__(OperationType.list_op, operands, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, ListOperation) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self) -> str:
         return ",".join(map(str, self.operands))
 
@@ -283,7 +295,7 @@ def accept(self, visitor: DataflowObjectVisitorInterface[T]) -> T:
         return visitor.visit_list_operation(self)
 
 
-@dataclass
+@dataclass(unsafe_hash=True)
 class ArrayInfo:
     """Class to store array info information for dereference if available
     base: variable storing start address of an array
@@ -331,6 +343,12 @@ def __init__(
         self.contraction = contraction
         self.array_info = array_info
 
+    def __eq__(self, __value):
+        return isinstance(__value, UnaryOperation) and self.contraction == __value.contraction and self.array_info == __value.array_info and super().__eq__(__value)
+
+    def __hash__(self):
+        return hash((self.contraction, self.array_info, super().__hash__()))
+
     def __str__(self):
         """Return a string representation of the unary operation"""
         if self.operation == OperationType.cast and self.contraction:
@@ -401,6 +419,12 @@ def __init__(
         self.member_offset = offset
         self.member_name = member_name
 
+    def __eq__(self, __value):
+        return isinstance(__value, MemberAccess) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self):
         # use -> when accessing member via a pointer to a struct: ptrBook->title
         # use . when accessing struct member directly: book.title
@@ -441,6 +465,12 @@ class BinaryOperation(Operation):
 
     __match_args__ = ("operation", "left", "right")
 
+    def __eq__(self, __value):
+        return isinstance(__value, BinaryOperation) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self) -> str:
         """Return a string representation with infix notation."""
         str_left = f"({self.left})" if isinstance(self.left, Operation) else f"{self.left}"
@@ -484,6 +514,12 @@ def __init__(
         self._writes_memory = writes_memory
         self._meta_data = meta_data
 
+    def __eq__(self, __value):
+        return isinstance(__value, Call) and self._function == __value._function and self._operands == __value._operands
+
+    def __hash__(self):
+        return hash((self._function, tuple(self._operands)))
+
     def __repr__(self):
         """Return debug representation of a call"""
         if self._meta_data is not None:
@@ -574,6 +610,13 @@ class Condition(BinaryOperation):
         OperationType.less_us: OperationType.greater_or_equal_us,
     }
 
+    def __eq__(self, __value):
+        v_ = isinstance(__value, Condition) and super().__eq__(__value)
+        return v_
+
+    def __hash__(self):
+        return super().__hash__()
+
     @property
     def type(self) -> Type:
         """Conditions always return a boolean value."""
@@ -608,6 +651,12 @@ def __init__(self, condition: Expression, true: Expression, false: Expression, t
         """Initialize a new inline-if operation."""
         super().__init__(OperationType.ternary, [condition, true, false], true.type, tags=tags)
 
+    def __eq__(self, __value):
+        return isinstance(__value, TernaryExpression) and super().__eq__(__value)
+
+    def __hash__(self):
+        return super().__hash__()
+
     def __str__(self) -> str:
         """Returns string representation"""
         return f"{self.condition} ? {self.true} : {self.false}"

From 7501e85a72271f90d5a42ec20db2d47bbdea23bd Mon Sep 17 00:00:00 2001
From: rihi <19492038+rihi@users.noreply.github.com>
Date: Thu, 13 Jun 2024 11:57:46 +0200
Subject: [PATCH 2/2] black

---
 decompiler/structures/pseudo/expressions.py | 21 +++++++++++++++++----
 decompiler/structures/pseudo/operations.py  | 14 ++++++++++++--
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/decompiler/structures/pseudo/expressions.py b/decompiler/structures/pseudo/expressions.py
index b30241d03..3ace5ae8d 100644
--- a/decompiler/structures/pseudo/expressions.py
+++ b/decompiler/structures/pseudo/expressions.py
@@ -35,7 +35,7 @@
 from ...util.insertion_ordered_set import InsertionOrderedSet
 
 from .complextypes import Enum
-from .typing import ArrayType, CustomType, Type, UnknownType
+from .typing import CustomType, Type, UnknownType
 
 T = TypeVar("T")
 DecompiledType = TypeVar("DecompiledType", bound=Type)
@@ -178,7 +178,12 @@ def __init__(
         super().__init__(tags)
 
     def __eq__(self, __value):
-        return isinstance(__value, Constant) and self.value == __value.value and self._type == __value._type and self._pointee == __value.pointee
+        return (
+            isinstance(__value, Constant)
+            and self.value == __value.value
+            and self._type == __value._type
+            and self._pointee == __value.pointee
+        )
 
     def __hash__(self):
         return hash((tuple(self.value) if isinstance(self.value, list) else self.value, self._type, self._pointee))
@@ -355,7 +360,13 @@ def __init__(
         super().__init__(tags)
 
     def __eq__(self, __value):
-        return isinstance(__value, Variable) and self._name == __value._name and self.ssa_label == __value.ssa_label and self._type == __value._type and self.is_aliased == __value.is_aliased
+        return (
+            isinstance(__value, Variable)
+            and self._name == __value._name
+            and self.ssa_label == __value.ssa_label
+            and self._type == __value._type
+            and self.is_aliased == __value.is_aliased
+        )
 
     def __hash__(self):
         return hash((self._name, self.ssa_label, self._type, self.is_aliased))
@@ -488,7 +499,9 @@ def __init__(self, high: Variable, low: Variable, vartype: Type = UnknownType(),
         self._type = vartype
 
     def __eq__(self, __value):
-        return isinstance(__value, RegisterPair) and self._high == __value._high and self._low == __value._low and self._type == __value._type
+        return (
+            isinstance(__value, RegisterPair) and self._high == __value._high and self._low == __value._low and self._type == __value._type
+        )
 
     def __hash__(self):
         return hash((self._high, self._low, self._type))
diff --git a/decompiler/structures/pseudo/operations.py b/decompiler/structures/pseudo/operations.py
index 3fdc03b89..127263214 100644
--- a/decompiler/structures/pseudo/operations.py
+++ b/decompiler/structures/pseudo/operations.py
@@ -194,7 +194,12 @@ def __init__(
         super().__init__(tags)
 
     def __eq__(self, __value):
-        return isinstance(__value, Operation) and self._operation == __value._operation and self._operands == __value._operands and self.type == __value.type
+        return (
+            isinstance(__value, Operation)
+            and self._operation == __value._operation
+            and self._operands == __value._operands
+            and self.type == __value.type
+        )
 
     def __hash__(self):
         return hash((self._operation, tuple(self._operands), self.type))
@@ -344,7 +349,12 @@ def __init__(
         self.array_info = array_info
 
     def __eq__(self, __value):
-        return isinstance(__value, UnaryOperation) and self.contraction == __value.contraction and self.array_info == __value.array_info and super().__eq__(__value)
+        return (
+            isinstance(__value, UnaryOperation)
+            and self.contraction == __value.contraction
+            and self.array_info == __value.array_info
+            and super().__eq__(__value)
+        )
 
     def __hash__(self):
         return hash((self.contraction, self.array_info, super().__hash__()))