Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Lifter] add tests for changes of #353 (ComplexTypes, etc) #368

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions decompiler/frontend/binaryninja/handlers/assignments.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def lift_set_field(self, assignment: mediumlevelil.MediumLevelILSetVarField, is_
struct_variable = self._lifter.lift(assignment.dest, is_aliased=True, parent=assignment)
destination = MemberAccess(
offset=assignment.offset,
member_name=struct_variable.type.get_member_by_offset(assignment.offset).name,
member_name=struct_variable.type.get_member_name_by_offset(assignment.offset),
operands=[struct_variable],
writes_memory=assignment.ssa_memory_version,
writes_memory=assignment.dest.version,
)
value = self._lifter.lift(assignment.src)
# case 2 (contraction):
Expand All @@ -99,7 +99,7 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al
(x = ) <- for the sake of example, only rhs expression is lifted here.
"""
source = self._lifter.lift(instruction.src, is_aliased=is_aliased, parent=instruction)
if isinstance(source.type, Struct) or isinstance(source.type, Union):
if isinstance(source.type, Struct) or isinstance(source.type, Class) or isinstance(source.type, Union):
return self._get_field_as_member_access(instruction, source, **kwargs)
cast_type = source.type.resize(instruction.size * self.BYTE_SIZE)
if instruction.offset:
Expand All @@ -112,11 +112,11 @@ def lift_get_field(self, instruction: mediumlevelil.MediumLevelILVarField, is_al

def _get_field_as_member_access(self, instruction: mediumlevelil.MediumLevelILVarField, source: Expression, **kwargs) -> MemberAccess:
"""Lift MLIL var_field as struct or union member read access."""
if isinstance(source.type, Struct):
member_name = source.type.get_member_by_offset(instruction.offset).name
if isinstance(source.type, Struct) or isinstance(source.type, Class):
member_name = source.type.get_member_name_by_offset(instruction.offset)
elif parent := kwargs.get("parent", None):
parent_type = self._lifter.lift(parent.dest.type)
member_name = source.type.get_member_by_type(parent_type).name
member_name = source.type.get_member_name_by_type(parent_type)
else:
logging.warning(f"Cannot get member name for instruction {instruction}")
member_name = f"field_{hex(instruction.offset)}"
Expand Down Expand Up @@ -213,14 +213,8 @@ def lift_store_struct(self, instruction: mediumlevelil.MediumLevelILStoreStruct,
"""Lift a MLIL_STORE_STRUCT_SSA instruction to pseudo (e.g. object->field = x)."""
vartype = self._lifter.lift(instruction.dest.expr_type)
struct_variable = self._lifter.lift(instruction.dest, is_aliased=True, parent=instruction)
member = vartype.type.get_member_by_offset(instruction.offset)
if member is not None:
name = member.name
else:
name = f"__offset_{instruction.offset}"
name.replace("-", "minus_")
struct_member_access = MemberAccess(
member_name=name,
member_name=vartype.type.get_member_name_by_offset(instruction.offset),
offset=instruction.offset,
operands=[struct_variable],
vartype=vartype,
Expand Down
11 changes: 4 additions & 7 deletions decompiler/frontend/binaryninja/handlers/unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
Pointer,
UnaryOperation,
)
from decompiler.structures.pseudo.complextypes import Struct
from decompiler.structures.pseudo.complextypes import Class, Struct
from decompiler.structures.pseudo.operations import MemberAccess


Expand Down Expand Up @@ -98,12 +98,9 @@ def _lift_load_struct(self, instruction: mediumlevelil.MediumLevelILLoadStruct,
"""Lift a MLIL_LOAD_STRUCT_SSA (struct member access e.g. var#n->x) instruction."""
struct_variable = self._lifter.lift(instruction.src)
struct_ptr: Pointer = self._lifter.lift(instruction.src.expr_type)
struct_member = struct_ptr.type.get_member_by_offset(instruction.offset)
if struct_member is not None:
name = struct_member.name
else:
name = f"__offset_{instruction.offset}"
name.replace("-", "minus_")
name = f"field_{hex(instruction.offset)}".replace("-", "minus_")
if isinstance(struct_ptr.type, Class) or isinstance(struct_ptr.type, Struct):
name = struct_ptr.type.get_member_name_by_offset(instruction.offset)
return MemberAccess(vartype=struct_ptr, operands=[struct_variable], offset=instruction.offset, member_name=name)

def _lift_ftrunc(self, instruction: mediumlevelil.MediumLevelILFtrunc, **kwargs) -> UnaryOperation:
Expand Down
20 changes: 20 additions & 0 deletions decompiler/structures/pseudo/complextypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ def add_member(self, member: ComplexTypeMember):
def get_member_by_offset(self, offset: int) -> Optional[ComplexTypeMember]:
    """Look up the member stored under `offset`; the result is None when nothing is stored there."""
    registered_members = self.members
    return registered_members.get(offset)

def get_member_name_by_offset(self, offset: int) -> str:
    """Return the name of the member at `offset`, or a generated placeholder name.

    When `get_member_by_offset` finds no member for `offset`, a warning is logged
    and a synthetic name of the form `field_<hex offset>` is returned. The minus
    sign of a negative offset is spelled out, e.g. `field_minus_0x100`.
    """
    member = self.get_member_by_offset(offset)
    if member is not None:
        return member.name
    # Hand the arguments to the logging module instead of pre-formatting the message.
    logging.warning("Cannot get member name for type %s at offset %s", self, offset)
    return f"field_{hex(offset)}".replace("-", "minus_")

def declaration(self) -> str:
    """Render the type definition as text.

    The result has the shape `<specifier> <name> {\\n\\t<member>;\\n\\t<member>;\\n}`.
    Member declarations are emitted in ascending order of their keys in
    `self.members`, each terminated by a semicolon.
    """
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    member_declarations = [self.members[key].declaration() for key in sorted(self.members)]
    body = ";\n\t".join(member_declarations) + ";"
    return f"{self.type_specifier.value} {self.name} {{\n\t{body}\n}}"
Expand Down Expand Up @@ -103,6 +112,15 @@ def get_member_by_type(self, _type: Type) -> ComplexTypeMember:
if member.type == _type:
return member

def get_member_name_by_type(self, _type: Type) -> str:
    """Return the name of the union member that `get_member_by_type` finds for `_type`.

    When no member is found, a warning is logged and the placeholder name
    "unknown_field" is returned.
    """
    member = self.get_member_by_type(_type)
    if member is not None:
        return member.name
    # Hand the argument to the logging module instead of pre-formatting the message.
    logging.warning("Cannot get member name for union %s", self)
    return "unknown_field"


@dataclass(frozen=True, order=True)
class Enum(ComplexType):
Expand Down Expand Up @@ -136,6 +154,7 @@ class UniqueNameProvider:
"""The purpose of this class is to provide unique names for types, as duplicate names can potentially be encountered in the lifting stage (especially anonymous structs, etc.)
This class keeps track of all the names already used. If duplicates are found, they are renamed by appending suffixes with incrementing numbers.
E.g. `classname`, `classname__2`, `classname__3`, ...
Assumes that incoming names do not end with __{number}.
"""

def __init__(self):
Expand All @@ -145,6 +164,7 @@ def get_unique_name(self, name: str) -> str:
"""This method returns the input name if it was unique so far.
Otherwise it returns the name with an added incrementing suffix.
In any case, the occurrence of the name is counted.
Assumes that incoming names do not end with __{number}.
"""
if name not in self._name_to_count:
self._name_to_count[name] = 1
Expand Down
120 changes: 115 additions & 5 deletions tests/structures/pseudo/test_complextypes.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import pytest
from decompiler.structures.pseudo import Float, Integer, Pointer
from decompiler.structures.pseudo.complextypes import (
Class,
ComplexTypeMap,
ComplexTypeMember,
ComplexTypeName,
ComplexTypeSpecifier,
Enum,
Struct,
Union,
UniqueNameProvider,
)


Expand Down Expand Up @@ -46,6 +48,65 @@ def test_get_member_by_offset(self, book, title, num_pages, author):
assert book.get_member_by_offset(4) == num_pages
assert book.get_member_by_offset(8) == author

def test_get_member_name_by_offset(self, book, title, num_pages, author):
    """Known offsets resolve to member names; unknown offsets yield generated placeholder names."""
    for offset, member in ((0, title), (4, num_pages), (8, author)):
        assert book.get_member_name_by_offset(offset) == member.name
    # offsets without a registered member, including a negative one
    assert book.get_member_name_by_offset(0x100) == "field_0x100"
    assert book.get_member_name_by_offset(-0x100) == "field_minus_0x100"

def test_get_complex_type_name(self, book):
    """The struct fixture reports ComplexTypeName(0, "Book") as its complex type name."""
    expected_name = ComplexTypeName(0, "Book")
    assert book.complex_type_name == expected_name


class TestClass:
    """Tests for the `Class` complex type, driven by the `class_book` fixture."""

    def test_declaration(self, class_book: Class, record_id: Union):
        """The declaration lists all members and picks up a member added afterwards."""
        assert class_book.declaration() == "class ClassBook {\n\tchar * title;\n\tint num_pages;\n\tchar * author;\n}"
        # nest complex type
        class_book.add_member(
            m := ComplexTypeMember(size=64, name="id", offset=12, type=record_id),
        )
        result = f"class ClassBook {{\n\tchar * title;\n\tint num_pages;\n\tchar * author;\n\t{m.declaration()};\n}}"
        assert class_book.declaration() == result

    def test_str(self, class_book: Class):
        """The string form of the class is its name."""
        assert str(class_book) == "ClassBook"

    def test_copy(self, class_book: Class):
        """A copy is a distinct object with its own members dict and member objects, but equal content."""
        new_class_book: Class = class_book.copy()
        assert id(new_class_book) != id(class_book)
        assert new_class_book.size == class_book.size
        assert new_class_book.type_specifier == class_book.type_specifier == ComplexTypeSpecifier.CLASS
        assert id(new_class_book.members) != id(class_book.members)
        assert new_class_book.get_member_by_offset(0) == class_book.get_member_by_offset(0)
        assert id(new_class_book.get_member_by_offset(0)) != id(class_book.get_member_by_offset(0))
        assert len(new_class_book.members) == len(class_book.members)

    def test_add_members(self, class_book, title, num_pages, author):
        """Adding the three members to an empty class, in any order, equals the fixture."""
        empty_class_book = Class(name="ClassBook", members={}, size=96)
        empty_class_book.add_member(title)
        empty_class_book.add_member(author)
        empty_class_book.add_member(num_pages)
        assert empty_class_book == class_book

    def test_get_member_by_offset(self, class_book, title, num_pages, author):
        """Each member is found under its offset."""
        assert class_book.get_member_by_offset(0) == title
        assert class_book.get_member_by_offset(4) == num_pages
        assert class_book.get_member_by_offset(8) == author

    def test_get_member_name_by_offset(self, class_book, title, num_pages, author):
        """Known offsets resolve to member names; unknown offsets yield generated placeholder names."""
        assert class_book.get_member_name_by_offset(0) == title.name
        assert class_book.get_member_name_by_offset(4) == num_pages.name
        assert class_book.get_member_name_by_offset(8) == author.name
        assert class_book.get_member_name_by_offset(0x100) == "field_0x100"
        assert class_book.get_member_name_by_offset(-0x100) == "field_minus_0x100"

    def test_get_complex_type_name(self, class_book):
        """The class fixture reports ComplexTypeName(0, "ClassBook") as its complex type name."""
        assert class_book.complex_type_name == (ComplexTypeName(0, "ClassBook"))

    def test_class_not_struct(self, class_book, book):
        """The struct fixture and the class fixture do not compare equal."""
        assert book != class_book


@pytest.fixture
def book() -> Struct:
Expand All @@ -60,6 +121,19 @@ def book() -> Struct:
)


@pytest.fixture
def class_book() -> Class:
    """Provide a `Class` named "ClassBook" of size 96 with members title, num_pages and author."""
    title_member = ComplexTypeMember(size=32, name="title", offset=0, type=Pointer(Integer.char()))
    num_pages_member = ComplexTypeMember(size=32, name="num_pages", offset=4, type=Integer.int32_t())
    author_member = ComplexTypeMember(size=32, name="author", offset=8, type=Pointer(Integer.char()))
    # the members mapping is keyed by each member's offset
    members_by_offset = {0: title_member, 4: num_pages_member, 8: author_member}
    return Class(name="ClassBook", members=members_by_offset, size=96)


@pytest.fixture
def title() -> ComplexTypeMember:
    """Provide the "title" member: a char pointer of size 32 at offset 0."""
    char_pointer = Pointer(Integer.char())
    return ComplexTypeMember(size=32, name="title", offset=0, type=char_pointer)
Expand Down Expand Up @@ -101,6 +175,15 @@ def test_get_member_by_type(self, record_id, float_id, int_id, double_id):
assert record_id.get_member_by_type(Integer.int32_t()) == int_id
assert record_id.get_member_by_type(Float.double()) == double_id

def test_get_member_name_by_type(self, record_id, float_id, int_id, double_id):
    """Member types resolve to member names; a type without a member yields "unknown_field"."""
    expectations = (
        (Float.float(), float_id.name),
        (Integer.int32_t(), int_id.name),
        (Float.double(), double_id.name),
        # the union itself is not the type of any of its members
        (record_id, "unknown_field"),
    )
    for member_type, expected_name in expectations:
        assert record_id.get_member_name_by_type(member_type) == expected_name

def test_get_complex_type_name(self, record_id):
    """The union fixture reports ComplexTypeName(0, "RecordID") as its complex type name."""
    expected_name = ComplexTypeName(0, "RecordID")
    assert record_id.complex_type_name == expected_name


@pytest.fixture
def record_id() -> Union:
Expand Down Expand Up @@ -153,6 +236,9 @@ def test_add_members(self, empty_color, color, red, green, blue):
empty_color.add_member(blue)
assert empty_color == color

def test_get_complex_type_name(self, color):
    """The enum fixture reports ComplexTypeName(0, "Color") as its complex type name."""
    expected_name = ComplexTypeName(0, "Color")
    assert color.complex_type_name == expected_name


@pytest.fixture
def color():
Expand Down Expand Up @@ -188,20 +274,44 @@ def blue():


class TestComplexTypeMap:
def test_declarations(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union):
assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};"
def test_declarations(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union):
assert (
complex_types.declarations()
== f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};\n{class_book.declaration()};"
)
complex_types.add(book, 0)
assert complex_types.declarations() == f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};"
assert (
complex_types.declarations()
== f"{book.declaration()};\n{color.declaration()};\n{record_id.declaration()};\n{class_book.declaration()};"
)

def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, color: Enum, record_id: Union):
def test_retrieve_by_name(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union):
assert complex_types.retrieve_by_name(ComplexTypeName(0, "Book")) == book
assert complex_types.retrieve_by_name(ComplexTypeName(0, "RecordID")) == record_id
assert complex_types.retrieve_by_name(ComplexTypeName(0, "Color")) == color
assert complex_types.retrieve_by_name(ComplexTypeName(0, "ClassBook")) == class_book

def test_retrieve_by_id(self, complex_types: ComplexTypeMap, book: Struct, class_book: Class, color: Enum, record_id: Union):
    """Each fixture type is retrievable under the ids 0 to 3, in the order book, color, record_id, class_book."""
    for type_id, expected_type in enumerate((book, color, record_id, class_book)):
        assert complex_types.retrieve_by_id(type_id) == expected_type

@pytest.fixture
def complex_types(self, book: Struct, color: Enum, record_id: Union):
def complex_types(self, book: Struct, class_book: Class, color: Enum, record_id: Union):
complex_types = ComplexTypeMap()
complex_types.add(book, 0)
complex_types.add(color, 1)
complex_types.add(record_id, 2)
complex_types.add(class_book, 3)
return complex_types


class TestUniqueNameProvider:
    """Tests for `UniqueNameProvider`."""

    def test_unique_names(self):
        """First occurrences come back unchanged; repeats get `__2`, `__3`, ... appended."""
        unique_name_provider = UniqueNameProvider()
        input_names = ["aa", "", "b", "", "c", "c", "d", "c"]
        expected_output = ["aa", "", "b", "__2", "c", "c__2", "d", "c__3"]
        output_names = [unique_name_provider.get_unique_name(name) for name in input_names]
        assert output_names == expected_output
        assert len(set(output_names)) == len(output_names)  # uniqueness
22 changes: 22 additions & 0 deletions tests/structures/pseudo/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from decompiler.structures.pseudo import OperationType, UnaryOperation
from decompiler.structures.pseudo.complextypes import ComplexTypeMember, Enum
from decompiler.structures.pseudo.expressions import (
Constant,
ExternConstant,
Expand Down Expand Up @@ -374,3 +375,24 @@ def test_copy(self):
original = ImportedFunctionSymbol("foo", 0x42)
copy = original.copy()
assert id(original) != id(copy) and original == copy


@pytest.fixture
def color():
    """Provide an Enum named "Color" whose members red, green and blue carry the values 0, 1 and 2."""
    members_by_value = {
        value: ComplexTypeMember(0, name, value=value, offset=0, type=Integer.int32_t())
        for value, name in enumerate(("red", "green", "blue"))
    }
    return Enum(0, "Color", members_by_value)


class TestEnumConstant:
    """String rendering of constants whose vartype is an Enum."""

    def test_available_value(self, color):
        """A value that matches an enum member is rendered as that member's name."""
        green_constant = Constant(value=1, vartype=color)
        assert str(green_constant) == "green"

    def test_unavailable_value(self, color):
        """A value without a matching enum member is rendered like an i32 constant of the same value."""
        enum_typed = Constant(value=0xFFFF, vartype=color)
        integer_typed = Constant(0xFFFF, vartype=i32)
        assert str(enum_typed) == str(integer_typed)
Loading