Skip to content

Commit

Permalink
Fix: Unary Op, Backend + Comments
Browse files Browse the repository at this point in the history
  • Loading branch information
NeoQuix committed Mar 19, 2024
1 parent a03d0f7 commit b2e1c5b
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 37 deletions.
4 changes: 2 additions & 2 deletions decompiler/backend/cexpressiongenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,13 @@ def visit_constant_composition(self, expr: expressions.ConstantComposition):
"""Visit a Constant Array."""
match expr.type.type:
case CustomType(text="wchar16") | CustomType(text="wchar32"):
val = {"".join([x.value for x in expr.value])}
val = "".join([x.value for x in expr.value])
return f'L"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'L"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case Integer(8):
val = "".join([x.value for x in expr.value][:MAX_GLOBAL_INIT_LENGTH])
return f'"{val}"' if len(val) <= MAX_GLOBAL_INIT_LENGTH else f'"{val[:MAX_GLOBAL_INIT_LENGTH]}..."'
case _:
return f'{", ".join([hex(x.value) for x in expr.value])}' # Todo: Should we print every member? Could get pretty big
return f'{", ".join([self.visit(x) for x in expr.value])}' # Todo: Should we print every member? Could get pretty big

def visit_variable(self, expr: expressions.Variable) -> str:
"""Return a string representation of the variable."""
Expand Down
3 changes: 3 additions & 0 deletions decompiler/frontend/binaryninja/handlers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ def lift_constant_pointer(self, pointer: mediumlevelil.MediumLevelILConstPtr, **
if isinstance(res, Constant): # BNinja Error case handling
return res

if isinstance(res.type, Pointer) and res.type.type == CustomType.void():
return res

return UnaryOperation(
OperationType.address,
[res],
Expand Down
132 changes: 99 additions & 33 deletions decompiler/frontend/binaryninja/handlers/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,48 @@
GlobalVariable,
ImportedFunctionSymbol,
Integer,
OperationType,
Pointer,
Symbol,
UnaryOperation,
)

"""
Lift a given address inside of a binary by BNinjas DataVariable type.
If some code references a address, bninja stores the information about the address inside of a DataVariable (dv).
A dv has a type (which may be wrong/or set by a user) and a value (which can be anything).
We lift according to the type (bninjas) of the dv:
- CharType, FloatType, IntegerType, BoolType
- basic C types (char, int, float, ...)
=> just lift as the given type
==> Addition since Version 4.0: Check if variable references something, if yes, try to lift as pointer
- VoidType
- when bninja does not know the size of a variable (e.g. int array) it represents it as a void dv
=> C does not have a concept of void
=> lift as a void* with raw escaped bytes as value (still not C conformant, but better)
==> if we create a pointer, the caller (instruction) must remove the '&' operator
- ArrayType
- Strings (char [], wchar_16[], ...)
=> Lift as given type (array)
=> BNinja changes the .value field frequently and is not consistent (any; mostly bytes, list or string)
- PointerType
- pointer to something (basic type, void*, function pointer)
=> If the pointer points to some basic type, there _should_ be a dv at the value address
==> trust bninja lift normally
=> If a void*, then we try determine the value via get_unknown_pointer_value
- NamedTypeReferenceType
- enum/structs
=> not supported currently
=> has a BNinja bug when accessing certain PDB enum types
MISC:
- ._callers will be empty for each call of lift_global_variable
except when an caller calls the lifter with kwargs = {callers = [..]}
=> get_unknown_value does exactly this to keep track of all callers for a chain of global variables
(The call stack will be lifter.lift, lift_global_variable, lifter.lift, lift_global_variable, ...)
"""


class GlobalHandler(Handler):
"""Handler for global variables."""
Expand All @@ -46,7 +84,7 @@ def __init__(self, lifter):
FloatType: self._lift_basic_type,
BoolType: self._lift_basic_type,
VoidType: self._lift_void_type,
ArrayType: self._lift_constant_type,
ArrayType: self._lift_array_type,
PointerType: self._lift_pointer_type,
NamedTypeReferenceType: self._lift_named_type_ref,
}
Expand Down Expand Up @@ -107,6 +145,23 @@ def lift_global_variable(

return self._lift_datavariable_by_type[type(variable.type)](variable, parent)

def _lift_array_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None) -> GlobalVariable:
"""Lift constant data type (strings and jump tables) into code"""
type = self._lifter.lift(variable.type)
match variable.value:
case bytes(): # BNinja corner case: C-Strings (8Bit) are represented as python Bytes
value = [Constant(x, type.type) for x in str(variable.value.rstrip(b"\x00"))[2:-1]]
case _:
value = [Constant(x, type.type) for x in variable.value]

return self._build_global_variable(
name=variable.name,
type=type,
addr=variable.address,
init_value=ConstantComposition(value, type),
ssa_label=parent.ssa_memory_version if parent else 0,
)

def _lift_basic_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None) -> GlobalVariable:
"""Lift basic C type found by BNinja (int, float, char, ...)"""
# If variable references something in address space, then lift it as a pointer (BNinja 4.0 "Error")
Expand All @@ -126,7 +181,7 @@ def _lift_basic_type(self, variable: DataVariable, parent: Optional[MediumLevelI

def _lift_void_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None) -> GlobalVariable:
"Lift unknown type, by checking the value at the given address. Will always be lifted as a pointer. Try to extract datavariable, string or bytes as value"
value, type = self.get_unknown_pointer_value(variable.address, self._view, variable.address)
value, type = self.get_unknown_value(variable, self._view)
return self._build_global_variable(
name=self._lifter.lift(variable.symbol).name if variable.symbol else None,
type=type,
Expand All @@ -135,23 +190,6 @@ def _lift_void_type(self, variable: DataVariable, parent: Optional[MediumLevelIL
ssa_label=parent.ssa_memory_version if parent else 0,
)

def _lift_constant_type(self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None) -> GlobalVariable:
"""Lift constant data type (strings and jump tables) into code"""
type = self._lifter.lift(variable.type)
match variable.value:
case bytes(): # BNinja corner case: C-Strings (8Bit) are represented as Bytes
value = [Constant(x, type.type) for x in str(variable.value.rstrip(b"\x00"))[2:-1]]
case _:
value = [Constant(x, type.type) for x in variable.value]

return self._build_global_variable(
name=variable.name,
type=type,
addr=variable.address,
init_value=ConstantComposition(value, type),
ssa_label=parent.ssa_memory_version if parent else 0,
)

def _lift_pointer_type(
self, variable: DataVariable, parent: Optional[MediumLevelILInstruction] = None
) -> Union[GlobalVariable, Symbol]:
Expand All @@ -167,9 +205,7 @@ def _lift_pointer_type(
)
case VoidType(): # BNinja knows it's a pointer pointing at something
# Extract the initial_value and type from the location where the pointer is pointing to
init_value, type = self.get_unknown_pointer_value(variable.value, self._view, variable.address)
if not isinstance(type, (Pointer, Array)): # Fix type to be a pointer (happens when a datavariable is at the dest.)
type = Pointer(type, self._view.address_size * BYTE_SIZE)
init_value, type = self.get_unknown_pointer_value(variable, self._view)
case _:
init_value, type = (
self._lifter.lift(self._view.get_data_var_at(variable.value), view=self._view, caller_addr=variable.address),
Expand All @@ -189,20 +225,50 @@ def _lift_named_type_ref(self, variable: DataVariable, parent: Optional[MediumLe
"Unknown value", self._lifter.lift(variable.type)
) # BNinja error, need to check with the issue to get the correct value + entry for structs

def get_unknown_pointer_value(self, addr: int, view: BinaryView, caller_addr: int = 0):
"""Return symbol, datavariable, address, string or raw bytes at given address."""
if not self.addr_in_section(view, addr):
return addr, Pointer(CustomType.void())
if datavariable := view.get_data_var_at(addr):
self._callers.append(caller_addr)
return self._lifter.lift(datavariable, view=view, callers=self._callers), self._lifter.lift(datavariable.type)
if (data := self._get_different_string_types_at(addr, view)) and data[
0
] is not None: # Implicit pointer removal if called from a pointer value
def get_unknown_value(self, dv: DataVariable, view: BinaryView):
"""Return string or bytes at dv.address(!) (dv.type must be void)"""
if (data := self._get_different_string_types_at(dv.address, view)) and data[0] is not None:
type = Array(self._lifter.lift(data[1]), len(data[0]))
data = ConstantComposition([Constant(x, type.type) for x in data[0]], type)
else:
data, type = self.get_raw_bytes(addr, view), Pointer(CustomType.void())
data, type = self.get_raw_bytes(dv.address, view), Pointer(CustomType.void())
return data, type

def get_unknown_pointer_value(self, dv: DataVariable, view: BinaryView):
"""Return symbol, datavariable, address, string or raw bytes for a value of a datavariable(!) (dv should be a pointer)."""
if not self.addr_in_section(view, dv.value):
return dv.value, Pointer(CustomType.void())

if datavariable := view.get_data_var_at(dv.value):
self._callers.append(dv.address)
type = self._lifter.lift(datavariable.type)
value = self._lifter.lift(datavariable, view=view, callers=self._callers)
if not isinstance(type, (Pointer, Array)):
type = Pointer(type, self._view.address_size * BYTE_SIZE)
value = UnaryOperation(
OperationType.address,
[value],
vartype=value.type,
)
return value, type

if (data := self._get_different_string_types_at(dv.value, view)) and data[
0
] is not None: # Implicit pointer removal if called from a pointer value, does NOT need to be a UnaryOperation
vtype = Array(self._lifter.lift(data[1]), len(data[0]))
vdata = ConstantComposition([Constant(x, vtype.type) for x in data[0]], vtype)
data = self._build_global_variable(None, vtype, dv.value, vdata, None)
type = Pointer(vtype, self._view.address_size * BYTE_SIZE)
return (
UnaryOperation(
OperationType.address,
[data],
vartype=data.type,
),
type,
)
else:
data, type = self.get_raw_bytes(dv.value, view), Pointer(CustomType.void())
return data, type

def get_raw_bytes(self, addr: int, view: BinaryView) -> bytes:
Expand Down
4 changes: 2 additions & 2 deletions decompiler/structures/pseudo/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def __iter__(self) -> Iterator[Expression]:
case Expression():
yield self.initial_value
case _:
pass
raise TypeError(f"Type violation '{self.initial_value}'")

def __str__(self) -> str:
"""Return a string representation of the global variable."""
Expand Down Expand Up @@ -515,7 +515,7 @@ def __init__(self, value: list[Constant], vartype: DecompiledType = UnknownType(
)

def __str__(self) -> str:
"""Todo"""
"""Return a string representation of the ConstantComposition"""
return "{" + ",".join([str(x) for x in self.value]) + "}"

def copy(self) -> ConstantComposition:
Expand Down

0 comments on commit b2e1c5b

Please sign in to comment.