Skip to content

Commit

Permalink
Support passing and returning structs in calling conventions (angr#2961)
Browse files Browse the repository at this point in the history
* Refactor SimCC to be able to support struct parameters

* Implement microsoft amd64 cc struct passing

* Implement cdecl and stdcall struct passing

* Implement return value storage for linux amd64

* Implement struct returning for x86 arches, I think?

* Fix some issues regarding func_ty in kb Functions and SimProcedures

* Perform some lies

* Remove test_cc; it is a less comprehensive version of test_callable

* oh god help me

* change self.arg to self.va_arg (different semantics) in simprocedure

* Fix (?) a few usages of cc in static analysis

* s/func_ty/prototype/

* Fix procedures to return the right sized value for ints

* Minor fixes

* More ints

* simproc fixes

* I swear to god I'll get these sizes right

* java cc fixes

* No more sp_deltas and friends

* Yet more fixes

* I swear to god

* WHERE the hell did all these failing tests come from

* Adapt CallingConventionAnalysis to the latest changes.

* Function: Differentiate guessed prototypes and deduced prototypes.

* Fix test cases in test_calling_convention_analysis.py.

* Fix more test cases in test_calling_convention_analysis.py.

* Java hell is defeated

* clarify comment

* Not sure what this is about

Co-authored-by: Fish <[email protected]>
  • Loading branch information
rhelmot and ltfish authored Dec 13, 2021
1 parent a3b39e2 commit 3090a97
Show file tree
Hide file tree
Showing 86 changed files with 1,355 additions and 1,185 deletions.
5 changes: 3 additions & 2 deletions angr/analyses/callee_cleanup_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ def __init__(self, starts=None, hook_all=False):
l.error("Function at %#x has a misaligned return?", addr)
continue
args = size // self.project.arch.bytes
cc = self.project.factory.cc_from_arg_kinds([False]*args)
cc = self.project.factory.cc()
prototype = cc.guess_prototype([0]*args)
cc.CALLEE_CLEANUP = True
sym = self.project.loader.find_symbol(addr)
name = sym.name if sym is not None else None
lib = self.project.loader.find_object_containing(addr)
libname = lib.provides if lib is not None else None
self.project.hook(addr, SIM_PROCEDURES['stubs']['ReturnUnconstrained'](cc=cc, display_name=name, library_name=libname, is_stub=True))
self.project.hook(addr, SIM_PROCEDURES['stubs']['ReturnUnconstrained'](cc=cc, prototype=prototype, display_name=name, library_name=libname, is_stub=True))

def analyze(self, addr):
seen = set()
Expand Down
99 changes: 60 additions & 39 deletions angr/analyses/calling_convention.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import networkx
from archinfo.arch_arm import is_arm_arch

from ..calling_conventions import SimRegArg, SimStackArg, SimCC, DefaultCC
from ..calling_conventions import SimFunctionArgument, SimRegArg, SimStackArg, SimCC, DefaultCC
from ..sim_type import SimTypeInt, SimTypeFunction, SimType, SimTypeLongLong, SimTypeShort, SimTypeChar, SimTypeBottom
from ..sim_variable import SimStackVariable, SimRegisterVariable
from ..knowledge_plugins.key_definitions.atoms import Register, MemoryLocation, SpOffset
from ..knowledge_plugins.key_definitions.constants import OP_BEFORE, OP_AFTER
Expand Down Expand Up @@ -46,7 +47,7 @@ class UpdateArgumentsOption:

class CallingConventionAnalysis(Analysis):
"""
Analyze the calling convention of functions.
Analyze the calling convention of a function and guess a probable prototype.
The calling convention of a function can be inferred at both its call sites and the function itself. At call sites,
we consider all register and stack variables that are not alive after the function call as parameters to this
Expand All @@ -72,6 +73,7 @@ def __init__(self, func: 'Function', cfg: Optional['CFGModel']=None, analyze_cal
self.analyze_callsites = analyze_callsites

self.cc: Optional[SimCC] = None
self.prototype: Optional[SimTypeFunction] = None

if self._cfg is None and 'CFGFast' in self.kb.cfgs:
self._cfg = self.kb.cfgs['CFGFast']
Expand All @@ -85,28 +87,37 @@ def _analyze(self):

if self._function.is_simprocedure:
self.cc = self._function.calling_convention
self.prototype = self._function.prototype
if self.cc is None:
callsite_facts = self._analyze_callsites(max_analyzing_callsites=1)
cc = DefaultCC[self.project.arch.name](self.project.arch)
cc = self._adjust_cc(cc, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate)
prototype = self._adjust_prototype(self.prototype, callsite_facts,
update_arguments=UpdateArgumentsOption.AlwaysUpdate)
self.cc = cc
self.prototype = prototype
return
if self._function.is_plt:
self.cc = self._analyze_plt()
r = self._analyze_plt()
if r is not None:
self.cc, self.prototype = r
return

cc = self._analyze_function()
if self.analyze_callsites:
# only take the first 3 because running reaching definition analysis on all functions is costly
callsite_facts = self._analyze_callsites(max_analyzing_callsites=3)
cc = self._adjust_cc(cc, callsite_facts, update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs)

if cc is None:
r = self._analyze_function()
if r is None:
l.warning('Cannot determine calling convention for %r.', self._function)

self.cc = cc

def _analyze_plt(self) -> Optional[SimCC]:
else:
# adjust prototype if needed
cc, prototype = r
if self.analyze_callsites:
# only take the first 3 because running reaching definition analysis on all functions is costly
callsite_facts = self._analyze_callsites(max_analyzing_callsites=3)
prototype = self._adjust_prototype(prototype, callsite_facts,
update_arguments=UpdateArgumentsOption.UpdateWhenCCHasNoArgs)

self.cc = cc
self.prototype = prototype

def _analyze_plt(self) -> Optional[Tuple[SimCC,SimTypeFunction]]:
"""
Get the calling convention for a PLT stub.
Expand Down Expand Up @@ -135,18 +146,20 @@ def _analyze_plt(self) -> Optional[SimCC]:
if real_func is not None:
if real_func.is_simprocedure and self.project.is_hooked(real_func.addr):
hooker = self.project.hooked_by(real_func.addr)
if hooker is not None and (not hooker.is_stub or real_func.calling_convention.func_ty is not None):
return real_func.calling_convention
if hooker is not None and not hooker.is_stub:
return real_func.calling_convention, real_func.prototype
else:
return real_func.calling_convention
return real_func.calling_convention, real_func.prototype

# determine the calling convention by analyzing its callsites
callsite_facts = self._analyze_callsites(max_analyzing_callsites=1)
cc = DefaultCC[self.project.arch.name](self.project.arch)
cc = self._adjust_cc(cc, callsite_facts, update_arguments=UpdateArgumentsOption.AlwaysUpdate)
return cc
prototype = SimTypeFunction([ ], None)
prototype = self._adjust_prototype(prototype, callsite_facts,
update_arguments=UpdateArgumentsOption.AlwaysUpdate)
return cc, prototype

def _analyze_function(self) -> Optional[SimCC]:
def _analyze_function(self) -> Optional[Tuple[SimCC,SimTypeFunction]]:
"""
Go over the variable information in variable manager for this function, and return all uninitialized
register/stack variables.
Expand Down Expand Up @@ -174,15 +187,13 @@ def _analyze_function(self) -> Optional[SimCC]:
if cc is None:
l.warning('_analyze_function(): Cannot find a calling convention for %r that fits the given arguments.',
self._function)
return None
else:
# reorder args
args = self._reorder_args(input_args, cc)
cc.args = args
prototype = SimTypeFunction([self._guess_arg_type(arg) for arg in args], SimTypeInt())

# set return value
cc.ret_val = cc.return_val

return cc
return cc, prototype

def _analyze_callsites(self, max_analyzing_callsites: int=3) -> List[CallSiteFact]: # pylint:disable=no-self-use
"""
Expand Down Expand Up @@ -327,9 +338,9 @@ def _analyze_callsite_arguments(self,
defs_by_stack_offset = dict((-d.atom.addr.offset, d) for d in all_stack_defs
if isinstance(d.atom, MemoryLocation) and isinstance(d.atom.addr, SpOffset))

arg_session = default_cc.arg_session
arg_session = default_cc.arg_session(SimTypeInt().with_arch(self.project.arch))
for _ in range(30): # at most 30 arguments
arg_loc = arg_session.next_arg(False)
arg_loc = default_cc.next_arg(arg_session, SimTypeInt().with_arch(self.project.arch))
if isinstance(arg_loc, SimRegArg):
reg_offset = self.project.arch.registers[arg_loc.reg_name][0]
# is it initialized?
Expand All @@ -347,28 +358,25 @@ def _analyze_callsite_arguments(self,
else:
break

@staticmethod
def _adjust_cc(cc: SimCC, facts: List[CallSiteFact],
update_arguments: int=UpdateArgumentsOption.DoNotUpdate):
def _adjust_prototype(self, proto: Optional[SimTypeFunction], facts: List[CallSiteFact],
update_arguments: int=UpdateArgumentsOption.DoNotUpdate) -> Optional[SimTypeFunction]:

if cc is None:
return cc
if proto is None:
return None

# is the return value used anywhere?
if facts and all(fact.return_value_used is False for fact in facts):
cc.ret_val = None
else:
cc.ret_val = cc.RETURN_VAL
proto.returnty = None

if update_arguments == UpdateArgumentsOption.AlwaysUpdate or (
update_arguments == UpdateArgumentsOption.UpdateWhenCCHasNoArgs and
not cc.args
not proto.args
):
if len(set(len(fact.args) for fact in facts)) == 1:
fact = next(iter(facts))
cc.args = fact.args
proto.args = [self._guess_arg_type(arg) for arg in fact.args]

return cc
return proto

def _args_from_vars(self, variables: List, var_manager):
"""
Expand Down Expand Up @@ -516,5 +524,18 @@ def _reorder_args(self, args, cc):

return reg_args + args + stack_args

def _guess_arg_type(self, arg: SimFunctionArgument) -> SimType:
    """
    Guess a SimType for an argument location, looking only at its size.

    :param arg: The argument location whose ``size`` attribute is inspected.
    :return:    A new SimTypeInt, SimTypeLongLong, SimTypeShort, or SimTypeChar when the size is 4, 8, 2, or 1
                respectively; a new SimTypeBottom for every other size.
    """
    # Checked in this order; the first matching size wins.
    size_to_type = (
        (4, SimTypeInt),
        (8, SimTypeLongLong),
        (2, SimTypeShort),
        (1, SimTypeChar),
    )
    for width, type_cls in size_to_type:
        if arg.size == width:
            return type_cls()

    # Unsupported for now
    return SimTypeBottom()


register_analysis(CallingConventionAnalysis, "CallingConvention")
4 changes: 3 additions & 1 deletion angr/analyses/complete_calling_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ def _analyze(self):
cc_analysis = self.project.analyses.CallingConvention(func, cfg=self._cfg,
analyze_callsites=self._analyze_callsites)
if cc_analysis.cc is not None:
_l.info("Determined calling convention for %r.", func)
_l.info("Determined calling convention and prototype for %r.", func)
func.calling_convention = cc_analysis.cc
func.prototype = cc_analysis.prototype
func.is_prototype_guessed = True
else:
_l.info("Cannot determine calling convention for %r.", func)

Expand Down
35 changes: 16 additions & 19 deletions angr/analyses/decompiler/callsite_maker.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from typing import Optional, List, Tuple, Any, Set, TYPE_CHECKING
import copy
import logging

import archinfo
from ailment import Stmt, Expr

from ...procedures.stubs.format_parser import FormatParser, FormatSpecifier
from ...errors import SimMemoryMissingError
from ...sim_type import SimTypeBottom, SimTypePointer, SimTypeChar
from ...calling_conventions import SimRegArg, SimStackArg
from ...sim_type import SimTypeBottom, SimTypePointer, SimTypeChar, SimTypeInt
from ...calling_conventions import SimRegArg, SimStackArg, SimCC
from ...knowledge_plugins.key_definitions.constants import OP_BEFORE
from ...knowledge_plugins.key_definitions.definition import Definition
from .. import Analysis, register_analysis
Expand Down Expand Up @@ -65,30 +66,28 @@ def _analyze(self):

args = [ ]
arg_locs = None
if func.calling_convention is None:
if cc is None:
l.warning('%s has an unknown calling convention.', repr(func))
else:
stackarg_sp_diff = func.calling_convention.STACKARG_SP_DIFF
if func.prototype is not None:
if prototype is not None:
# Make arguments
arg_locs = func.calling_convention.arg_locs()
if func.prototype.variadic:
arg_locs = cc.arg_locs(prototype)
if prototype.variadic:
# determine the number of variadic arguments
variadic_args = self._determine_variadic_arguments(func, func.calling_convention, last_stmt)
variadic_args = self._determine_variadic_arguments(func, cc, last_stmt)
if variadic_args:
arg_sizes = [arg.size // self.project.arch.byte_width for arg in func.prototype.args] + \
([self.project.arch.bytes] * variadic_args)
is_fp = [False] * len(arg_sizes)
arg_locs = func.calling_convention.arg_locs(is_fp=is_fp, sizes=arg_sizes)
else:
if func.calling_convention.args is not None:
arg_locs = func.calling_convention.arg_locs()
callsite_ty = copy.copy(prototype)
callsite_ty.args = list(callsite_ty.args)
for i in range(variadic_args):
callsite_ty.args.append(SimTypeInt().with_arch(self.project.arch))
arg_locs = cc.arg_locs(callsite_ty)

if arg_locs is not None:
for arg_loc in arg_locs:
if type(arg_loc) is SimRegArg:
size = arg_loc.size
offset = arg_loc._fix_offset(None, size, arch=self.project.arch)
offset = arg_loc.check_offset(cc.arch)

_, the_arg = self._resolve_register_argument(last_stmt, arg_loc)

Expand Down Expand Up @@ -194,7 +193,7 @@ def _find_variable_from_definition(self, def_):
def _resolve_register_argument(self, call_stmt, arg_loc) -> Tuple:

size = arg_loc.size
offset = arg_loc._fix_offset(None, size, arch=self.project.arch)
offset = arg_loc.check_offset(self.project.arch)

if self._reaching_definitions is not None:
# Find its definition
Expand Down Expand Up @@ -310,9 +309,7 @@ def _determine_variadic_arguments_for_format_strings(self, func, cc: 'SimCC', ca

fmt_str = None
min_arg_count = (max(potential_fmt_args) + 1)
arg_locs = cc.arg_locs(is_fp=[False] * min_arg_count,
sizes=[self.project.arch.bytes] * min_arg_count
)
arg_locs = cc.arg_locs(SimCC.guess_prototype([0]*min_arg_count, proto))

for fmt_arg_idx in potential_fmt_args:
arg_loc = arg_locs[fmt_arg_idx]
Expand Down
18 changes: 9 additions & 9 deletions angr/analyses/decompiler/clinic.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,8 @@ def _updatedict_handler(node):

@timethis
def _make_argument_list(self) -> List[SimVariable]:
if self.function.calling_convention is not None:
args: List[SimFunctionArgument] = self.function.calling_convention.args
if self.function.calling_convention is not None and self.function.prototype is not None:
args: List[SimFunctionArgument] = self.function.calling_convention.arg_locs(self.function.prototype)
arg_vars: List[SimVariable] = [ ]
if args:
for idx, arg in enumerate(args):
Expand Down Expand Up @@ -512,9 +512,10 @@ def _make_returns(self, ail_graph: networkx.DiGraph) -> networkx.DiGraph:
def _handle_Return(stmt_idx: int, stmt: ailment.Stmt.Return, block: Optional[ailment.Block]): # pylint:disable=unused-argument
if block is not None \
and not stmt.ret_exprs \
and self.function.calling_convention.ret_val is not None:
and self.function.prototype is not None \
and type(self.function.prototype.returnty) is not SimTypeBottom:
new_stmt = stmt.copy()
ret_val = self.function.calling_convention.ret_val
ret_val = self.function.calling_convention.return_val(self.function.prototype.returnty)
if isinstance(ret_val, SimRegArg):
reg = self.project.arch.registers[ret_val.reg_name]
new_stmt.ret_exprs.append(ailment.Expr.Register(
Expand Down Expand Up @@ -546,7 +547,7 @@ def _handler(block):

@timethis
def _make_function_prototype(self, arg_list: List[SimVariable], variable_kb):
if self.function.prototype is not None:
if self.function.prototype is not None and not self.function.is_prototype_guessed:
# do not overwrite an existing function prototype
# if you want to re-generate the prototype, clear the existing one first
return
Expand Down Expand Up @@ -574,12 +575,11 @@ def _make_function_prototype(self, arg_list: List[SimVariable], variable_kb):

func_args.append(func_arg)

if self.function.calling_convention is not None and self.function.calling_convention.ret_val is None:
returnty = SimTypeBottom(label="void")
else:
returnty = SimTypeInt()
# TODO: need a new method of determining whether a function returns void
returnty = SimTypeInt()

self.function.prototype = SimTypeFunction(func_args, returnty).with_arch(self.project.arch)
self.function.is_prototype_guessed = False

@timethis
def _recover_and_link_variables(self, ail_graph, arg_list):
Expand Down
13 changes: 10 additions & 3 deletions angr/analyses/identifier/custom_callable.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,31 @@ def set_base_state(self, state):
self._base_state = state

def __call__(self, *args):
self.perform_call(*args)
prototype = self._cc.guess_prototype(args)
self.perform_call(*args, prototype=prototype)
if self.result_state is not None:
return self.result_state.solver.simplify(self._cc.get_return_val(self.result_state, stack_base=self.result_state.regs.sp - self._cc.STACKARG_SP_DIFF))
loc = self._cc.return_val(prototype.returnty)
return self.result_state.solver.simplify(loc.get_value(self.result_state, stack_base=self.result_state.regs.sp - self._cc.STACKARG_SP_DIFF))
return None

def get_base_state(self, *args):
prototype = self._cc.guess_prototype(args)
self._base_state.ip = self._addr
state = self._project.factory.call_state(self._addr, *args,
prototype=prototype,
cc=self._cc,
base_state=self._base_state,
ret_addr=self._deadend_addr,
toc=self._toc)
return state

def perform_call(self, *args):
def perform_call(self, *args, prototype=None):
if prototype is None:
prototype = self._cc.guess_prototype(args)
self._base_state.ip = self._addr
state = self._project.factory.call_state(self._addr, *args,
cc=self._cc,
prototype=prototype,
base_state=self._base_state,
ret_addr=self._deadend_addr,
toc=self._toc)
Expand Down
Loading

0 comments on commit 3090a97

Please sign in to comment.