-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy patharchitecture.py
267 lines (237 loc) · 10.5 KB
/
architecture.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
from __future__ import print_function
import time, traceback
import binaryninja
from binaryninja.architecture import Architecture
from binaryninja.lowlevelil import LowLevelILFunction, LowLevelILLabel, LLIL_TEMP
from binaryninja.function import RegisterInfo, InstructionInfo
from binaryninja.log import log_info, log_warn, log_error
from binaryninja.enums import (BranchType, LowLevelILOperation,
LowLevelILFlagCondition, FlagRole, Endianness)
from . import mem
from .disassembler import specification
from .disassembler import ana, emu, out
from . import lowlevelil
from .experiments import llil_mangler
class MCS51(Architecture):
    """
    Binary Ninja ``Architecture`` subclass registered under the name "8051".

    Decoding, branch analysis, text rendering and LLIL lifting are all table
    driven: every callback indexes the lazily built ``lut`` tables with the
    first byte of the instruction.

    Capitalization convention: memory-mapped stuff in allcaps, bits and true
    registers lower? Except r0-r7, also lower? Foolish consistency.
    """
    name = "8051"

    # C 'pointers' tend to be 3 bytes, but architecture-wise it's just 2?
    # Our fake address space that keeps all flash banks mapped needs 3.
    # Full XRAM/IRAM tags need 5.
    address_size = 2  # sets default return value size, nothing else... ???
    endianness = Endianness.BigEndian  # up to compiler... needs to be chosen
    default_int_size = 1
    max_instr_length = 3

    stack_pointer = 'SP'

    # Single-byte registers, plus DPTR with its two one-byte halves.
    regs = {r: RegisterInfo(r, 1) for r in ['SP', 'A', 'B']}
    regs['DPTR'] = RegisterInfo('DPTR', 2)
    regs['DPL'] = RegisterInfo('DPTR', 1)
    regs['DPH'] = RegisterInfo('DPTR', 1, 1)  # FIXME what endianness is this?
    if 0:
        # Disabled alternative: R0-R7 as eight independent 1-byte registers.
        regs.update({r: RegisterInfo(r, 1)
                     for r in ['R%d' % n for n in range(8)]})
    else:
        # This is cute, but I'm not yet sure if it's useful. Register merging
        # doesn't come in until HLIL?
        #
        # On closer look, this might be the only way to make calling
        # conventions work. At least as they are now.
        # Need to re-visit once this subregister bug is fixed:
        # https://github.com/Vector35/binaryninja-api/issues/715
        #
        # R0-R7 are declared as byte-sized slices of two 4-byte registers
        # (Y0, Y4); T0/T2/T4/T6 are the 2-byte slices of the same parents.
        regs['PTR'] = RegisterInfo('Y0', 3, 1)  # C pointers under some compilers
        regs['Y0'] = RegisterInfo('Y0', 4)
        regs['Y4'] = RegisterInfo('Y4', 4)
        regs['T0'] = RegisterInfo('Y0', 2)
        regs['T2'] = RegisterInfo('Y0', 2, 2)
        regs['T4'] = RegisterInfo('Y4', 2)
        regs['T6'] = RegisterInfo('Y4', 2, 2)
        regs['R0'] = RegisterInfo('Y0', 1)
        regs['R1'] = RegisterInfo('Y0', 1, 1)
        regs['R2'] = RegisterInfo('Y0', 1, 2)
        regs['R3'] = RegisterInfo('Y0', 1, 3)
        regs['R4'] = RegisterInfo('Y4', 1)
        regs['R5'] = RegisterInfo('Y4', 1, 1)
        regs['R6'] = RegisterInfo('Y4', 1, 2)
        regs['R7'] = RegisterInfo('Y4', 1, 3)

    flags = [
        # actual flags stored in PSW special function register:
        'p',  # parity of accumulator
        #'ud', # user defined/unused by base hardware
        'ov',  # signed overflow on add
        #'rs0', 'rs1', # R0-R7 register bank select
        #'f0', # software use, like ud
        'ac',  # aux carry, because BCD is *important*!
        'c',
        # synthesized flags:
        'z',  # "There is no zero bit in the PSW. The JZ and JNZ instructions
        's',  # test the Accumulator data for that condition."
    ]
    flag_write_types = [
        '',  # first element *might* be ignored due to known bug
        'c',
        'zsp',  # modify A, without touching other flags
        'zspc',  # modify A and carry flag
        'zspc ov',  # */ operations
        #'zspc ov ac', # +- operations
        '*',  # +- operations
        # should mov indirect into PSW/ACC have its own flag settings?
    ]
    # NOTE(review): 'zspc ov' is listed in flag_write_types above but has no
    # entry here (it is commented out) -- confirm that is intentional.
    flags_written_by_flag_write_type = {
        'c': ['c'],
        'zsp': ['z', 's', 'p'],
        'zspc': ['z', 's', 'p', 'c'],
        #'zspc ov': ['z','s','p','c','ov'],
        '*': ['z', 's', 'p', 'c', 'ov', 'ac'],
    }
    flag_roles = {
        # real:
        'c': FlagRole.CarryFlagRole,
        'ac': FlagRole.HalfCarryFlagRole,
        'ov': FlagRole.OverflowFlagRole,
        'p': FlagRole.OddParityFlagRole,
        # imaginary:
        's': FlagRole.NegativeSignFlagRole,
        'z': FlagRole.ZeroFlagRole,
    }
    flags_required_for_flag_condition = {
        LowLevelILFlagCondition.LLFC_E: ["z"],
        LowLevelILFlagCondition.LLFC_NE: ["z"],
        LowLevelILFlagCondition.LLFC_NEG: ["s"],
        LowLevelILFlagCondition.LLFC_POS: ["s"],
        LowLevelILFlagCondition.LLFC_UGE: ["c"],
        LowLevelILFlagCondition.LLFC_ULT: ["c"],
        # not set by nes.py, going to try setting:
        LowLevelILFlagCondition.LLFC_O: ["ov"],
        LowLevelILFlagCondition.LLFC_NO: ["ov"],
    }

    def get_instruction_info(self, data, addr):
        """Return an InstructionInfo (length and branches) for the bytes at addr.

        Returns None when `data` is empty. Branch targets stored as callables
        in the table are resolved against `data`; if fewer than `size` bytes
        are available the target falls back to 0.
        """
        if not data:
            return  # edge case during linear sweep

        nfo = InstructionInfo()

        # ana
        size, branch = self.lut.branches[data[0]]
        nfo.length = size

        # emu
        if branch:
            branch_type, target = branch
            if callable(target):
                target = target(data, addr, size) if size <= len(data) else 0
            # TODO (for BranchType.CallDestination): keep track of
            # return-effect functions, tweak target +=dx
            # TODO: arch is probably global; need to store this in bv somehow :|
            nfo.add_branch(branch_type, target=target)
            if branch_type == BranchType.TrueBranch:
                # Conditional branches also fall through to the next instruction.
                nfo.add_branch(BranchType.FalseBranch, addr + size)
        return nfo

    def get_instruction_text(self, data, addr):
        """Return (tokens, size) for the instruction at addr, or None.

        None is returned when `data` is empty or shorter than the instruction,
        instead of raising IndexError / relying on an `assert` that disappears
        under ``python -O``.
        """
        if not data:
            return None

        # ana
        size, vals = self.lut.decoders[data[0]]
        if len(data) < size:
            # incomplete instruction; the operand decoders need `size` bytes
            return None
        vals = [decoder(data, addr, size) for decoder in vals]

        # out / outop
        toks = self.lut.text[data[0]]
        return out.render(toks, vals), size

    def get_instruction_low_level_il(self, data, addr, il):
        """Append LLIL for the instruction at addr to `il`; return its length.

        Returns None when `data` is empty. When `data` is shorter than the
        instruction nothing is lifted and the nominal size is returned. The
        builder may return its own size, which then takes precedence.
        """
        if not data:
            return None

        # ana
        code = data[0]
        size, vals = self.lut.decoders[code]
        if len(data) < size:
            # incomplete code due to disassembling data or missing memory
            return size  # abort further analysis before it errors
        vals = [decoder(data, addr, size) for decoder in vals]

        # sem
        # A patch registered for this address (if any) replaces the table entry.
        build = llil_mangler.patch_at(self, addr) or self.lut.llil[code]
        size_override = build(il, vals, addr)
        return size_override if size_override is not None else size

    #def get_flag_condition_low_level_il(self, cond, il):
    #    il.append(il.unimplemented())

    def get_flag_write_low_level_il(self, op, size, write_type, flag,
                                    operands, il):
        """Delegate flag-write lifting to the base Architecture implementation.

        This can't be right; why doesn't it work on its own?

        Earlier experiments, all of which were disabled (`if 0`) or
        unreachable, tried: routing the carry flag (or every flag) through
        get_default_flag_write_low_level_il with FlagRole.CarryFlagRole;
        lifting the flag for LLIL_RLC / LLIL_RRC as a test of bit 0x80 / 0x01
        of the operand register; and computing carry for LLIL_SBB as
        (left - (right + carry)) >> 8. None of them were active, so only the
        live path is kept.
        """
        return Architecture.get_flag_write_low_level_il(
            self, op, size, write_type, flag, operands, il)

    @specification.lazy_memoized_property
    def lut(self):
        """Look up tables generated once.

        All available architectures are *instantiated* on start, even if never
        used. To be a good neighbour but still get to write fun code, complex
        processing should be deferred until needed using this decorator.
        """
        luts = Tables()

        if 1:  # DEBUG
            # Show a report of what is still unlifted plus reference plugins.
            urls = [
                ('spu plugin',
                 'https://github.com/bambu/binaryninja-spu/blob/master/spu.py'),
                ('nes plugin',
                 'https://github.com/Vector35/binaryninja-api/blob/dev/python/examples/nes.py'),
                ('m68k plugin',
                 'https://github.com/alexforencich/binaryninja-m68k/blob/master/__init__.py'),
            ]
            md = '## Still Unlifted\n\n' + luts.unlifted
            md += '\n\n## Reference Examples\n\n'
            md += ''.join('- [{0}]({1})\n'.format(title, url)
                          for title, url in urls)
            binaryninja.show_markdown_report("Architecture Progress", md)

        return luts

    def get_associated_arch_by_address(self, addr):
        """Return (self, addr) unchanged: no architecture switching is done."""
        # Waaait a second. add_branch has an optional 'arch' argument
        #
        # Can I branch from x86 into BPF? Or .NET IL? Or obfs. interpreter
        # uops? In one idb?
        # OMG IF YES TEST TEST TEST THIS omg, there's even a hinter
        #
        # guess this is from arm thumb shenanigans? or 32/64 in general?
        return self, addr

    ##
    ## That from-IDA patching thing them game hackers are so keen on...
    ##

    def always_branch(self, data, addr):
        """Not implemented; returns None."""
        return  # TODO do this, even if that's not how you normally patch

    def convert_to_nop(self, data, addr):
        """Not implemented; returns None.

        `self` was missing from the original signature, so calling this as a
        method raised TypeError.
        """
        return

    def assemble(self, code, addr=0):
        """Not implemented; returns None.

        `self` was missing from the original signature, so calling this as a
        method raised TypeError.
        """
        # TODO either hand-assemble, or find some nice embeddable asm /w
        # macros and proper labels and stuff? will need to double-check syntax
        # compat
        # also TODO: sdcc 8051 training binary
        return
class Tables:
    """Lookup tables derived from the instruction specification.

    Attributes set by the constructor:
      decoders -- spec refined with ana.operand_decoders
      branches -- spec refined with emu.branch_type
      text     -- spec refined with out.tokens
      llil     -- spec refined with lowlevelil.low_level_il
      unlifted -- result of lowlevelil.unlifted_todo for the spec and llil
    """

    def __init__(self):
        """Build every table and log how long construction took."""
        started = time.time()

        spec = specification.InstructionSpec()
        refine = spec.refine

        # Same refinement order as always: decoders, branches, text, llil.
        self.decoders = refine(ana.operand_decoders)
        self.branches = refine(emu.branch_type)
        self.text = refine(out.tokens)
        self.llil = refine(lowlevelil.low_level_il)

        # FIXME hack until I refactor this a bit:
        self.unlifted = lowlevelil.unlifted_todo(spec.spec, self.llil)

        duration = time.time() - started
        log_info('Building 8051 tables took %0.3f seconds' % duration)