-
Notifications
You must be signed in to change notification settings - Fork 30
/
asm-rv32i.mu4
389 lines (311 loc) · 12.8 KB
/
asm-rv32i.mu4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
| This file is part of muforth: https://muforth.dev/
|
| Copyright 2002-2024 David Frech. (Read the LICENSE for details.)
loading RISC-V RV32 assembler (I subset)
| Since the RISC-V instruction set is modular, I want to make the assembler
| - and disassembler - modular as well.
|
| This file only defines the RV32I subset - the 32-bit wide encoding of the
| 32-bit integer instructions.
( Support for inline asm in any colon word.)
variable saved-state ( so we know which colon compiler to return to.)
compiler
| asm{ copies the current contents of state into saved-state and then runs
| __inline-asm. NOTE(review): the words between "compiler" and "forth" are
| presumably added to the compiler vocabulary so they are found while a
| colon definition is being compiled - confirm.
: asm{ state @ saved-state ! __inline-asm ; ( start assembler)
| } copies the value saved by asm{ back into state.
: } saved-state @ state ! ; ( exit assembler mode and restart colon compiler)
forth
| The 32 general-purpose registers come first. Each register is a constant
| holding its number, 0 to 31, in the low five bits and a recognizable
| signature in the bits above. The signature is what lets ?reg tell a
| register apart from an ordinary number sitting on the stack. ;-)

| The signature was meant to spell "five dude", not "five doodoo". ;-)
"5_d00d 8 <<  constant reg-signature

| Define 32 consecutive register constants, taking their names from the
| input stream. The low eight bits of reg-signature are zero, so counting
| upward from the signature itself gives signature-plus-register-number.
: 32regs   reg-signature  32 for  dup constant  1+  next  drop ;
assembler
( Define the generic register names.)
| 32regs reads the next 32 names from the input and numbers them 0 to 31 in
| the order given, so the four lines below form a single invocation.
32regs x0 x1 x2 x3 x4 x5 x6 x7
x8 x9 x10 x11 x12 x13 x14 x15
x16 x17 x18 x19 x20 x21 x22 x23
x24 x25 x26 x27 x28 x29 x30 x31
| Also define the C - or "Unix" - ABI names.
| https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
|
| Name | ABI Mnemonic | Meaning | Preserved across calls?
| x0 | zero | Zero | - (Immutable)
| x1 | ra | Return address | No
| x2 | sp | Stack pointer | Yes
| x3 | gp | Global pointer | - (Unallocatable)
| x4 | tp | Thread pointer | - (Unallocatable)
| x5 - x7 | t0 - t2 | Temporary registers | No
| x8 - x9 | s0 - s1 | Callee-saved registers | Yes
| x10 - x17 | a0 - a7 | Argument registers | No
| x18 - x27 | s2 - s11 | Callee-saved registers | Yes
| x28 - x31 | t3 - t6 | Temporary registers | No
| The same 32 register numbers again, this time under their ABI names,
| listed in x0 to x31 order so that each name gets the matching number.
32regs zero ra sp gp tp t0 t1 t2
s0 s1 a0 a1 a2 a3 a4 a5
a6 a7 s2 s3 s4 s5 s6 s7
s8 s9 s10 s11 t3 t4 t5 t6
forth
| Even though .regname is *used* in the disassembler, let's define it here
| since we are thinking about register names and ABI conventions.
| .regname is a deferred word: raw-regs and abi-regs each install a
| different register-name printer into it.
defer .regname
| Print an unsigned number in decimal.
| NOTE(review): "radix preserve" presumably restores the caller's radix when
| this word exits - confirm against the definition of preserve.
: .udec radix preserve decimal (u.) type ;
( Print a "raw" register name; eg, "x12".)
| NOTE(review): the anonymous -: definition presumably leaves its execution
| token on the stack, which raw-regs then compiles as a literal via [ #] and
| installs into .regname each time raw-regs is run - confirm.
-: ( reg) ." x" .udec ;
: raw-regs [ #] is .regname ;
( Print an ABI register name.)
| Names live in two tables of three-character fields; -trailing strips the
| blank padding from the short names. "zero" does not fit in three
| characters, so x0 is printed separately and the first table starts at x1.
| That table therefore has to be indexed with reg - 1; indexing it with reg
| printed the name of the next register and ran past the end for x15. The
| second table starts at x16 and is indexed with reg - 16.
-: ( reg)
   dup 0= if  drop  ." zero"  ^  then
   dup 16 u< if
      1-    z" ra sp gp tp t0 t1 t2 s0 s1 a0 a1 a2 a3 a4 a5 "
   else
      16 -  z" a6 a7 s2 s3 s4 s5 s6 s7 s8 s9 s10s11t3 t4 t5 t6 "
   then
   ( index table)  swap 3 * +  3 -trailing type ;

| Install the ABI-name printer into .regname, and make it the default.
: abi-regs   [ #] is .regname ;   abi-regs  ( default)
( Shifting bits into position.)
| Each word shifts bits left to the low bit of its instruction field and ORs
| the result into op. Nothing is masked here; callers that need the value
| trimmed to the field width do so before calling.
: >rd ( op bits - op') 7 << or ; ( field starts at bit 7)
: >funct3 ( op bits - op') 12 << or ; ( field starts at bit 12)
: >rs1 ( op bits - op') 15 << or ; ( field starts at bit 15)
: >rs2 ( op bits - op') 20 << or ; ( field starts at bit 20)
: >funct7 ( op bits - op') 25 << or ; ( field starts at bit 25)
( Register operands.)

| Bring the register to the top of the stack and keep only its five-bit
| register number, discarding the signature bits.
: >reg   ( reg op - op reg-masked)   swap  "1f and ;

| Like >reg, but first insist that the operand really is a register: with
| its low five bits cleared it must equal reg-signature exactly.
: ?reg   ( reg op - op reg-masked)
   over -32 and  reg-signature = 0= if  error" expected a register"  then
   >reg ;
| rd, rs1 and rs2 check that the operand is a register - ?reg throws
| otherwise - and OR its number into the corresponding field of op.
: rd ( reg op - op') ?reg >rd ;
: rs1 ( reg op - op') ?reg >rs1 ;
: rs2 ( reg op - op') ?reg >rs2 ;
| The -imm variants put a plain number in a register field instead. They use
| >reg, so the value is masked to five bits but is not checked in any way.
: rs1-imm ( zimm op - op') >reg >rs1 ;
: rs2-imm ( zimm op - op') >reg >rs2 ;
( For grabbing single bits out of something.)
| Shift value right by bit# and keep the lowest bit, so the result is 0 or 1.
: bit@ ( value bit# - bit) u>> 1 and ;
| PC-relative offsets are measured from the address of the branch or jump
| itself - the current \m here - with no extra bias added.
: >pc-rel   ( dest op - offset op)
   push  \m here -  pop ;
| A signed value fits in a field of "bits" bits exactly when every bit from
| the field's sign bit upward is a copy of that sign bit. An arithmetic
| shift right by bits-1 then leaves either 0 or -1, and nothing else.
: sfits?   ( value bits - f)
   1- >>  dup 0=  swap -1 =  or ;  ( signed)

| Throw an error unless value fits in a signed field of "bits" bits.
: ?sfits   ( value bits)
   sfits? 0= if  error" relative branch out of range"  then ;
| Append one assembled instruction to the target image with \m , . Every
| instruction-defining word in this file finishes by calling op, .
: op, ( op) \m , ; ( for now, instructions are 32 bits wide)
| There is some subtlety involved in the way that RISC-V immediate values
| work. Many instructions contain a 12-bit *signed* offset. A few
| instructions allow loading a 20-bit immediate into the highest 20 bits of a
| register.
|
| But because the low 12 are *signed*, one has to be careful when splitting
| a 32-bit immediate value into a high 20-bit part and a low 12-bit part.
|
| If the low part is negative - if bit 11 is set - hex 1000 needs to be
| added to the high part to compensate! When the negative low part is
| sign-extended, that is equivalent to adding ffff_f000 to the whole value -
| and this is precisely -1000. Tricky!
|
| An easy way to adjust the high part is essentially to "round up" by
| adding "800 and then throwing away the low 12 bits. If bit 11 was set,
| adding "800 carries into bit 12, and we add the "1000 that we need. If bit
| 11 was clear, adding "800 and truncating does nothing.
|
| This adjustment is folded into the calculation of U-imm, below.
( The "easy" immediates.)
| I-imm: keep the low 12 bits of imm and place them in the field that starts
| at bit 20. The value is masked, not range-checked.
: I-imm ( imm op - op') swap "fff and >rs2 ;
| S-imm: the same 12 bits, split in two. imm[11:5] goes to the field at bit
| 25 and imm[4:0] to the field at bit 7.
: S-imm ( imm op - op') over 5 >> "7f and >funct7
swap "1f and >rd ;
| U-imm: imm is the complete value. Add "800 so that a set bit 11 carries
| into bit 12, then keep bits 31:12 and place them in the field at bit 12.
: U-imm ( imm op - op') swap "800 + ( round up)
12 >> "f_ffff and >funct3 ;
( These two are complicated. ;-)
| SB-imm scatters a branch offset over the instruction: imm[12] goes to bit
| 31, imm[10:5] to bits 30:25, imm[4:1] to bits 11:8, and imm[11] to bit 7.
| Bit 0 of the offset is not encoded. The pieces are ORed into op, so op's
| offset field must be empty on entry.
: SB-imm ( imm op - op')
over 12 bit@ 31 << or ( imm[12])
over 5 >> "3f and >funct7 ( imm[10:5])
over "1e and rot 11 bit@ or >rd ( imm[4:1|11]) ;
| UJ-imm does the same for a jump offset: imm[20] goes to bit 31,
| imm[19:12] to bits 19:12, imm[10:1] to bits 30:21, and imm[11] to bit 20.
| Again bit 0 is not encoded and op's offset field must be empty on entry.
: UJ-imm ( imm op - op')
over 20 bit@ 31 << or ( imm[20])
over 12 >> "ff and >funct3 ( imm[19:12])
over "7fe and rot 11 bit@ or >rs2 ( imm[10:1|11]) ;
| R-type: three-register instructions. The defining word folds funct3 and
| funct7 into the opcode and saves the result under the instruction's name.
| The defined instruction takes ( rs1 rs2 rd), fills in the three register
| fields, and assembles itself.
: R-type   ( opcode f3 f7)
   -rot >funct3  swap >funct7  constant
   does> @  ( rs1 rs2 rd op)   rd  rs2  rs1  op, ;

| RI-type: identical, except that the rs2 field carries a plain five-bit
| number - a shift amount - rather than a register.
: RI-type  ( opcode f3 f7)
   -rot >funct3  swap >funct7  constant
   does> @  ( rs1 rs2-imm rd op)   rd  rs2-imm  rs1  op, ;
| I-type: the defined instruction takes ( imm rs1 rd). The operands are
| consumed from the top of the stack down: rd, then rs1, then the immediate.
: I-type ( opcode f3) >funct3 constant
does> @ ( imm rs1 rd op) rd rs1 I-imm op, ;
| II-type: like I-type, but the rs1 field holds a five-bit number instead of
| a register.
: II-type ( opcode f3) >funct3 constant
does> @ ( imm rs1-imm rd op) rd rs1-imm I-imm op, ;
| S-type: the defined instruction takes ( off base src). src goes in the
| rs2 field, base in the rs1 field, and off is split up by S-imm.
: S-type ( opcode f3) >funct3 constant
does> @ ( off base src op) rs2 rs1 S-imm op, ;
| NOTE: This is *not* a create/does word! We need to handle lui and auipc
| differently, so U-type is a normal colon word.
|
| It takes the opcode as an ordinary argument, fills in rd and the upper
| immediate computed by U-imm, and assembles the instruction.
: U-type ( imm rd op) rd U-imm op, ;
| SB-type: conditional branches. The defined instruction takes
| ( dest rs1 rs2), with dest an address. >pc-rel turns dest into an offset
| from the current \m here; the offset must fit in 13 signed bits or ?sfits
| throws.
: SB-type ( opcode f3) >funct3 constant
does> @ ( dest rs1 rs2 op) rs2 rs1
>pc-rel over #13 ?sfits SB-imm op, ;
| UJ-type: jumps. The defined instruction takes ( dest rd). The offset is
| computed the same way and must fit in 21 signed bits.
: UJ-type ( opcode) constant
does> @ ( dest rd op) rd
>pc-rel over #21 ?sfits UJ-imm op, ;
| priv: instructions without operands. The whole encoding - opcode, funct3,
| and a fixed 12-bit immediate in the field that starts at bit 20 - is baked
| in when the instruction is defined; executing it simply assembles it.
: priv   ( opcode f3 imm12)
   -rot >funct3  swap >rs2  constant
   does> @  op, ;
| _fence: the defined instruction takes ( pred succ) and packs them as
| pred shifted left by 4, ORed with succ, into the field that starts at bit
| 20. Neither value is masked.
: _fence ( opcode f3) >funct3 constant
does> @ ( pred succ op) -rot swap 4 << or >rs2 op, ;
| Since the encodings use fields with oddball widths - 5 bits, 7 bits, 3
| bits - we won't be able to easily use either octal or hex.
|
| So, binary it is!
| From here on, definitions go into the assembler vocabulary and unprefixed
| numbers are read as binary. Each opcode is written in three groups:
| bits 6:5, bits 4:2, and bits 1:0.
assembler binary
( load)
| Operands follow I-type: ( offset base rd).
00_000_11 000 I-type lb
00_000_11 001 I-type lh
00_000_11 010 I-type lw
00_000_11 100 I-type lbu
00_000_11 101 I-type lhu
( misc-mem)
| fence takes its two operands from the stack; fence.i takes none and has
| an all-zero immediate field.
00_011_11 000 _fence fence ( pred succ)
00_011_11 001 0 priv fence.i
( op-imm)
| The I-type entries take ( imm rs1 rd). The three shifts are RI-type and
| take ( rs1 shamt rd); srli and srai share funct3 101 and differ only in
| funct7.
00_100_11 000 I-type addi
00_100_11 001 0000000 RI-type slli ( rs2 is shift amount)
00_100_11 010 I-type slti
00_100_11 011 I-type sltiu
00_100_11 100 I-type xori
00_100_11 101 0000000 RI-type srli ( rs2 is shift amount)
00_100_11 101 0100000 RI-type srai ( rs2 is shift amount)
00_100_11 110 I-type ori
00_100_11 111 I-type andi
( auipc)
| auipc takes a label rather than a raw immediate. It computes the offset
| from the current \m here to the label, assembles the instruction from that
| offset via U-type, and leaves a copy of the full offset on the stack.
| NOTE(review): the leftover offset presumably supplies the low 12 bits to a
| following instruction - confirm against callers.
| The opcode is on the stack when the definition starts and is compiled in
| as a literal by [ #] .
00_101_11 : auipc ( label rd - offset) swap \m here - dup rot [ #] U-type ;
( store)
| Operands follow S-type: ( offset base src).
01_000_11 000 S-type sb
01_000_11 001 S-type sh
01_000_11 010 S-type sw
| Atomic instructions, assembled only when A-ext is defined. All are R-type
| and take ( rs1 rs2 rd). The raw lr.w is defined in the forth vocabulary
| rather than in the assembler; the lr.w defined at the end, in the
| assembler vocabulary, wraps it and supplies x0 in the rs2 position.
.ifdef A-ext
( amo)
| XXX aq and rl bits are both zero. Need some kind of postfix way to set
| them? Or do something like _fence?
01_011_11 010 0000000 R-type amoadd.w
01_011_11 010 0000100 R-type amoswap.w
forth
01_011_11 010 0001000 R-type lr.w ( needs rs2 set to zero)
assembler
01_011_11 010 0001100 R-type sc.w
01_011_11 010 0010000 R-type amoxor.w
01_011_11 010 0100000 R-type amoor.w
01_011_11 010 0110000 R-type amoand.w
01_011_11 010 1000000 R-type amomin.w
01_011_11 010 1010000 R-type amomax.w
01_011_11 010 1100000 R-type amominu.w
01_011_11 010 1110000 R-type amomaxu.w
( Set rs2 to zero.)
: lr.w ( rs1 rd) asm{ x0 swap lr.w } ;
.then
( op)
| Register-register operations. Operands follow R-type: ( rs1 rs2 rd).
| sub and sra differ from add and srl only in funct7.
01_100_11 000 0000000 R-type add
01_100_11 000 0100000 R-type sub
01_100_11 001 0000000 R-type sll
01_100_11 010 0000000 R-type slt
01_100_11 011 0000000 R-type sltu
01_100_11 100 0000000 R-type xor
01_100_11 101 0000000 R-type srl
01_100_11 101 0100000 R-type sra
01_100_11 110 0000000 R-type or
01_100_11 111 0000000 R-type and
| Multiply and divide, assembled only when M-ext is defined. They use the
| same opcode as the register-register operations, with funct7 0000001, and
| take ( rs1 rs2 rd).
.ifdef M-ext
( op)
01_100_11 000 0000001 R-type mul
01_100_11 001 0000001 R-type mulh
01_100_11 010 0000001 R-type mulhsu
01_100_11 011 0000001 R-type mulhu
01_100_11 100 0000001 R-type div
01_100_11 101 0000001 R-type divu
01_100_11 110 0000001 R-type rem
01_100_11 111 0000001 R-type remu
.then
( lui)
| imm is the complete value, not a pre-shifted 20-bit field: U-type hands it
| to U-imm, which rounds it and keeps the upper 20 bits. The opcode is on
| the stack when the definition starts and is compiled in as a literal.
01_101_11 : lui ( imm rd) [ #] U-type ;
( XXX Do we need these if we have the "control structure" words?)
( branch)
| Operands follow SB-type: ( dest rs1 rs2), where dest is an address.
11_000_11 000 SB-type beq
11_000_11 001 SB-type bne
11_000_11 100 SB-type blt
11_000_11 101 SB-type bge
11_000_11 110 SB-type bltu
11_000_11 111 SB-type bgeu
( jalr)
| jalr is I-type and takes ( imm rs1 rd).
11_001_11 000 I-type jalr
( jal)
| jal is UJ-type and takes ( dest rd), where dest is an address.
11_011_11 UJ-type jal
( system)
| The priv entries take no operands; the third number is the fixed 12-bit
| immediate, written as a 7-bit group and a 5-bit group.
11_100_11 000 0000000_00000 priv ecall
11_100_11 000 0000000_00001 priv ebreak
11_100_11 000 0000000_00010 priv uret
11_100_11 000 0001000_00010 priv sret
11_100_11 000 0010000_00010 priv hret
11_100_11 000 0011000_00010 priv mret
11_100_11 000 0001000_00101 priv wfi
| CSR instructions put the CSR number in the immediate field. The register
| forms take ( csr rs1 rd); the immediate forms take ( csr zimm rd), with
| zimm a five-bit number in the rs1 field.
11_100_11 001 I-type csrrw ( CSR read/write)
11_100_11 010 I-type csrrs ( CSR read/set)
11_100_11 011 I-type csrrc ( CSR read/clear)
11_100_11 101 II-type csrrwi ( CSR read/write immediate)
11_100_11 110 II-type csrrsi ( CSR read/set immediate)
11_100_11 111 II-type csrrci ( CSR read/clear immediate)
( Utility.)
( Synthetic instructions.)
| Each of these is a stack shuffle in front of a real instruction. addi
| takes ( imm rs1 rd).
: nop asm{ 0 x0 x0 addi } ; ( addi x0, x0, 0)
: mv ( rs rd) asm{ 0 -rot addi } ; ( addi rd, rs, 0)
: li ( imm rd) asm{ x0 swap addi } ; ( addi rd, x0, imm - only the low 12 bits of imm are used)
| subi: subtract an immediate from a register in place - rd = rd - imm.
| There is no subtract-immediate instruction, so this assembles
| addi rd, rd, -imm. addi wants ( imm rs1 rd), so rd is duplicated to serve
| as both source and destination; without the dup, addi was handed only two
| of its three operands. Only the low 12 bits of -imm are encoded.
: subi ( imm rd) asm{ swap negate swap dup addi } ;
| la builds a full 32-bit value in rd: lui sets the rounded upper 20 bits,
| then addi adds the low 12 bits, with rd as both source and destination.
: la ( imm rd) asm{ 2dup lui dup addi } ;
| The rest shuffle their operands into place for one real instruction.
| R-type words take ( rs1 rs2 rd); I-type words take ( imm rs1 rd).
: not ( rs rd) asm{ -1 -rot xori } ; ( xori rd, rs, -1)
: neg ( rs rd) asm{ x0 -rot sub } ; ( sub rd, x0, rs)
: seqz ( rs rd) asm{ 1 -rot sltiu } ; ( sltiu rd, rs, 1)
: snez ( rs rd) asm{ x0 -rot sltu } ; ( sltu rd, x0, rs)
: sltz ( rs rd) asm{ x0 swap slt } ; ( slt rd, rs, x0)
: sgtz ( rs rd) asm{ x0 -rot slt } ; ( slt rd, x0, rs)
: j ( dest) asm{ x0 jal } ; ( jal with rd = x0)
: jr ( off rs) asm{ x0 jalr } ; ( jalr with rd = x0)
| CSR shorthands. Each takes a source and a destination and rearranges them
| into the ( csr rs1 rd) order the real CSR instructions expect, putting x0
| in whichever register slot is unused.
( src dst ==> csr rs1 rd csrXX )
: csrr ( csr rd) asm{ x0 swap csrrs } ; ( read, don't write)
: csrw ( rs csr) asm{ swap x0 csrrw } ; ( write, don't read)
: csrs ( rs csr) asm{ swap x0 csrrs } ; ( set, don't read)
: csrc ( rs csr) asm{ swap x0 csrrc } ; ( clear, don't read)
: csrwi ( zimm csr) asm{ swap x0 csrrwi } ; ( write imm, don't read)
: csrsi ( zimm csr) asm{ swap x0 csrrsi } ; ( set imm, don't read)
: csrci ( zimm csr) asm{ swap x0 csrrci } ; ( clear imm, don't read)
forth
| Control structures.
|
| RISC-V is a "compare-and-branch" architecture. There are no condition
| code bits anywhere. Thus we have to rethink somewhat how we define the
| looping and conditional branching operators in the assembler.
| SB-cond words compose and return an instruction with an _empty_ offset.
| The op is compiled by if and fixed up later by resolve.
|
| Unlike SB-type, the defined word assembles nothing: it fills in the two
| register fields and leaves the finished op on the stack.
: SB-cond ( opcode f3) >funct3 constant
does> @ ( rs1 rs2 op - op') rs2 rs1 ;
( Remember: branch is always _opposite_ of condition.)
| Each condition is named for the test under which the following code runs.
| The branch it encodes, shown in the trailing comment, is the opposite
| test and skips that code.
assembler
( Compare two registers: ( rs1 rs2 - op)
11_000_11 000 SB-cond != ( beq)
11_000_11 001 SB-cond = ( bne)
11_000_11 100 SB-cond >= ( blt)
11_000_11 101 SB-cond < ( bge)
11_000_11 110 SB-cond u>= ( bltu)
11_000_11 111 SB-cond u< ( bgeu)
( Compare a register against zero: ( rs - op)
| Each one supplies x0 as the second register of the two-register
| condition of the same name.
: 0= asm{ x0 = } ;
: 0!= asm{ x0 != } ;
: 0< asm{ x0 < } ;
: 0>= asm{ x0 >= } ;
( Resolve a relative jump from src to dest.)
| src is the address of an instruction that has already been assembled.
| The offset dest - src is range-checked, encoded, and ORed into that
| instruction, and the result is stored back at src. Because the offset is
| ORed in, the instruction's offset field must still be zero.
| An instruction whose low seven bits equal the jal opcode gets a 21-bit
| UJ-style offset; anything else is treated as a conditional branch with a
| 13-bit offset.
: resolve ( src dest)
over - ( offset) over image-@ ( src offset op)
dup "07f and 11_011_11 = if ( UJ-type) over #21 ?sfits UJ-imm else
( SB-type) over #13 ?sfits SB-imm then
swap image-! ;
( Control structure words.)
| A condition word leaves a branch op with an empty offset; begin leaves an
| address. In "begin ... cond until" and "begin ... cond while ... repeat",
| begin runs first, so dest lies beneath op on the stack.
: if ( op - src ) \m here swap op, ; ( assemble the branch and leave its address)
: then ( src - ) \m here \a resolve ; ( aim the pending branch at the current address)
: else ( src1 - src2) \m here dup \a j swap \a then ; ( assemble a jal with zero offset, then resolve the if)
: begin ( - dest) \m here ;
: until ( dest op) \a if swap \a resolve ; ( assemble the branch and aim it back at dest)
: again ( dest) \a j ; ( x0 jal, *not* a conditional branch!)
: while ( dest op - src dest) \a if swap ; ( assemble the exit branch; keep dest on top for repeat)
: repeat ( src dest) \a again \a then ; ( jump back to dest, then resolve the exit branch)
forth decimal