mu/target/RISC-V/dis-rv32i.mu4

| This file is part of muforth: https://muforth.dev/
|
| Copyright 2002-2024 David Frech. (Read the LICENSE for details.)

loading RISC-V RV32 disassembler (I subset)

| Compiling the instruction table.
|
| If a match is found, the cfa following match is executed, and then the
| word that called match is exited. This shortcuts any further tests.
|
| If no match was found, the following cfa is skipped, and the matching
| continues apace.

| Test op against one table entry: AND op with mask and compare the result
| with match. op stays on the stack, underneath the resulting flag.
: matched?  ( op mask match - op matched)
   push  over and  pop = ( matched) ;

| Mask op and test it against match.
|
| This word pops its own return address, which points at the cell holding
| the cfa that follows the call to match. On a hit, that cfa is fetched and
| executed, and returning from here then exits the word that called match.
| On a miss, the address is bumped past the cfa and pushed back, so the
| caller resumes with whatever comes after it.
: match   ( op mask match - op)
   matched?  pop  swap if      @execute  ^  then  cell+ push ;

| Like match, but the value tested is a second one - op2 - sitting on top
| of the real op. If matched, drop op2 and execute following cfa. Otherwise,
| skip following cfa and leave both op1 and op2 on the stack.
|
| Callers that run out of submatch lines have to drop op2 themselves.

: submatch   ( op1 op2 mask match - op1 | op1 op2)
   matched?  pop  swap if  nip @execute  ^  then  cell+ push ;

| Set by 1dis before each instruction is decoded. Read by .rel-dest and
| .auipc to turn pc-relative offsets into absolute addresses.
variable instr-pc  ( points to _start_ of current instruction)

( All the op printing words have the stack signature: op - op)

( General-purpose)   comment %unused%
: sext    ( n width - n')
   1 swap <<  push  dup  r@ 2/ and if  r@ -  then  rdrop ;  %unused%

| Sign-extend a 12-bit or 20-bit field without branching: flip the field's
| sign bit, then subtract that bit's weight.
: sext12  ( n - n')      "800 xor     "800 - ;
: sext20  ( n - n')   "8_0000 xor  "8_0000 - ;

| Field extractors. Each takes an instruction word and returns one of its
| fields, shifted down to bit 0.

( Register numbers are five bits wide.)
: reg>      ( bits - reg)   "1f and ;
( XXX - leave out the reg> mask?)
: rd>       ( op - rd)       7 >>  reg> ;
: rs1>      ( op - rs1)     15 >>  reg> ;
: rs2>      ( op - rs2)     20 >>  reg> ;

( Six bits are kept here; the hi bit should be 0 for rv32.)
: shamt>    ( op - shamt)   20 >>  "3f and ;

: funct3>   ( op - f3)      12 >>  7 and ;
: funct7>   ( op - f7)      25 >> ;

| Three of the immediate forms are easy. Each leaves op in place and puts
| the sign-extended immediate on top of it.

( I form: the top twelve bits of op.)
: I-imm>    ( op - op imm)  dup  20 >>  "fff and  sext12 ;

( U form: the top twenty bits of op, left where they sit.)
: U-imm>    ( op - op imm)  dup  12 >>  sext20  12 << ;

( S form: low five bits come from the rd field, high seven from funct7.)
: S-imm>    ( op - op imm)  dup rd>  over funct7>  5 <<  or  sext12 ;

| And two are hard: their bits are scattered around the instruction word.
| Both assemble the field without its always-zero low bit, sign-extend it,
| and then double it.

( Branch offset, gathered from the low bits upward.)
: SB-imm>   ( op - op imm)
   dup   8 >>    "0f and         ( imm[4:1])
   over funct7>  "3f and  4 <<   ( imm[10:5])  or
   over  7 bit@          10 <<   ( imm[11])    or
   over 31 bit@          11 <<   ( imm[12])    or  sext12  2* ;

( Jump offset, gathered from the low bits upward.)
: UJ-imm>   ( op - op imm)
   dup  21 >>  "3ff and         ( imm[10:1])
   over 20 bit@          10 <<  ( imm[11])     or
   over 12 >>   "ff and  11 <<  ( imm[19:12])  or
   over 31 bit@          19 <<  ( imm[20])     or  sext20  2* ;

( Standard ways of printing different "types".)

( Signed hex, as many digits as the value needs.)
: .hex   ( n)  radix preserve  hex  (.) type ;

: 2sp   2 spaces ;

( Fixed-width hex. 4# converts four digits at a time.)
: 4#   # #  # # ;
: .h16      radix preserve  hex  <#  4#                   #>  type ;
: .h32      radix preserve  hex  <#  4# 4#                #>  type ;
: .h16_16   radix preserve  hex  <#  4#  char _ hold  4#  #>  type ;

( The same again, each followed by two spaces.)
: .h16__   .h16  space space ;
: .h32__   .h32  space space ;

: .u32  .h16_16 ;  ( XXX this can be changed to .h32 if you prefer)

| Print one of op's register fields by name. A trailing underscore in the
| word's name signifies that a space is printed after the register.
: .rd    ( op - op)  dup rd>   .regname ;
: .rd_   ( op - op)  dup rd>   .regname  space ;

: .rs1   ( op - op)  dup rs1>  .regname ;
: .rs1_  ( op - op)  dup rs1>  .regname  space ;

: .rs2   ( op - op)  dup rs2>  .regname ;
: .rs2_  ( op - op)  dup rs2>  .regname  space ;

( Immediates are shown as signed hex values.)
: .imm   ( n)  .hex ;

( Destination and both sources, separated by spaces.)
: .3reg   ( op - op)  .rd space  .rs1 space  .rs2 ;

( Memory operand: the offset, then the base register in parens.)
: .rs1-off  ( op imm - op)
   .imm  ." ("  .rs1  ." )" ;

( Look an address up among the labels.)
: label?  ( addr - 0 | 'link -1)  .labels. find-constant ;

| Print a pc-relative destination: by name if a label matches the address,
| otherwise as a 32-bit hex value. The absolute address is also stored in
| ea.
: .rel-dest  ( off)
   instr-pc @ +  dup ea !
   dup label? if  nip link>name type  else  .u32  then ;

( Placeholders, printed where a mnemonic would otherwise go.)
: .undecoded   ." *undecoded*" ;
: .unimplemented  ." *unimplemented*" ;
: .reserved  ." *reserved*" ;

| Loads. The mnemonic comes from a table of eight 4-character entries,
| indexed by funct3. Only the first three characters of an entry are used,
| and trailing blanks are trimmed before printing. Encodings with no rv32
| load show up as ???.
: .load
   dup funct3>  4 *  z" lb  lh  lw  ??? lbu lhu ??? ??? "  +
   3 -trailing type space  .rd_  I-imm>  .rs1-off ;

| Two "special case" 3-regs. They have slightly odd encodings and are
| matched specially: .op tests for them before the generic register ops.
: .sub   ." sub "  .3reg ;
: .sra   ." sra "  .3reg ;

( neg is encoded as sub rd, x0, rs2)
( Since rs1 is x0, only rd and rs2 are printed.)
: .neg   ." neg "  .rd_  .rs2 ;

| Register-register ops. funct3 indexes a table of eight 4-character
| mnemonics; trailing blanks are trimmed before printing.
: .op-reg
   dup funct3>  4 *  z" add sll slt sltuxor srl or  and "  +
   4 -trailing type space  .3reg ;

| Multiply and divide. Same scheme, but the entries are 6 characters wide.
: .muldiv
   dup funct3>  6 *  z" mul   mulh  mulhsumulhu div   divu  rem   remu  "  +
   6 -trailing type space  .3reg ;

| Stores. funct3 indexes a table of 2-character mnemonics. The table has an
| entry for each of the eight possible funct3 values, so that the encodings
| with no rv32 store - funct3 of 3 to 7 - print as ?? rather than indexing
| past the end of the string.
: .store
   dup funct3>  2*  z" sbshsw??????????"  +  2 type space
   .rs2_  S-imm>  .rs1-off ;

( Shift immediates are special.)
| NOTE: for the 64-bit version of these, the shift amount field is 6 bits
| wide, not 5.

( Operands common to all three: rd, rs1, and the shift count.)
: .shift-imm   .rd_  .rs1_  dup shamt> .imm ;
: .slli   ." slli "  .shift-imm ;
: .srli   ." srli "  .shift-imm ;
: .srai   ." srai "  .shift-imm ;

| Pseudo-instructions. .op-imm recognizes these encodings before falling
| back to the generic immediate ops. Operands that are fixed by the
| encoding are not printed.

: .nop  ( encoded as addi x0 x0 0)     ." nop" ;

( load immediate)
: .li  ( encoded as addi rd x0 imm)    ." li "   .rd_  I-imm> .imm ;

( register move)
: .mv  ( encoded as addi rd rs1 0)     ." mv "   .rd_  .rs1 ;

( 1's complement)
( Calling this .nnot to not conflict with .not, the conditional op.)
: .nnot  ( encoded as xori rd rs1 -1)  ." not "  .rd_  .rs1 ;

| Shared by lui and auipc: print rd and the upper immediate, then add the
| immediate to the base that was passed in. The sum is stored in ea and is
| also left on the stack.
: .ui   ( op pc - op pc+ui)
   push  .rd_  U-imm>  dup .imm
   pop +  dup ea ! ;

( Print a value in hex, wrapped in parens.)
: .(hex)   ( n)  ." ("  .hex  ." )" ;

( auipc is relative to this instruction; the resulting address is shown too.)
: .auipc   ." auipc "  instr-pc @  .ui  2sp  .(hex) ;

( lui has no base, so the sum is just the immediate; it isn't printed twice.)
: .lui     ." lui "  0 .ui  drop ;

| Conditional branches. funct3 indexes a table of eight 4-character
| mnemonics; the two unassigned encodings show up as ???. The destination is
| printed as an absolute address or label - see .rel-dest.
: .branch
   dup funct3>  4 *  z" beq bne ??? ??? blt bge bltubgeu"  +
   4 -trailing type space  .rs1_  .rs2_  SB-imm>  .rel-dest ;

( Decode a UJ-form offset and print where it lands.)
: .uj-dest  ( op - op)  UJ-imm>  .rel-dest ;

( When rd is x0 no link is kept, so show the plain jump form instead.)
: .jal   ( op - op)
   dup rd> if  ." jal "  .rd_  else  ." j "  then  .uj-dest ;

( Indirect jumps. jr is the form with no destination register printed.)
: .jr    ( op - op)  ." jr "          I-imm>  .rs1-off ;
: .jalr  ( op - op)  ." jalr "  .rd_  I-imm>  .rs1-off ;

( Instructions printed as a bare mnemonic; no operands are decoded.)
: .fence    ." fence" ;
: .fence.i  ." fence.i" ;

: .ecall   ." ecall" ;
: .ebreak  ." ebreak" ;
: .wfi     ." wfi" ;

: .dret    ." dret" ;

| The return instructions differ only in bits 29 and 28 of op. Those two
| bits pick the letter - u, s, h, or m - that is printed in front of "ret".
: .ret  dup 28 >>  3 and  z" ushm" +  1 type  ." ret" ;

( Look a csr number up among the known csr names.)
: csr?  ( addr - 0 | 'link -1)  .csr. find-constant ;

( Print the name of a csr, if it has one; otherwise print nothing.)
: .csrname  ( addr)  csr? if  link>name type  then ;

( Print the csr number in hex. It goes last on the line, as a "comment".)
: .csrnum   ( addr op - op)  swap  2sp  .(hex) ;

( Pull the csr number out of op and print its name; keep the number.)
: .csrreg   ( op - addr op)  dup 20 >>  ( addr)  dup .csrname  swap ;

( Name, then number.)
: .csrboth  ( op - op)  .csrreg  .csrnum ;

( Print a "write op": w/s/c for write/set/clear.)
( Only the low two bits of funct3 are used to pick the letter.)
: .csrwop  ( op - op)   dup funct3> 3 and  z" ?wsc" +  1 type ;

( Read only. No source operand is printed.)
: .csr_r    ." csrr "  .rd_  .csrboth ;

( Write only. No destination is printed. The i form prints the rs1 field)
( as an immediate rather than as a register name.)
: .csr_w    ." csr"  .csrwop    space  .csrreg  space     .rs1        .csrnum ;
: .csr_wi   ." csr"  .csrwop   ." i "  .csrreg  space  dup rs1> .imm  .csrnum ;

( Read and write: both rd and the source are printed.)
: .csr_rw   ." csrr"  .csrwop   space  .rd_     .rs1_             .csrboth ;
: .csr_rwi  ." csrr"  .csrwop  ." i "  .rd_  dup rs1> .imm space  .csrboth ;


| Decoding both RV32A and RV64A for simplicity. Their encodings are
| intertwined, so doing both here is easier than unnecessarily breaking out
| the RV64A for "later".

( Width suffix, chosen by the low bit of funct3.)
: .amo-size  dup funct3>  1 and  2*   z" .w.d"  +  2 type ;

( Ordering suffix, chosen by the low two bits of funct7. The table entries)
( are 6 characters wide; the first one is blank and prints nothing.)
: .amo-aqrl  dup funct7>  3 and  6 *  z"       .rl   .aq   .aqrl "  +
             5 -trailing type ;
: .amo-suffix   .amo-size  .amo-aqrl  space ;

| The top five bits of op are used directly as an offset into a table of
| 4-character names, with no scaling. This works because .amo only sends
| encodings here whose low two bits of that field are zero, so the field is
| already a multiple of 4.
: .amo-2op   dup 27 >>  z" add xor or  and min max minumaxu"  +
             ." amo"  4 -trailing type  .amo-suffix  .3reg ;

( lr has no rs2 operand, so only rd and rs1 are printed.)
: .lr        ." lr"       .amo-suffix  .rd_  .rs1 ;
: .sc        ." sc"       .amo-suffix  .3reg ;
: .amoswap   ." amoswap"  .amo-suffix  .3reg ;


| From here until "decimal", further down, unprefixed numbers are read as
| binary, so that masks and match values can be written as bit patterns.
| Decimal values in this stretch carry a # prefix.
binary  ( !!)

| Tell fence from fence.i by funct3. If neither submatch fires, the funct3
| copy is still on the stack and has to be dropped.
: .misc-mem
   dup funct3>
   111  000  submatch .fence
   111  001  submatch .fence.i
   drop
   .undecoded ;

| We need to look at high-order bits of OP instructions to figure out what
| they are. We do that here rather than in the main "shred32" routine.
|
| The value tested is op shifted right by 12, so each pattern below spells
| out funct7, rs2, rs1, and funct3. Entries are tried in order and the first
| hit wins, so the more specific .neg pattern has to come before .sub.

: .op  ( op - op)
   dup #12 >>              ( funct7  rs2   rs1  f3)
   1111111_00000_11111_111  0100000_00000_00000_000  submatch .neg  ( sub rd x0 rs)
   1111111_00000_00000_111  0100000_00000_00000_000  submatch .sub
   1111111_00000_00000_111  0100000_00000_00000_101  submatch .sra
   1111111_00000_00000_000  0000000_00000_00000_000  submatch .op-reg
   1111111_00000_00000_000  0000001_00000_00000_000  submatch .muldiv
   drop
   .undecoded ;

| The shift-with-immediate-count instructions have a slightly unusual
| encoding. Let's match and handle those before doing the "normal"
| immediate instructions.
|
| Two different shifted copies of op are tested. The first, op shifted right
| by 7, includes the rd field and is only used to spot nop. It is then
| replaced by op shifted right by 12, which is used for everything else.
| Entries are tried in order and the first hit wins.

: .op-imm  ( op - op)
   dup #7 >>                      (    imm12     rs1  f3   rd)
   1111111_11111_11111_111_11111  0000000_00000_00000_000_00000
                                                     submatch .nop  ( addi x0 x0 0)

   drop  dup #12 >>         (    imm12     rs1  f3)
   0000000_00000_11111_111  0000000_00000_00000_000  submatch .li    ( addi rd x0 imm)
   1111111_11111_00000_111  0000000_00000_00000_000  submatch .mv    ( addi rd rs 0)
   1111111_11111_00000_111  1111111_11111_00000_100  submatch .nnot  ( xori rd rs -1)

                          ( funct7  shamt  rs1  f3)
   1111111_00000_00000_111  0000000_00000_00000_001  submatch .slli
   1111111_00000_00000_111  0000000_00000_00000_101  submatch .srli
   1111111_00000_00000_111  0100000_00000_00000_101  submatch .srai

   ( Nothing special matched. Must be a "normal" immediate op.)
   ( The shifted copy is still on top; its low three bits are funct3, which)
   ( indexes a table of 6-character mnemonics.)
   #7 and  #6 *  z" addi  ????  slti  sltiu xori  ????  ori   andi  "  +
   #5 -trailing type space  .rd_  .rs1_  I-imm> .imm ;

| Indirect jumps. The value tested is op shifted right by 7, so the patterns
| cover funct3 and rd. With funct3 zero, rd of x0 means jr; any other rd
| means jalr. The jr test has to come first, since the jalr pattern ignores
| rd.
: .jalr-jr
   dup #7 >>
   111_11111  000_00000  submatch .jr
   111_00000  000_00000  submatch .jalr
   drop
   .undecoded ;

| System instructions. The value tested is op shifted right by 7. The long
| patterns spell out funct7, rs2, rs1, funct3, and rd; the short ones only
| cover rs1, funct3, and rd.
|
| Entries are tried in order and the first hit wins. The specific encodings
| with funct3 of 000 come first; anything else with the low two bits of
| funct3 clear is then rejected, leaving only csr instructions for the
| remaining entries.
: .system
   dup #7 >>
   1111111_11111_00000_111_00000  0000000_00000_00000_000_00000  submatch .ecall
   1111111_11111_00000_111_00000  0000000_00001_00000_000_00000  submatch .ebreak

   1111111_11111_00000_111_00000  0111101_10010_00000_000_00000  submatch .dret  ( debug)
   ( The two funct7 bits left out of this mask are the ones .ret decodes.)
   1100111_11111_00000_111_00000  0000000_00010_00000_000_00000  submatch .ret

   1111111_11111_00000_111_00000  0001000_00101_00000_000_00000  submatch .wfi

   ( Match any other system instruction with f3=000 or 100)
   00000_011_00000  00000_000_00000  submatch .undecoded

  ( rs1       rd     rs1       rd)
   11111_111_00000  00000_010_00000  submatch .csr_r   ( read-only: rs1 is x0)

   00000_100_11111  00000_000_00000  submatch .csr_w   ( write-only: rd is x0)
   00000_100_11111  00000_100_00000  submatch .csr_wi  ( write-only imm)

   00000_100_00000  00000_000_00000  submatch .csr_rw   ( read/write)
   00000_100_00000  00000_100_00000  submatch .csr_rwi  ( read/write imm)
   drop
   .undecoded ;

| NOTE: For simplicity I'm matching both the 32-bit - f3=010 - and 64-bit -
| f3=011 - atomics here.
|
| The value tested is op shifted right by 12. The aq and rl bits are left
| out of every mask; .amo-aqrl prints them as a suffix. The first entry
| accepts any encoding whose top field has its low two bits clear, which is
| what lets .amo-2op use that field directly as a table offset.

: .amo  ( atomics)
   dup #12 >>              ( op  aqrl  rs2   rs1  f3)
   00011_00_00000_00000_110  00000_00_00000_00000_010  submatch .amo-2op
   11111_00_00000_00000_110  00001_00_00000_00000_010  submatch .amoswap
   11111_00_11111_00000_110  00010_00_00000_00000_010  submatch .lr    ( rs2=0)
   11111_00_00000_00000_110  00011_00_00000_00000_010  submatch .sc
   drop
   .undecoded ;

| Create a defer'ed word for a subset disassembler. Its default behaviour
| will be to execute .undecoded .
|
| NOTE(review): this relies on defer taking its initial behaviour from the
| variable "undeferred", and on preserve putting the old value back when
| subset exits - confirm against the definitions of defer and preserve.
: subset   undeferred preserve  ['] .undecoded  undeferred !  defer ;

( To be filled in by subset disassemblers.)
( Until one is loaded, each of these just prints *undecoded*.)

( Compressed - C - subset. The entire 16-bit encoding space.)
( Called by 1dis for every 16-bit instruction.)
subset shred16  ( op - op)

( subsets FD - floating point)
( Each of these is dispatched to by shred32.)
subset .load-fp
subset .store-fp
subset .madd
subset .msub
subset .nmsub
subset .nmadd
subset .op-fp

( These two are for RV64 additions to base - I - subset.)
subset .op-imm-32
subset .op-32

| Top-level decoder for 32-bit instructions. Dispatches on the low seven
| bits of op. Each match line that hits runs its word and then exits
| shred32; if none hits we fall through to .undecoded at the end.
|
| The commented-out rows are encodings with no decoder; they are listed to
| show where they sit in the opcode map.
: shred32  ( op - op)
   ( NOTE:    xx_111_11  is reserved for wider instruction sets.)

   11_111_11  00_000_11  match .load
   11_111_11  00_001_11  match .load-fp   ( FD subsets)
            ( 00_010_11         custom-0)
   11_111_11  00_011_11  match .misc-mem
   11_111_11  00_100_11  match .op-imm
   11_111_11  00_101_11  match .auipc
   11_111_11  00_110_11  match .op-imm-32  ( rv64)

   11_111_11  01_000_11  match .store
   11_111_11  01_001_11  match .store-fp  ( FD subsets)
            ( 01_010_11         custom-1)
   11_111_11  01_011_11  match .amo       ( A subset)
   11_111_11  01_100_11  match .op
   11_111_11  01_101_11  match .lui
   11_111_11  01_110_11  match .op-32  ( rv64)

   ( The following "row" is all floating point instructions.)
   11_111_11  10_000_11  match .madd
   11_111_11  10_001_11  match .msub
   11_111_11  10_010_11  match .nmsub
   11_111_11  10_011_11  match .nmadd
   11_111_11  10_100_11  match .op-fp
            ( 10_101_11         reserved)
            ( 10_110_11         custom-2)

   11_111_11  11_000_11  match .branch
   11_111_11  11_001_11  match .jalr-jr
            ( 11_010_11         reserved)
   11_111_11  11_011_11  match .jal
   11_111_11  11_100_11  match .system
            ( 11_101_11         reserved)
            ( 11_110_11         custom-3)

   .undecoded ;

( The bit-pattern tables are done; go back to reading numbers as decimal.)
decimal

( Support for interactive disassembly.)
| NOTE(review): these look like the step-forward and step-back hooks for an
| interactive memory inspector, with dis+ resuming at p - the address just
| past the instruction last fetched - and dis- backing up by 4. The exact
| behaviour of advance and the meaning of the trailing 0 are not visible
| in this file - confirm against the inspector code.
: dis+  ( a - a' 0)  drop  p @  0 advance  0 ;
: dis-  ( a - a' 0)            -4 advance  0 ;

( True if the low two bits of op are anything other than 11.)
: op16?  ( op - f)   3 and 3 u< ;

( Fetch the next 16-bit packet as two bytes, combined by lohi> .)
( NOTE - review: presumably c* reads through p and advances it - confirm.)
: op*    ( - op16)   c* c* lohi> ;

| Fetch an instruction, 16 bits at a time. If low bits of first 16-bit
| "packet" are 00, 01, or 10, this is a 16-bit compressed instruction, so
| return it. Otherwise, fetch another packet and concatenate the two.
|
| NOTE: While there is space in the RISC-V encoding for 64-bit and 128-bit
| *encodings*, we ignore them for now, and assume that every instruction we
| will see will consist of either one or two 16-bit packets.

( The workhorse.)
| Disassemble the one instruction at address a. If a label is defined at
| that address, it is printed first, on a line of its own. Then comes the
| address, the raw instruction in hex - 16-bit ones are padded with four
| spaces - and the decoded text. Both p and instr-pc are set to a before
| the instruction is fetched.
: 1dis  ( a)
   dup  ea !  ( default ea: stay put)
   dup label? if
      over _addr .nesting space  ." label "  link>name type  then
   dup .addr .nesting space  dup  p !  instr-pc !
   op* ( low16)  dup op16? if
                  4 spaces  dup .h16__  shred16  else
      op* ( hi16) 16 <<  +  dup .h32__  shred32  then
   drop  space ;


.ifdef testing

| I'd like to be able to test the immediate encoding and decoding routines
| are proper inverses. The easiest way to test this is to create a series
| of 1-bit immediate values, each corresponding to one bit in the "space" of
| immediate values. The highest bit should be represented by a negative,
| sign-extended value.
|
| For each of these, encode using the assembler's routine, and decode using
| the disassembler's and make sure the values match. A quirk of the code: S
| and SB forms encode a 12-bit field, but in the case of SB it actually
| encodes a 13-bit value with the LSB 0. So we 2* before doing the SB test.
|
| UJ is similar. The field is 20 bits wide, but the values encoded by it
| are also shifted left by one bit.
|
| The routines that are most likely to be wrong are the SB and UJ
| immediates - they are pretty complex.

( Test the 12-bit and 20-bit immediates.)

| Report one round trip on a line of its own: the name of the form, the
| original immediate, the decoded one, and then a verdict. The original
| immediate is left on the stack.
: check  ( imm codeced which - imm)
   cr  count type space
   over .hex space  dup .hex space
   over = if  ." OK"  else  ." failed"  then ;

| Round-trip imm through the S form, then twice imm through the SB form.
| Each is encoded into an otherwise zero op by the assembler's routine and
| decoded again by ours. Returns the doubled value, ready for the next pass.
: checkS-SB  ( imm - imm*2)
   dup 0 S-imm ( op)  S-imm> nip  ( orig codeced)  z"  S" check
   2* dup 0 SB-imm ( op)  SB-imm> nip  ( orig codeced)  z" SB" check ;

| Double imm and round-trip the result through the UJ form.
: checkUJ  ( imm - imm*2)
   2* dup 0 UJ-imm ( op)  UJ-imm> nip  ( orig codeced)  z" UJ" check ;

| Walk a single 1 bit up through each field. After the loop the bit sits in
| the field's sign position, so sign-extending it gives the one negative
| case, which is checked last.
: test
   1  11 for  checkS-SB  next  ( signed, positive)
   sext12  checkS-SB  ( signed, negative)  drop

   1  19 for  checkUJ  next  ( signed, positive)
   sext20  checkUJ  ( signed, negative)  drop ;

.then  ( testing)