Skip to content

Commit

Permalink
Merge branch 'master' into develop-documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Udit Subramanya committed Jan 10, 2025
2 parents 31db570 + 01974e1 commit c2551ad
Show file tree
Hide file tree
Showing 115 changed files with 10,710 additions and 2,894 deletions.
8 changes: 8 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Language: Cpp
BasedOnStyle: LLVM
IndentWidth: 2
TabWidth: 2
ColumnLimit: 0
UseTab: Never
BreakBeforeBraces: Attach
AlwaysBreakTemplateDeclarations: true
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
strategy:
fail-fast: false
matrix:
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm]
name: [regression, opencl, cache, config1, config2, debug, scope, stress, synthesis, vm, vector]
xlen: [32, 64]

steps:
Expand Down
43 changes: 32 additions & 11 deletions ci/regression.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ regression()
./ci/blackbox.sh --driver=simx --app=vecadd --rebuild=3

# test for matmul
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"
CONFIGS="-DTC_NUM=4 -DTC_SIZE=8" ./ci/blackbox.sh --cores=4 --app=matmul --driver=simx --threads=32 --warps=32 --args="-n128 -d1"

echo "regression tests done!"
}
Expand Down Expand Up @@ -158,6 +158,7 @@ cache()

# reduce l1 line size
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DDISABLE_L1" ./ci/blackbox.sh --driver=rtlsim --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE" ./ci/blackbox.sh --driver=simx --app=io_addr
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
Expand All @@ -168,12 +169,10 @@ cache()
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx

# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DLMEM_NUM_BANKS=2 -DDCACHE_NUM_BANKS=4" ./ci/blackbox.sh --driver=simx --app=sgemmx --threads=8

# replacement policy
CONFIGS="-DDCACHE_REPL_POLICY=0" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
Expand Down Expand Up @@ -322,6 +321,14 @@ config2()
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=1" ./ci/blackbox.sh --driver=opae --app=mstress
CONFIGS="-DPLATFORM_MEMORY_INTERLEAVE=0" ./ci/blackbox.sh --driver=opae --app=mstress

# test memory ports
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=simx --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DMEM_BLOCK_SIZE=8 -DPLATFORM_MEMORY_BANKS=2" ./ci/blackbox.sh --driver=rtlsim --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=opae --app=mstress --threads=8
CONFIGS="-DMEM_BLOCK_SIZE=8" ./ci/blackbox.sh --driver=xrt --app=mstress --threads=8

echo "configuration-2 tests done!"
}

Expand Down Expand Up @@ -359,8 +366,8 @@ scope()
{
echo "begin scope tests..."

SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
SCOPE_DEPTH=256 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=opae --app=demo --args="-n1" --scope
SCOPE_DEPTH=128 ./ci/blackbox.sh --driver=xrt --app=demo --args="-n1" --scope

echo "debugging scope done!"
}
Expand All @@ -381,15 +388,25 @@ synthesis()
echo "begin synthesis tests..."

PREFIX=build_base make -C hw/syn/yosys clean
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE" make -C hw/syn/yosys synthesis
PREFIX=build_base CONFIGS="-DDPI_DISABLE -DEXT_F_DISABLE -DNUM_WARPS=2 -DNUM_THREADS=2" make -C hw/syn/yosys synthesis

echo "synthesis tests done!"
}

# Run the RISC-V vector regression tests on the simx simulator.
#
# Steps:
#   1. Clean sim/simx and rebuild it with CONFIGS="-DEXT_V_ENABLE".
#   2. Invoke tests/riscv/riscv-vector-tests/run-test.sh with VLEN=256 and
#      REG_TESTS=1. The @TOOLDIR@ / @XLEN@ tokens are template placeholders,
#      presumably substituted when this .in file is configured.
#
# Returns: the failing make's exit status if the clean or the rebuild fails;
#          otherwise the status of the final echo.
vector()
{
    echo "begin vector tests..."

    # Check each build step explicitly. In the former "clean && build" list a
    # failing clean was neither caught by errexit (non-final commands of an
    # && list are exempt) nor checked by hand, so the tests below would run
    # against a stale or missing simulator binary.
    make -C sim/simx clean || return
    CONFIGS="-DEXT_V_ENABLE" make -C sim/simx || return

    TOOLDIR=@TOOLDIR@ XLEN=@XLEN@ VLEN=256 REG_TESTS=1 ./tests/riscv/riscv-vector-tests/run-test.sh

    echo "vector tests done!"
}

show_usage()
{
echo "Vortex Regression Test"
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--all] [--h|--help]"
echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [--h|--help]"
}

declare -a tests=()
Expand Down Expand Up @@ -439,6 +456,9 @@ while [ "$1" != "" ]; do
--synthesis )
tests+=("synthesis")
;;
--vector )
tests+=("vector")
;;
--all )
tests=()
tests+=("unittest")
Expand All @@ -454,6 +474,7 @@ while [ "$1" != "" ]; do
tests+=("scope")
tests+=("stress")
tests+=("synthesis")
tests+=("vector")
;;
-h | --help )
show_usage
Expand Down
8 changes: 7 additions & 1 deletion docs/debugging.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@ The recommended method to enable debugging is to pass the `--debug` flag to `bla
// Running demo program on rtlsim in debug mode
$ ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1

A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc..). A waveform trace `trace.vcd` is also generated in the current directory during the program execution. You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc..). [GTKwave] (http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.
A debug trace `run.log` is generated in the current directory during the program execution. The trace includes important states of the simulated processor (memory, caches, pipeline, stalls, etc.). A waveform trace `trace.vcd` is also generated in the current directory during the program execution.
By default, all library modules under the /libs/ folder are excluded from the trace to reduce the waveform file size. You can change that behavior by either explicitly commenting out `TRACING_OFF`/`TRACING_ON` inside a lib module source (e.g. VX_stream_buffer.sv) or simply enabling a full trace using the following command.

// Debugging the demo program with rtlsim in full tracing mode
$ CONFIGS="-DTRACING_ALL" ./ci/blackbox.sh --driver=rtlsim --app=demo --debug=1

You can visualize the waveform trace using any tool that can open VCD files (Modelsim, Quartus, Vivado, etc.). [GTKWave](http://gtkwave.sourceforge.net) is a great open-source scope analyzer that also works with VCD files.

## FPGA Debugging

Expand Down
7 changes: 4 additions & 3 deletions hw/rtl/VX_cluster.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
VX_dcr_bus_if.slave dcr_bus_if,

// Memory
VX_mem_bus_if.master mem_bus_if,
VX_mem_bus_if.master mem_bus_if [`L2_MEM_PORTS],

// Status
output wire busy
Expand Down Expand Up @@ -79,7 +79,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
VX_mem_bus_if #(
.DATA_SIZE (`L1_LINE_SIZE),
.TAG_WIDTH (L1_MEM_ARB_TAG_WIDTH)
) per_socket_mem_bus_if[`NUM_SOCKETS]();
) per_socket_mem_bus_if[`NUM_SOCKETS * `L1_MEM_PORTS]();

`RESET_RELAY (l2_reset, reset);

Expand All @@ -91,6 +91,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.NUM_WAYS (`L2_NUM_WAYS),
.WORD_SIZE (L2_WORD_SIZE),
.NUM_REQS (L2_NUM_REQS),
.MEM_PORTS (`L2_MEM_PORTS),
.CRSQ_SIZE (`L2_CRSQ_SIZE),
.MSHR_SIZE (`L2_MSHR_SIZE),
.MRSQ_SIZE (`L2_MRSQ_SIZE),
Expand Down Expand Up @@ -144,7 +145,7 @@ module VX_cluster import VX_gpu_pkg::*; #(

.dcr_bus_if (socket_dcr_bus_if),

.mem_bus_if (per_socket_mem_bus_if[socket_id]),
.mem_bus_if (per_socket_mem_bus_if[socket_id * `L1_MEM_PORTS +: `L1_MEM_PORTS]),

`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
Expand Down
72 changes: 53 additions & 19 deletions hw/rtl/VX_config.vh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
`ifndef VX_CONFIG_VH
`define VX_CONFIG_VH



`ifndef MIN
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`endif
Expand Down Expand Up @@ -87,6 +85,10 @@
`endif
`endif

`ifndef VLEN
`define VLEN 256
`endif

`ifndef NUM_CLUSTERS
`define NUM_CLUSTERS 1
`endif
Expand Down Expand Up @@ -170,8 +172,8 @@
`define L3_LINE_SIZE `MEM_BLOCK_SIZE
`endif

`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
`ifndef PLATFORM_MEMORY_BANKS
`define PLATFORM_MEMORY_BANKS 2
`endif

`ifdef XLEN_64
Expand All @@ -193,7 +195,7 @@
`endif

`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 64'h0F0000000
`endif

Expand All @@ -218,7 +220,7 @@
`endif

`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
`endif

Expand All @@ -239,7 +241,7 @@
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR `IO_BASE_ADDR
`endif
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`define IO_COUT_SIZE 64

`ifndef IO_MPM_ADDR
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
Expand Down Expand Up @@ -303,13 +305,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV32 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (2)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (4)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (1024)
`endif
`ifndef PT_SIZE_LIMIT
Expand All @@ -319,13 +321,13 @@
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV39 //or BARE
`endif
`ifndef PT_LEVEL
`ifndef PT_LEVEL
`define PT_LEVEL (3)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (8)
`endif
`ifndef NUM_PTE_ENTRY
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (512)
`endif
`ifndef PT_SIZE_LIMIT
Expand Down Expand Up @@ -578,6 +580,10 @@
`define ICACHE_REPL_POLICY 1
`endif

`ifndef ICACHE_MEM_PORTS
`define ICACHE_MEM_PORTS 1
`endif

// Dcache Configurable Knobs //////////////////////////////////////////////////

// Cache Enable
Expand All @@ -604,7 +610,7 @@

// Number of Banks
`ifndef DCACHE_NUM_BANKS
`define DCACHE_NUM_BANKS `MIN(`NUM_LSU_LANES, 4)
`define DCACHE_NUM_BANKS `MIN(DCACHE_NUM_REQS, 16)
`endif

// Core Response Queue Size
Expand Down Expand Up @@ -647,6 +653,15 @@
`define DCACHE_REPL_POLICY 1
`endif

// Number of Memory Ports
`ifndef L1_MEM_PORTS
`ifdef L1_DISABLE
`define L1_MEM_PORTS `MIN(DCACHE_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`else
`define L1_MEM_PORTS `MIN(`DCACHE_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`endif
`endif

// LMEM Configurable Knobs ////////////////////////////////////////////////////

`ifndef LMEM_DISABLE
Expand Down Expand Up @@ -674,7 +689,7 @@

// Number of Banks
`ifndef L2_NUM_BANKS
`define L2_NUM_BANKS `MIN(4, `NUM_SOCKETS)
`define L2_NUM_BANKS `MIN(L2_NUM_REQS, 16)
`endif

// Core Response Queue Size
Expand Down Expand Up @@ -717,6 +732,15 @@
`define L2_REPL_POLICY 1
`endif

// Number of Memory Ports
`ifndef L2_MEM_PORTS
`ifdef L2_ENABLE
`define L2_MEM_PORTS `MIN(`L2_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L2_MEM_PORTS `MIN(L2_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif

// L3cache Configurable Knobs /////////////////////////////////////////////////

// Cache Size
Expand All @@ -726,7 +750,7 @@

// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`define L3_NUM_BANKS `MIN(L3_NUM_REQS, 16)
`endif

// Core Response Queue Size
Expand Down Expand Up @@ -769,9 +793,13 @@
`define L3_REPL_POLICY 1
`endif

// Number of Memory Ports from LLC
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
// Number of Memory Ports
`ifndef L3_MEM_PORTS
`ifdef L3_ENABLE
`define L3_MEM_PORTS `MIN(`L3_NUM_BANKS, `PLATFORM_MEMORY_BANKS)
`else
`define L3_MEM_PORTS `MIN(L3_NUM_REQS, `PLATFORM_MEMORY_BANKS)
`endif
`endif

// ISA Extensions /////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -806,6 +834,12 @@
`define EXT_M_ENABLED 0
`endif

`ifdef EXT_V_ENABLE
`define EXT_V_ENABLED 1
`else
`define EXT_V_ENABLED 0
`endif

`ifdef EXT_ZICOND_ENABLE
`define EXT_ZICOND_ENABLED 1
`else
Expand All @@ -822,7 +856,7 @@
`define ISA_STD_N 13
`define ISA_STD_Q 16
`define ISA_STD_S 18
`define ISA_STD_U 20
`define ISA_STD_V 21

`define ISA_EXT_ICACHE 0
`define ISA_EXT_DCACHE 1
Expand Down Expand Up @@ -859,7 +893,7 @@
| (0 << 18) /* S - Supervisor mode implemented */ \
| (0 << 19) /* T - Tentatively reserved for Transactional Memory extension */ \
| (1 << 20) /* U - User mode implemented */ \
| (0 << 21) /* V - Tentatively reserved for Vector extension */ \
| (`EXT_V_ENABLED << 21) /* V - Tentatively reserved for Vector extension */ \
| (0 << 22) /* W - Reserved */ \
| (1 << 23) /* X - Non-standard extensions present */ \
| (0 << 24) /* Y - Reserved */ \
Expand Down
Loading

0 comments on commit c2551ad

Please sign in to comment.